#!/usr/bin/python
"""Fetch a page and print the URL from its Open Graph ``og:image`` meta tag."""

from contextlib import closing

try:  # Python 3
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2
    from HTMLParser import HTMLParser
    from urllib2 import urlopen

# Page that is fetched when the script is run directly.
PAGE_URL = 'http://YOUR.xxxxx.com/random'

# Value of the og:type meta tag that marks a photo post.
PHOTO_TYPE = 'xxxxx-feed:photo'


class MyParser(HTMLParser):
    """Collect Open Graph photo information from ``<meta>`` tags.

    The parser looks for tags of this shape::

        <meta property="og:type" content="xxxxx-feed:photo" />
        <meta property="og:image"
              content="http://media.com/8a7ef6a/mf8ylpaOK51rx0ocqo1_500.gif" />

    Attributes:
        found_type: True once an ``og:type`` meta tag has been seen.
        found_photo: True once an ``og:type`` tag whose content is
            ``xxxxx-feed:photo`` has been seen.
        found_image: True once an ``og:image`` URL has been recorded.
        image: content of the last accepted ``og:image`` tag, or ``''``
            when none has been recorded.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.found_type = False
        self.found_photo = False
        self.found_image = False
        self.image = ''

    def handle_starttag(self, tag, attrs):
        """Update the parser state from one start tag.

        Only ``meta`` tags are inspected.  The attributes are looked at
        together, so the result does not depend on whether ``property``
        or ``content`` is written first, and state cannot leak from one
        tag into the next.

        Args:
            tag: lower-cased tag name supplied by HTMLParser.
            attrs: list of ``(name, value)`` pairs supplied by HTMLParser.
        """
        if tag != 'meta':
            return

        meta = dict(attrs)
        prop = meta.get('property')
        content = meta.get('content')

        if prop == 'og:type':
            self.found_type = True
            if content == PHOTO_TYPE:
                self.found_photo = True
        elif prop == 'og:image' and self.found_type and content is not None:
            # As before, og:image is only honoured after an og:type tag.
            # NOTE(review): extraction is not gated on found_photo, so the
            # image of a non-photo page is recorded too -- confirm whether
            # that is intended.
            self.found_image = True
            # Diagnostic line the script has always written to stdout.
            print(" attr: %s" % (('content', content),))
            self.image = content


def fetch_image_url(url=PAGE_URL):
    """Download *url* and return its ``og:image`` URL.

    Args:
        url: address of the page to download.

    Returns:
        The image URL found by MyParser, or ``''`` if there is none.

    Raises:
        UnicodeDecodeError: if the response body is not valid UTF-8.
        Errors raised by ``urlopen`` propagate unchanged.
    """
    # closing() releases the connection even if read() or decode() fails.
    with closing(urlopen(url)) as response:
        html = response.read().decode('utf-8')

    parser = MyParser()
    parser.feed(html)
    return parser.image


def main():
    """Print the image URL of the default page."""
    print(fetch_image_url())


if __name__ == '__main__':
    main()