#!/usr/bin/python
from lxml import etree
from io import StringIO, BytesIO
from HTMLParser import HTMLParser
from lxml import html
def extract_links(adm_markup):
    """Collect anchor targets and image sources from an HTML fragment.

    Args:
        adm_markup: Text containing HTML markup. ``None``, empty and
            whitespace-only values are accepted.

    Returns:
        A ``(hrefs, srcs)`` tuple: the ``href`` values of every ``<a>``
        element and the ``src`` values of every ``<img>`` element, in
        document order. Both lists are empty when the markup is blank or
        yields no root element.
    """
    # Blank markup gives the parser nothing to build a tree from, so
    # answer directly instead of letting the parse or the XPath lookup fail.
    if not adm_markup or not adm_markup.strip():
        return [], []

    document = etree.parse(StringIO(adm_markup), etree.HTMLParser())
    if document.getroot() is None:
        return [], []

    # Selecting the attribute nodes directly skips tags that lack the
    # attribute, where indexing ``attrib[...]`` would raise KeyError.
    return document.xpath("//a/@href"), document.xpath("//img/@src")


if __name__ == '__main__':
    # Sample payload; the seatbid/bid/adm layout looks like an OpenRTB bid
    # response -- TODO confirm against the real producer of this data.
    data = {
        u'seatbid': [{
            u'bid': [{
                u'nurl': u'http://baidu.com',
                u'cid': u'986191',
                u'crid': u'4581591',
                u'price': 0.1317348624101392,
                u'adid': u'56cb0b8b70f8f8100805eeda',
                u'adomain': [u'mobvista.com'],
                u'adm': u' ',
                u'id': u'222',
            }],
        }],
        u'id': u'bid1-7',
    }
    admstr = data['seatbid'][0]['bid'][0]['adm']
    ahref, imghref = extract_links(admstr)

    # Print None rather than crash with IndexError when nothing was found.
    # The single-argument parenthesised form prints the same text on
    # Python 2 and Python 3.
    print("ahref is : %s" % (ahref[0] if ahref else None))
    print("imghref is : %s" % (imghref[0] if imghref else None))