# 利用 lxml 中的 etree 查询节点的某些属性值

import urllib2
from lxml import etree

# Fetch the first page of forum search results for "win10" from
# social.msdn.microsoft.com and print, for each result entry, the href
# values of the links found inside it.

# The request carries a Firefox User-Agent string instead of the urllib2
# default.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
headers = {'User-Agent': user_agent}
myUrl = 'https://social.msdn.microsoft.com/Forums/zh-CN/home?searchTerm=win10&sort=relevancedesc&brandIgnore=true&page=1'
req = urllib2.Request(myUrl, headers=headers)

# Close the response explicitly rather than relying on garbage collection
# to release the underlying connection.
response = urllib2.urlopen(req)
try:
    html = response.read()
finally:
    response.close()

# The single-argument print(...) form parses both as a Python 2 statement
# and as a Python 3 call; the format string reproduces the output of the
# original `print 'size is: ', len(html)` exactly (note the two spaces).
print('size is:  %d' % len(html))

tree = etree.HTML(html)
# Select every child element of the <ul> directly under the element whose
# id is "homePageResultsContainer".
nodes = tree.xpath("//*[@id='homePageResultsContainer']/ul/*")
for node in nodes:
    # href attribute values of <a> elements sitting directly inside the
    # node's immediate <div> children; an empty list if there are none.
    hrefs = node.xpath("./div/a/@href")
    print(hrefs)




# 你可能感兴趣的:(python)