//img/@src 得到所有img标签的src值
//a/text() 得到所有A标签中的文本
name=response.xpath('//img/@src').getall() #getall()从对象中获取具体值
# Fetch one Baidu Tieba forum listing page and print, for every thread entry
# found on it, the author name and id stored in the entry's JSON
# ``data-field`` attribute.
import json
import re
import sys

import requests
from lxml import etree

# Listing page to scrape; ``kw`` carries the percent-encoded forum keyword.
url1 = "http://tieba.baidu.com/f?kw=%E6%AD%A6%E6%B1%89&red_tag=d3345989481"

# Seconds to wait for the server. Without a timeout, requests can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10

html = requests.get(url1, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
html.raise_for_status()

select = etree.HTML(html.text)

# Each matching <li> is one entry of the listing. The class string is compared
# exactly, including its leading space, because that is how the page emits it.
quyu = select.xpath('//li[@class=" j_thread_list clearfix"]')

for each in quyu:
    data_field = each.xpath('@data-field')
    if not data_field:
        # Entry without metadata: nothing to report, skip it rather than crash.
        continue
    auth = json.loads(data_field[0])
    author = auth['author_name']
    # Named thread_id (presumably the thread's id) to avoid shadowing the
    # builtin ``id``.
    thread_id = auth['id']
    print(author, thread_id)
结果:
==================== RESTART: D:/Python/Python37/tieba.py ====================
silent烟花 6496741920
silent烟花 6497028530
silent烟花 6497025455
silent烟花 6497024605
silent烟花 6497012358
silent烟花 6497001415
silent烟花 6496980363
silent烟花 6496995996
silent烟花 6496995111
silent烟花 6496994178
贴吧官方 6496068827