爬取南京链家租房信息
代码如下：
import requests
from lxml import etree
if __name__ == "__main__":
    # Scrape rental listings from Lianjia (Nanjing): fetch result pages 1-100,
    # extract each listing's title and monthly price, print them, and save
    # one listing per line to lianjia.txt.

    # Browser-like User-Agent sent with every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    # Generic URL template; %d is replaced with the page number.
    url = "https://nj.lianjia.com/zufang/pg%d"

    # The context manager guarantees the file is flushed and closed even if a
    # request or a parse step raises part-way through the crawl.
    with open("lianjia.txt", "w", encoding="utf-8") as fp:
        for pagenum in range(1, 101):
            new_url = url % pagenum
            # A timeout keeps one stalled connection from hanging the crawl.
            page_text = requests.get(
                url=new_url, headers=headers, timeout=10
            ).text
            tree = etree.HTML(page_text)
            div_list = tree.xpath('//div[@class="content__list"]/div')
            for div in div_list:
                titles = div.xpath("./div/p/a/text()")
                prices = div.xpath("./div/span/em/text()")
                # Skip child divs that lack a title or a price node instead
                # of failing with IndexError on the [0] lookup.
                if not titles or not prices:
                    continue
                # Strip surrounding whitespace so each record stays on a
                # single line once the newline separator is appended.
                record = titles[0].strip() + prices[0].strip() + "元/月"
                print(record)
                fp.write(record + "\n")