"""
爬取链家地址二手房信息
目标:爬取标题名称 地址 总价
"""
import requests
import urllib.request
import urllib.parse
import re
# Base listing URL; the page number is appended directly (".../pg0", ".../pg1", ...).
url = "http://bd.lianjia.com/ershoufang/pg"
# Browser-like User-Agent sent with every request.
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

# Compiled once instead of on every page. Raw strings keep the pattern text
# identical while avoiding the invalid "\D" / "\d" string-escape warnings.
REGION_PATTERN = re.compile(r'data-el="region">(\D+)')
TOTAL_PRICE_PATTERN = re.compile(r'class="totalPrice">(\d+)万')

OUTPUT_PATH = "房.txt"
PAGE_COUNT = 100
# Seconds to wait on the server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10


def parse_listings(html):
    """Extract one output record per listing from a page's HTML.

    The text captured after ``data-el="region">`` (a run of non-digit
    characters) is paired positionally with the digits captured after
    ``class="totalPrice">`` and before ``万``.

    Args:
        html: Page markup as a string.

    Returns:
        A list of strings formatted as ``"<region> <price>万 "``. If the two
        patterns match a different number of times, the unmatched extras are
        ignored rather than raising ``IndexError``.
    """
    regions = REGION_PATTERN.findall(html)
    prices = TOTAL_PRICE_PATTERN.findall(html)
    # zip() stops at the shorter list, so a page with a missing price can no
    # longer index past the end of the price list.
    return [f"{region} {price}万 " for region, price in zip(regions, prices)]


def fetch_page(page):
    """Download listing page number ``page`` and return its text decoded as UTF-8.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    response = requests.get(url + str(page), headers=headers, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    return response.text


def main():
    """Fetch every listing page and append its records to ``OUTPUT_PATH``."""
    # Requests pg0 .. pg99, exactly as the original loop did.
    # NOTE(review): if the site numbers its pages from 1, this should be
    # range(1, PAGE_COUNT + 1) -- confirm against the live site.
    for page in range(PAGE_COUNT):
        records = parse_listings(fetch_page(page))
        if not records:
            continue
        # Append after each page so earlier results survive a later failure;
        # the file is opened once per page rather than once per record.
        with open(OUTPUT_PATH, "a", encoding="utf-8") as out:
            out.writelines(record + "\n" for record in records)


if __name__ == "__main__":
    main()