今天给大家介绍如何用Python爬虫抓取二手房数据。
Python版本: 3.6
相关模块:
requests模块
parsel模块
csv模块
re模块
安装Python并添加到环境变量,再用pip安装requests和parsel这两个第三方模块即可(csv和re是Python标准库自带的模块,无需另外安装)。
浏览器中打开我们要抓的页面
按F12进入开发者工具,查看我们想要的二手房数据
这里我们只需要请求页面本身,拿到返回的HTML数据就可以了
# Request headers sent with every HTTP call. Built once here instead of
# being recreated on each page iteration.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}

# Seconds to wait for a response before requests raises a Timeout; without
# it a single stalled connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10


def split_pair(text, sep='/'):
    """Split ``text`` on ``sep`` and return a list of at least two items.

    Args:
        text: the string to split, or ``None`` when a selector matched
            nothing.
        sep: the separator to split on.

    Returns:
        The list produced by ``str.split``, padded with ``''`` so that
        indexes 0 and 1 always exist. ``None`` or ``''`` yields ``['', '']``.
    """
    parts = (text or '').split(sep)
    # Pad short results so callers can index [0] and [1] without IndexError.
    parts.extend([''] * (2 - len(parts)))
    return parts


def parse_listing(select):
    """Extract the fields of one listing detail page.

    Args:
        select: an object exposing ``.css(query).get()`` / ``.getall()``
            (a ``parsel.Selector`` built from the detail page HTML).

    Returns:
        A dict keyed by field name. Values taken straight from ``.get()``
        are ``None`` when the selector matches nothing; values that are
        post-processed (floor, furnish, acreage) fall back to empty strings
        instead of raising.
    """
    title = select.css('.main::text').get()
    price = select.css('.price .total::text').get()
    price_1 = select.css('.unitPrice span::text').get()
    unit_type = select.css('.room .mainInfo::text').get()

    # Presumably of the form "<level>楼层/共<N>层"; the two halves are
    # cleaned separately below.
    floor = split_pair(select.css('.room .subInfo::text').get())
    floor_1 = floor[0].replace('楼层', '')
    floor_2 = floor[1].replace('共', '').replace('层', '')

    face = select.css('.type .mainInfo::text').get()
    furnish = split_pair(select.css('.type .subInfo::text').get())
    furnish_1 = furnish[0]
    furnish_2 = furnish[1]

    # `or ''` guards against a missing node before stripping the unit.
    acreage = (select.css('.area .mainInfo::text').get() or '').replace('平米', '')
    community = select.css('.communityName .info::text').get()

    # Linked parts of the area come from the <a> children, the remaining
    # text from the node itself; they are joined with '-'.
    area_list = select.css('.areaName .info::text').getall()
    area_list_1 = select.css('.areaName .info a::text').getall()
    area_str = ''.join(area_list).strip()
    area = '-'.join(area_list_1) + '-' + area_str

    return {
        'title': title,
        'price': price,
        'price_1': price_1,
        'unit_type': unit_type,
        'floor': floor,
        'floor_1': floor_1,
        'floor_2': floor_2,
        'face': face,
        'furnish': furnish,
        'furnish_1': furnish_1,
        'furnish_2': furnish_2,
        'acreage': acreage,
        'community': community,
        'area': area,
    }


def main():
    """Crawl list pages 1-10 and print one line per listing.

    For each list page, collects the detail-page links, downloads every
    detail page, parses it with ``parse_listing`` and prints the fields
    separated by ``' | '``.

    Raises:
        requests.exceptions.RequestException: if a request fails or exceeds
            ``REQUEST_TIMEOUT``; the error is not swallowed.
    """
    # Third-party packages. Imported here because no import section is
    # visible for this script, and so that the pure helpers above can be
    # imported without these packages installed.
    import parsel
    import requests

    for page in range(1, 11):
        print(f'正在爬取第{page}页的数据内容')
        url = f'https://cs.lianjia.com/ershoufang/pg{page}/'
        response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        selector = parsel.Selector(response.text)
        href = selector.css('.sellListContent li .title a::attr(href)').getall()

        for index in href:
            html_data = requests.get(url=index, headers=HEADERS, timeout=REQUEST_TIMEOUT).text
            info = parse_listing(parsel.Selector(html_data))
            print(
                info['title'],
                info['price'],
                info['price_1'],
                info['unit_type'],
                info['floor'],
                info['face'],
                info['furnish'],
                info['acreage'],
                info['community'],
                info['area'],
                index,
                sep=' | ',
            )


if __name__ == '__main__':
    main()