Python爬取58二手房标题和价格

import requests
import chardet
from lxml import etree

# Scrape listing titles and total prices from the 58.com second-hand housing
# page and write them, one "title price" pair per line, to a text file.

# Listing page to fetch.
url = 'https://cn.58.com/ershoufang/?PGTID=0d100000-008d-2b3d-c8f5-0c7b463b18e3'
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.47'
}

# A timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the listing page.
resp = requests.get(url=url, headers=headers, timeout=10)
resp.raise_for_status()

# chardet reports None when it cannot identify the encoding; fall back to
# UTF-8 so that open() below does not silently use the platform default.
encoding = chardet.detect(resp.content)['encoding'] or 'utf-8'
resp.encoding = encoding
page_text = resp.text

tree = etree.HTML(page_text)
# /text() selects the direct text children of the matched elements.
h3_list = tree.xpath('//div[@class="property-content-title"]/h3[@class="property-content-title-name"]/text()')
price_list = tree.xpath('//div[@class="property-price"]//span[@class="property-price-total-num"]/text()')

# zip() stops at the shorter of the two lists, so titles and prices are paired
# positionally without any risk of an IndexError. A literal "w" suffix is
# appended to every price string.
h3_price = [(title, price + "w") for title, price in zip(h3_list, price_list)]

# The output file is written with the same encoding that was used to decode
# the page.
with open('D:\\Programming\\Microsoft VS Code Data\\WebCrawler\\data\\58\\58.txt', 'w', encoding=encoding) as fp:
    fp.writelines(f'{title} {price}\n' for title, price in h3_price)
print('爬取完成!!!')


Python爬取58二手房标题和价格_第1张图片

你可能感兴趣的:(Python,python,爬虫,58二手房标题)