赶集网(Python)

代码:

import requests

import re

import xlwt

# Listing-page URL template; the "{}" placeholder is filled with the page number.
base_url = "https://xa.58.com/zufang/pn{}"

# Headers sent with every request; supplies a desktop-browser User-Agent string.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
    ),
}

def get_data(url):
    """Download one page and pass its HTML to ``parse_data``.

    Args:
        url: URL of the page to request.

    Returns:
        None. On HTTP 200 the response text is forwarded to ``parse_data``;
        any other status code, or a ``requests`` error, is only reported on
        stdout.
    """
    try:
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise block the crawl indefinitely. A timeout
        # raises a RequestException subclass and is reported below.
        resp = requests.get(url, headers=header, timeout=10)
    except requests.RequestException as e:
        print(f"请求错误: {e}")
        return

    if resp.status_code == 200:
        print("请求成功!")
        parse_data(resp.text)
    else:
        print("请求失败!")
 

def parse_data(data):
    """Extract listing fields from page HTML and pass them to ``save_data``.

    Args:
        data: HTML text of one listing page.

    Returns:
        None. Every regex match contributes one row made of its three
        captured groups with surrounding whitespace stripped. The rows are
        printed and then handed to ``save_data``.
    """
    # NOTE(review): the original pattern literal was broken across lines and
    # had its HTML tags stripped, so it could not compile. Only the fragments
    # ``]*?>(.*?)``, ``.+?room">(.*?)`` and ``.+?class="strongbox">(.*?)``
    # survived; the tag names used here (<h2>/<a>, </p>, </b>) are a
    # reconstruction -- confirm them against the live page markup.
    pattern = (
        r'<h2[^>]*>\s*<a[^>]*?>(.*?)</a>'
        r'.+?room">(.*?)</p>'
        r'.+?class="strongbox">(.*?)</b>'
    )
    # DOTALL lets ``.`` cross newlines, since one listing spans several lines.
    houses = re.findall(pattern, data, re.DOTALL)

    house_list = [[field.strip() for field in h] for h in houses]
    print(house_list)
    save_data(house_list)

def save_data(house_list):
    """Write the rows to an ``.xls`` workbook.

    Args:
        house_list: Iterable of rows, each an iterable of cell values. Row
            and column positions in the sheet follow the enumeration order.

    A new workbook with a single sheet named "赶集网1" is created, filled cell
    by cell, and saved as "赶集网1.xls".
    """
    book = xlwt.Workbook()
    worksheet = book.add_sheet("赶集网1")

    # Flatten the nested rows into (row, column, value) triples.
    cells = (
        (row_idx, col_idx, cell)
        for row_idx, record in enumerate(house_list)
        for col_idx, cell in enumerate(record)
    )
    for row_idx, col_idx, cell in cells:
        worksheet.write(row_idx, col_idx, cell)

    book.save("赶集网1.xls")  # file extension changed to .xls
 

if __name__ == '__main__':
    # Request listing pages 1 through 5, one after another.
    # NOTE(review): each page ends in save_data writing the same file name,
    # so later pages appear to replace earlier output -- confirm intent.
    page_numbers = range(1, 6)
    for i in page_numbers:
        print(f"开始请求第{i}页数据")
        page_url = base_url.format(i)
        get_data(page_url)

你可能感兴趣的:(python,开发语言)