Scraping with Python: come to my bowl, Autohome

The site in the previous chapter had anti-scraping measures and the attempt came up empty, so this time we switch to a new target, Autohome (car.autohome.com.cn). Let's get scraping!!
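As a quick aside before the main script: when a site rejects bare requests, the usual first thing to try is sending a browser-like User-Agent header. Below is a minimal sketch of that idea, assuming the same price-list URL used later in this post; the header string is just a placeholder, and the script further down does not depend on it.

import requests

# A browser-like User-Agent gets past the most basic anti-scraping checks;
# the value below is only an example string, swap in whatever your browser sends.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

url = 'https://car.autohome.com.cn/price/list-8_12-0-0-0-0-0-0-0-0-0-0-0-0-0-0-1.html'
resp = requests.get(url, headers=headers, timeout=10)
print(resp.status_code)  # 200 means the page came back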

On to the code!!

# -*- coding: utf-8 -*-
import re
import requests
import openpyxl
from bs4 import BeautifulSoup

def askUrl(url):
    try:
        req = requests.get(url)  # fetch the listing page
        req.encoding = 'gb2312'  # Autohome pages are GB2312-encoded
        return req.text
    except requests.RequestException:
        return ''

def getData(baseUrl):
    # regex for the car name; the link is assumed to look like
    # <a href="..." target="_self">name</a>, so the closing </a> is restored here
    bold = re.compile(r'.*target="_self">(.*?)</a>')
    # regex for the owner score; the span class name is a guess and may need
    # adjusting to the page's current markup
    number = re.compile(r'<span class="score-number">(.*?)</span>')
    datalist = []
    for i in range(10):  # pages 1 to 10 of the price list
        url = baseUrl + str(i + 1) + '.html'
        print(url)
        html = askUrl(url)
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('div', class_='list-cont'):  # one block per car
            data = []
            item = str(item)  # convert the tag to a string for regex matching
            title = re.findall(bold, item)
            data.append(title)
            score = re.findall(number, item)
            data.append(score)
            datalist.append(data)
    return datalist


def saveData(data, path):
    book = openpyxl.Workbook()
    sheet = book.active   # use the default sheet rather than adding an extra empty one
    sheet.title = "cars"
    col = ('名称', '评分')  # column headers: name, score
    sheet.append(col)  # write the header row
    for i, row in enumerate(data):  # iterate over whatever was actually scraped
        for j in range(2):
            if len(row[j]) > 0:  # skip cells where the regex found nothing
                sheet.cell(row=i + 2, column=j + 1, value=row[j][0])
    book.save(path)  # write the workbook to disk

# main routine
def main():
    print("Start scraping......")
    # price list for cars in the 80,000-120,000 RMB range
    baseurl = 'https://car.autohome.com.cn/price/list-8_12-0-0-0-0-0-0-0-0-0-0-0-0-0-0-'
    datalist = getData(baseurl)
    savepath = 'cars.xlsx'  # openpyxl writes the xlsx format, so use the matching extension
    saveData(datalist, savepath)

main()
print("爬取完成")

Time to show off the results!!

(Screenshot: the scraped results, image 1)
