新闻爬虫

一个简单的新闻爬虫示例,用于抓取腾讯新闻。

import requests, json

# Main function of the news crawler.
def qq_news(url):
    """Fetch one page of the news feed at *url* and print each item.

    Args:
        url: Full URL of the JSON feed page to request.

    Returns:
        An error message string when the HTTP status code is not 200;
        otherwise None, after printing one dict per news item.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds the timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Send a browser-like User-Agent header with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'
    }
    # Bound the wait so a stalled server cannot hang the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        return "请求失败,状态码%s" % response.status_code

    # Treat a missing or null "data" entry as an empty page instead of
    # raising KeyError/TypeError.
    payload = response.json()
    datas = payload.get("data") if isinstance(payload, dict) else None

    # Keep only the fields of interest; a field absent from an item is
    # reported as None rather than aborting the whole page.
    fields = ("title", "source", "vurl", "update_time")
    for data in datas or []:
        news_dict = {field: data.get(field) for field in fields}
        print(news_dict)

# Crawl 5 pages (page indices 0 through 4).
for page in range(5):
    url = 'https://pacaio.match.qq.com/irs/rcd?cid=137&token=d0f13d594edfc180f5bf6b845456f3ea&id=&ext=top&page={}'.format(
        page)
    error = qq_news(url)
    # qq_news returns an error message on a non-200 response; surface it
    # instead of silently discarding it.
    if error:
        print(error)

你可能感兴趣的:(新闻爬虫)