Python crawler for Bilibili danmaku (one step to get it done, ready to use out of the box, very convenient)

You only need to change the video URL. The script requests the video page, extracts the cid from the page source, downloads the danmaku XML from comment.bilibili.com, and writes the comments to a text file.
Very convenient.
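
For reference, each danmaku in that XML file sits inside a <d> element, roughly like this (the attribute values below are made up for illustration):

<d p="12.34,1,25,16777215,1611400000,0,abcdef12,123456789">sample danmaku text</d>

The regex in savelist() only keeps the text between the tags; the p attribute carries metadata such as the time the comment appears in the video and its color.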

# -*- coding: utf-8 -*-
# @Time : 2021/1/23 18:40
# @Author : 老七疯狂吸氧
# @file bilibili.py
# @Software:PyCharm

import re
import requests
def main():

    url = "https://www.bilibili.com/video/BV1kh411y78a"    # replace this URL; only works for Bilibili videos

    html = get_html(url)
    cid = saveurl(html)
    # the danmaku for a video are served as XML at comment.bilibili.com/<cid>.xml
    dmurl = 'https://comment.bilibili.com/' + cid + '.xml'
    response = get_Html(dmurl)
    text = response.content.decode("utf-8")
    savelist(text)
def get_html(url):         # first request: fetch the video page and return its HTML text
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }         # put your own User-Agent here
    response = requests.get(url, headers=headers)
    return response.text
def get_Html(url):        # second request: fetch the danmaku XML and return the full response object
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }         # put your own User-Agent here
    response = requests.get(url, headers=headers)
    return response
def saveurl(baseurl):     # extract the cid from the video page HTML
    findlink = re.compile(r'"cid":(.*?),"bvid":')
    cid = re.findall(findlink, baseurl)
    return cid[1]         # findall already returns a list; keep the second match
def savelist(text):
    # each danmaku in the XML looks like <d p="...">comment</d>; capture the comment text
    danmu = re.compile(r'<d p=".*?">(.*?)</d>')
    data = re.findall(danmu, text)
    with open("B站弹幕.txt", "w", encoding="utf-8") as file:
        for i in data:
            file.write(i)
            file.write("\n")
if __name__ =="__main__":
    main()
    print("爬取完毕")
