import requests
import re
from bs4 import BeautifulSoup
import csv
import pandas as pd
from itertools import islice
import numpy as np
def get_csv(url):
    """Download the danmaku (bullet comments) of a Bilibili video and save them
    to two CSV files in the working directory.

    Writes:
        danmu_info.csv -- one row per comment with the metadata fields packed
                          in the ``p`` attribute of each ``<d>`` element.
        danmu_text.csv -- one row per comment with the visible comment text.

    Parameters
    ----------
    url : str
        A Bilibili video URL containing a BV id, e.g.
        'https://www.bilibili.com/video/BV11a4y1h7wd'.
    """
    # FIX: `headers` was used but never defined (NameError). A browser-like
    # User-Agent is needed or Bilibili may reject the request.
    headers = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/120.0 Safari/537.36'),
    }
    # Extract the BV id from the URL, then ask the pagelist API for the
    # video's cid (the id the comment server is keyed on).
    bv_index = url.find('BV')
    bvid = url[bv_index:]  # renamed from `id`, which shadowed the builtin
    api_url = 'https://api.bilibili.com/x/player/pagelist?bvid=' + bvid + '&jsonp=jsonp'
    r = requests.get(url=api_url, headers=headers)
    cid = r.json()['data'][0]['cid']
    # The raw danmaku XML for a video lives at comment.bilibili.com/<cid>.xml.
    xml_url = 'https://comment.bilibili.com/' + str(cid) + '.xml'
    rr = requests.get(url=xml_url, headers=headers)
    rr.encoding = 'utf-8'  # FIX: was misspelled 'uft-8', which garbled the text
    soup = BeautifulSoup(rr.text, 'lxml')
    danmu_info = soup.find_all('d')  # each <d p="..."> is one danmaku
    all_info = [d['p'] for d in danmu_info]   # comma-separated metadata string
    all_text = [d.text for d in danmu_info]   # visible comment text
    # newline='' is required by the csv module; without it Windows gets an
    # extra blank row after every record. `with` guarantees the file closes.
    with open('danmu_info.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        # Field order of the packed `p` attribute.
        csv_writer.writerow(["时间", "弹幕模式", "字号大小", "颜色", "Unix格式时间戳", "弹幕种类", "发送者ID", "rowID"])
        for info in all_info:
            csv_writer.writerow(info.split(','))
    with open('danmu_text.csv', 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["内容"])
        for text in all_text:
            # FIX: write an explicit one-cell row; the original passed the bs4
            # Tag itself, relying on Tag iteration to yield its children.
            csv_writer.writerow([text])
if __name__ == '__main__':
    # Demo run: fetch and export the danmaku of one sample video.
    sample_url = 'https://www.bilibili.com/video/BV11a4y1h7wd'
    get_csv(sample_url)