''''
找出真实的弹幕列表的url
保存 csv 文件
制作词云图
'''
import re
import requests
import csv
# headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'}
# url = 'https://api.bilibili.com/x/v1/dm/list.so?oid=187026994'
# resp = requests.get(url,headers=headers)
# resp.encoding = resp.apparent_encoding
# html = resp.text
#
# q = re.compile('(.*?) ')
# danmu = re.findall(q,html)
# print(len(danmu))
# path = r'C:\Users\DELL\Desktop\python_wd\后浪.csv'
# for b in danmu:
# with open(path,'a',newline='',encoding='utf-8-sig')as f:
# f_csv= csv.writer(f)
# danmu = []
# danmu.append(b)
# f_csv.writerow(danmu)
# 注释为不让多次爬取
# 词云制作
# 两个都是第三方库
import jieba
import wordcloud
# 打开文件(保存 弹幕的 csv 文件)
path = r'C:\Users\DELL\Desktop\python_wd\后浪.csv'
f = open(path,encoding='utf-8')
txt = f.read() # 读取
txt_list = jieba.lcut(txt) # 处理 分词数据(返回数据类型列表)
#print(txt_list) # 理解为分词处理 后的结果
# 接下来 理解为 将其这些内容 拼接为 完整的 字符串
string = ' '.join(txt_list)
print(string)
# 创建对象 填写参数
w = wordcloud.WordCloud(width=1000,
height=700,
background_color='white', # 背景颜色
font_path='msyh.ttc', # 字体
scale=15, # 间隔
stopwords={' ','?','!',','}, # 停用词 剔除不需要显示的字符
contour_width=5, # 整个内容显示的宽度
contour_color='red', # 内容显示的颜色 红色边境
)
w.generate(string) # 传入处理好的字符窜
photo_path = r'C:\Users\DELL\Desktop\python_wd\houlang.png'
w.to_file(photo_path) # 保存
==============================================================================