Pandas读取Excel表格绘制图片
导包
import pandas as pd
import matplotlib.pyplot as plt
显示中文字符
plt.rcParams['font.sans-serif'] = ['SimHei']
读取文件地址、index_col=0表示将第一列作为索引
data = pd.read_excel('D:\pythonProject\票房.xlsx',index_col=0)
添加标题,横纵坐标
data.plot(style='.-.')
plt.title('票房数据')
plt.ylabel('票房(亿元)')
plt.xlabel('上映时间')
保存为图片
plt.savefig('票房数据.png')
有些不会自动打开图片结果,需要手动打开
plt.show()
导入需要的库
import re
import time
import random
import requests
import json
import jieba
from wordcloud import WordCloud
网上随便搜索一个网址,如豆瓣阿里java开发手册的评论
https://book.douban.com/subject/27605355/comments/?start=60&limit=20&status=P&sort=new_score
获取->解析网页数据->保存为本地TXT(之前总是找不到存哪去了 ,后来发现刷新文件目录就有了)
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 10 12:01:35 2020
@author: kimol_love
"""
import re
import time
import json
import random
import requests
# 定义相关函数
def get_comment(mid, page):
'''
获得评论页面的HTML
'''
start = (page - 1) * 20
url = 'https://book.douban.com/subject/%s/comments/?start=%d&limit=20&status=P&sort=new_score' % (mid, start)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:81.0) Gecko/20100101 Firefox/81.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'}
cookies = {}
res = requests.get(url, headers=headers, cookies=cookies)
html = res.text
return html
def parse_comment(html):
'''
解析HTML中的评论
'''
comment = re.findall('(.*?)', html)
return comment
def crawl_comment(mid, N, name):
'''
爬取指定页数的评论,并保存在本地
'''
comments = []
for p in range(1, N + 1):
html = get_comment(mid, p)
comment = parse_comment(html)
comments.extend(comment)
print('《%s》第%d页评论爬取完成(%d条)' % (name, p, len(comment)))
time.sleep(random.uniform(3, 5))
with open('%s.txt' % name, 'w') as f:
f.write(json.dumps(comments))
# 爬取阿里评论
crawl_comment('27605355', 20, '阿里java开发手册')
转出词云
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 10 12:22:53 2020
@author: kimol_love
"""
import json
import jieba
from wordcloud import WordCloud
# 定义相关函数
def create_wordcloud(comments, name):
'''
根据评论列表创建词云
'''
content = ''.join(comments)
wl = jieba.cut(content, cut_all=True)
wl_space_split = ' '.join(wl)
wc = WordCloud('simhei.ttf',
background_color='white', # 背景颜色
width=1000,
height=600, ).generate(wl_space_split)
wc.to_file('%s.png' % name)
# 词云数据分析
with open('阿里java开发手册.txt', 'r') as f:
comments_jiang = json.loads(f.read())
create_wordcloud(comments_jiang, '阿里评论')
这是大佬的原文
https://blog.csdn.net/kimol_justdo/article/details/108995430