##读取数据
import csv
from collections import Counter
'''
评论时间:time
评论昵称:nickName
性别:gender
所在城市:cityName
内容:content
猫眼等级:userLevel
评分:score
'''
# Column stores filled by read_csv(); each list holds one CSV column, in row order:
#   time      - comment time        (CSV column 0)
#   nickName  - commenter nickname  (CSV column 1)
#   gender    - gender code         (CSV column 2)
#   cityName  - commenter's city    (CSV column 3)
#   userLevel - Maoyan user level   (CSV column 5)
#   score     - rating              (CSV column 6)
# The module-level `content` list is never filled: the comment text (CSV
# column 4) is returned by read_csv() as one concatenated string instead.
time, nickName, gender, cityName, content, userLevel, score = [], [], [], [], [], [], []


def read_csv(path='/home/kiosk/PycharmProjects/Scrapy/爬取猫眼评论/data.csv'):
    """Load the comment CSV into the module-level column lists.

    The first row is treated as a header and skipped. Rows with fewer than
    seven fields (for example blank lines) are ignored instead of raising
    IndexError. The column lists are emptied in place before loading, so
    calling this function more than once does not duplicate rows.

    Args:
        path: CSV file to read; defaults to the original hard-coded location.

    Returns:
        The text of every comment (column 4) concatenated into one string,
        in file order.
    """
    # Clear in place (not rebind) so references held by callers stay valid.
    for column in (time, nickName, gender, cityName, userLevel, score):
        column.clear()

    texts = []
    with open(path, 'r', encoding='utf-8', newline='') as file_test:
        reader = csv.reader(file_test)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if len(row) < 7:
                # csv.reader yields [] for blank lines; skip incomplete rows.
                continue
            time.append(row[0])
            nickName.append(row[1])
            gender.append(row[2])
            cityName.append(row[3])
            texts.append(row[4])
            userLevel.append(row[5])
            score.append(row[6])

    # Report data rows only; the header is not a data record.
    print('总共有:%s 条数据' % len(texts))
    # Join once instead of repeated string concatenation inside the loop.
    return ''.join(texts)
# Populate the module-level column lists as soon as this file is executed;
# the concatenated comment text that read_csv() returns is discarded here.
read_csv()
from pyecharts import Pie, Style, Geo
#from pyecharts import options as opts
def sex_views(gender, output_path='/home/kiosk/PycharmProjects/Scrapy/爬取猫眼评论/sex.html'):
    """Render a pie chart of the gender distribution to an HTML file.

    Args:
        gender: sequence of gender codes; '' is counted as "other/unknown",
            '1' as male and '2' as female.
        output_path: where the HTML chart is written; defaults to the
            original hard-coded location.

    Side effects:
        Prints the three counts and writes the chart to ``output_path``.
    """
    # One pass over the data instead of three separate list.count() scans.
    tally = Counter(gender)
    attr = ['其他', '男', '女']
    # NOTE(review): codes other than '', '1' and '2' end up in no slice —
    # confirm the data never contains other values (e.g. '0').
    list_num = [tally[''], tally['1'], tally['2']]
    print(list_num)
    pie = Pie('性别分析饼图')
    pie.add('', attr, list_num, is_label_show=True)
    pie.render(output_path)
##评论城市分布可视化:
def city_views(cityName, output_path='/home/kiosk/PycharmProjects/Scrapy/爬取猫眼评论/city.html'):
    """Render a bar chart of comment counts per city to an HTML file.

    Cities are ordered from most to fewest comments; cities with equal
    counts keep the order in which they first appear in ``cityName``.

    Args:
        cityName: sequence of city names, one entry per comment.
        output_path: where the HTML chart is written; defaults to the
            original hard-coded location.
    """
    from pyecharts import Bar

    # Counter tallies every city in a single pass, and most_common() returns
    # (city, count) pairs already sorted by descending count.
    ranked = Counter(cityName).most_common()
    city_name = [city for city, _ in ranked]
    city_count = [count for _, count in ranked]

    bar = Bar('城市分布图')
    bar.add('', city_name, city_count, is_label_show=True, is_datazoom_show=True)
    bar.render(output_path)
运行:
# Script entry: load the CSV, then render the gender pie chart and the
# per-city bar chart from the module-level lists.
# NOTE(review): read_csv() is also called earlier in this file — verify that
# calling it again here does not load the rows twice.
read_csv()
sex_views(gender)
city_views(cityName)
pyecharts 是一个用于生成 Echarts 图表的类库。Echarts 是百度开源的一个数据可视化 JS 库,pyecharts 则把 Python 与 Echarts 结合起来,是一个强大的数据可视化工具。
(Scrapy) [kiosk@asimov ~/PycharmProjects/Scrapy]$ pip install pyecharts==0.5
Successfully installed dukpy-0.2.2 future-0.17.1 javascripthon-0.10 jupyter-echarts-pypkg-0.1.1 lml-0.0.2 macropy3-1.1.0b2 pillow-6.1.0 pyecharts-0.5.0 pyecharts-javascripthon-0.0.6
pyecharts 最新版本为 1.3.0(本文示例使用的是上面安装的 0.5 版本)。
Bar3D(3D 柱状图)
Boxplot(箱形图)
EffectScatter(带有涟漪特效动画的散点图)
Funnel(漏斗图)
Gauge(仪表盘)
Geo(地理坐标系)
Graph(关系图)
HeatMap(热力图)
Kline(K线图)
Line(折线/面积图)
Line3D(3D 折线图)
Liquid(水球图)
Map(地图)
Parallel(平行坐标系)
Pie(饼图)
Polar(极坐标系)
Radar(雷达图)
Sankey(桑基图)
Scatter(散点图)
Scatter3D(3D 散点图)
ThemeRiver(主题河流图)
WordCloud(词云图)
http://pyecharts.herokuapp.com/