import pymongo
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the rest of the run.
# NOTE(review): this also hides deprecation warnings from pandas/matplotlib;
# consider narrowing the filter -- confirm with the author.
warnings.filterwarnings('ignore')
# Console marker printed once the imports above have completed.
print('导入成功!')
def data_yc(df, col, k=3):
    """Return the outlier fences of a numeric column.

    The fences are placed ``k`` interquartile ranges outside the quartiles:
    ``upper = Q3 + k * IQR`` and ``lower = Q1 - k * IQR``.

    Args:
        df: DataFrame holding the data.
        col: Name of the column to analyse.
        k: IQR multiplier; defaults to 3.

    Returns:
        A ``(upper_fence, lower_fence)`` tuple, in that order.
    """
    q1 = df[col].quantile(q=0.25)
    q3 = df[col].quantile(q=0.75)
    iqr = q3 - q1
    tmax = q3 + k * iqr
    # The lower fence is measured from the first quartile, not the third.
    tmin = q1 - k * iqr
    return (tmax, tmin)
def per(df, cols, p):
    """Add a ``<col>_per`` ratio column for each requested column.

    Each new column holds ``df[col] / df[p]``. The frame is modified in
    place and the same object is returned.

    Args:
        df: DataFrame to extend.
        cols: Names of the numerator columns.
        p: Name of the column used as the denominator.

    Returns:
        ``df`` itself, now carrying the extra ``*_per`` columns.
    """
    for name in cols:
        ratio_label = name + '_per'
        numerator = df[name]
        df[ratio_label] = numerator / df[p]
    return df
def _load_collection_frame(uri, db_name, collection_name):
    """Read every document of a MongoDB collection into a DataFrame.

    The client is closed once the documents have been materialised, so no
    connection stays open for the rest of the script.
    """
    client = pymongo.MongoClient(uri)
    try:
        documents = list(client[db_name][collection_name].find())
    finally:
        client.close()
    return pd.DataFrame(documents)


def _plot_scatter_matrix(frame):
    """Draw a pairwise scatter matrix of ``frame`` with KDE diagonals."""
    # ``scatter_matrix`` lives in ``pandas.plotting``; the top-level
    # ``pd.scatter_matrix`` alias is not available in current pandas.
    return pd.plotting.scatter_matrix(
        frame,
        figsize=(20, 12),
        marker='o',
        diagonal='kde',
        alpha=0.5,
        range_padding=0.1,
    )


def _year_stack_values(hot_df, year):
    """Return the play counts of ``hot_df`` with rows of other years zeroed."""
    return hot_df['播放量_x'].where(hot_df['上线年份'] == year, 0).tolist()


def _render_hot_bar(hot_df, path):
    """Render the stacked per-year bar chart of hot videos to ``path``."""
    from pyecharts import options as opts
    from pyecharts.charts import Bar
    from pyecharts.globals import ThemeType

    chart = Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    chart.add_xaxis(hot_df['电影名称'].values.tolist())
    # One series per year sharing a stack: every bar shows a single
    # non-zero segment, coloured by the video's release year.
    for year in (2019, 2018, 2017):
        chart.add_yaxis(
            str(year), _year_stack_values(hot_df, year), stack='1', gap="15%"
        )
    chart.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    # Title and axis options go in ONE call: a separate later call would
    # re-apply the default (empty) ``title_opts`` and drop the title.
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="主标题", subtitle="副标题"),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=15)),
    )
    return chart.render(path)


if __name__ == "__main__":
    # NOTE(review): the matplotlib figures below are neither shown nor saved
    # in this block; add plt.show()/savefig if run outside a notebook.
    df0725 = _load_collection_frame(
        "mongodb://localhost:27017",
        '看电影了没',
        '视频信息_dif' + '2019-07-25',
    )

    # Pairwise relationships: daily increments first, then cumulative totals.
    pic0725_1 = df0725[['分享_dif', '弹幕数_dif', '投币数_dif', '播放量_dif', '收藏_dif', '评论_dif']]
    _plot_scatter_matrix(pic0725_1)
    pic0725_2 = df0725[['分享_x', '弹幕数_x', '投币数_x', '播放量_x', '收藏_x', '评论_x']]
    _plot_scatter_matrix(pic0725_2)

    # Yearly totals joined with the number of videos released each year.
    total = df0725[['投币数_x', '弹幕数_x', '收藏_x', '评论_x', '分享_x', '播放量_x', '播放量_dif', '上线年份']].groupby('上线年份').sum()
    count = df0725[['电影名称', '上线年份']].groupby('上线年份').count()
    total = total.join(count)
    total.columns = ['投币数', '弹幕数', '收藏', '评论', '分享', '播放量', '播放量_dif', '视频数量']
    print(total)
    # ``info()`` prints its report itself and returns None, so it is not
    # wrapped in ``print`` (which would emit a stray "None").
    total.info()

    # Per-video averages for each year.
    cols = ['投币数', '弹幕数', '收藏', '评论', '分享', '播放量', '播放量_dif']
    total = per(total, cols, '视频数量')

    fig, axe = plt.subplots(1, 2, figsize=(12, 6))
    total[['投币数', '弹幕数', '收藏', '评论', '分享']].plot(
        kind='bar', ax=axe[1], stacked=True, title='年度其他情况', rot=0
    )
    total['播放量'].plot(kind='bar', ax=axe[0], title='年度播放量', rot=0)

    print(pic0725_1.corr())

    # Box plot of play counts for each release year, on a shared y axis.
    years = (2017, 2018, 2019)
    frames_by_year = {
        year: df0725[df0725['上线年份'] == year] for year in years
    }
    fig1, box_axes = plt.subplots(1, 3, figsize=(8, 6), sharey=True)
    for ax, year in zip(box_axes, years):
        frames_by_year[year]['播放量_x'].plot.box(
            ax=ax, title=f'{year}年各视频播放量', whis=3
        )

    # A video is "hot" when its play count exceeds the upper fence either
    # across the whole data set or within its own release year.
    hot_names = set()
    for frame in (df0725, *frames_by_year.values()):
        upper_fence = data_yc(frame, '播放量_x')[0]
        hot_names.update(
            frame.loc[frame['播放量_x'] > upper_fence, '电影名称'].tolist()
        )

    hot_df = (
        df0725.loc[
            df0725['电影名称'].isin(list(hot_names)),
            ['上线年份', '播放量_x', '电影名称'],
        ]
        .sort_values('播放量_x', ascending=False)
        .reset_index(drop=True)
    )

    # The original script produced the same chart under both file names.
    _render_hot_bar(hot_df, '01.html')
    _render_hot_bar(hot_df, '02.html')