import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
from bokeh.plotting import figure,show,output_file
from bokeh.models import ColumnDataSource
# --- Load and prepare the raw listing data -----------------------------------
# Read the Double-11 Taobao cosmetics workbook and echo it for a quick look.
df = pd.read_excel("双十一淘宝美妆数据.xlsx")
print(df)

# Every missing cell, in every column, is replaced by 0.
df = df.fillna(0)

# Index the rows by their `update_time` value (the column itself is kept);
# `.day` below requires that index to be datetime-like.
df = df.set_index(df['update_time'])
df['date'] = df.index.day

# Slim working table: product id, listing title, shop name, day of month.
base_columns = ['id', 'title', '店名', 'date']
data1 = df[base_columns]
# --- Which products were on sale on day 11? ----------------------------------
# First ('min') and last ('max') day of month on which each id appears.
d1 = data1.groupby('id')['date'].agg(['min', 'max'])

# Ids of all rows recorded on day 11, flagged as "sold on the day".
id_11 = data1.loc[data1["date"] == 11, "id"]
d2 = pd.DataFrame({"id": id_11, "双十一当天是否售卖": True})

# Attach the flag to the per-id day range; ids without a day-11 row get False.
id_date = d1.merge(d2, left_index=True, right_on='id', how='left')
id_date = id_date.fillna(False)

# Share of day-11 rows relative to the number of distinct ids.
m, m_11 = len(d1), len(id_11)
m_per = m_11 / m
print("双十一当天参与活动的商品共%i个,占比为%.2f%%" % (m_11, m_per * 100))
# --- Classify every product by when it was listed relative to day 11 ---------
# The label is derived from the first ('min') and last ('max') day a product
# appears and from the day-11 flag.  The original code wrote the labels with
# chained indexing (id_date['type'][mask] = ...), which may write to a
# temporary object and is a silent no-op under pandas Copy-on-Write; the
# warning was hidden by the global warnings filter.  np.select performs the
# same classification in a single, well-defined assignment.
#
# np.select returns the choice of the FIRST matching condition, so conditions
# are listed from highest to lowest priority, i.e. in the reverse of the
# original assignment order (there, later assignments overwrote earlier ones).
id_date['type'] = np.select(
    [
        id_date['min'] > 11,                                # G: first seen after day 11
        id_date['max'] < 11,                                # E: last seen before day 11
        id_date['双十一当天是否售卖'] == False,               # F: seen around, but not on, day 11
        (id_date['min'] == 11) & (id_date['max'] == 11),    # D: only on day 11
        (id_date['min'] == 11) & (id_date['max'] > 11),     # C: from day 11 onwards
        (id_date['min'] < 11) & (id_date['max'] == 11),     # B: up to and including day 11
        (id_date['min'] < 11) & (id_date['max'] > 11),      # A: before, on and after day 11
    ],
    ['G', 'E', 'F', 'D', 'C', 'B', 'A'],
    default="待分类",
)

result1 = id_date['type'].value_counts()
print()
# reindex (instead of .loc) keeps the fixed A..G order without raising
# KeyError when a class has no members; such a class is reported as 0.
result1 = result1.reindex(['A', 'B', 'C', 'D', 'E', 'F', 'G'], fill_value=0)

# Pie chart of the class sizes, using a 7-step yellow-green palette.
from bokeh.palettes import brewer
colori = brewer['YlGn'][7]
plt.axis('equal')
plt.pie(result1, labels=result1.index, autopct='%.2f%%', colors=colori,
        startangle=90, radius=1.5, counterclock=False)
# --- Break down the products that were NOT on sale on day 11 -----------------
# Rows of id_date whose day-11 flag is False, joined back to the full listing
# data so that titles and dates are available.
id_not11 = id_date[id_date["双十一当天是否售卖"] == False]
df_not11 = id_not11[["id", "type"]]
data_not11 = pd.merge(df_not11, df, on='id', how="left")

# Group 1: ids classified as type "F".
id_con1 = id_date['id'][id_date["type"] == "F"].values

# Group 2: ids that appear under more than one distinct title.
data_con2 = data_not11[["id", "title", "date"]].groupby(by=["id", "title"]).count()
title_count = data_con2.reset_index()["id"].value_counts()
id_con2 = title_count[title_count > 1].index

# Group 3: ids whose title contains "预售" (pre-sale).
# na=False: the earlier fillna(0) can leave non-string titles, for which
# .str.contains yields NaN; a NaN in the mask makes boolean indexing raise.
data_con3 = data_not11[data_not11["title"].str.contains("预售", na=False)]
id_con3 = data_con3["id"].value_counts().index

# All three figures are counts, so each is printed with %i (the first one was
# previously formatted with %.2f and showed up as e.g. "12.00").
print("未参与双十一当天活动的商品中,有%i个为暂时下架商品,有%i个为重新上架商品,有%i个为预售商品" % (len(id_con1), len(id_con2), len(id_con3)))
# --- Per-shop participation: day-11 rows plus pre-sale rows ------------------
# Combined id list (day-11 ids followed by pre-sale ids).
data_11sale = id_11
id_11sale_final = np.hstack((data_11sale, id_con3))
result2 = pd.DataFrame({"id": id_11sale_final})

# Listing rows per shop for the ids seen on day 11.
x1 = pd.DataFrame({"id": id_11})
x1_df = x1.merge(df, on="id", how="left")
brand_11sale = x1_df.groupby("店名")[["id"]].count()

# Listing rows per shop for the pre-sale ids.
x2 = pd.DataFrame({"id": id_con3})
x2_df = x2.merge(df, on="id", how="left")
brand_yushou = x2_df.groupby("店名")[["id"]].count()

# Left-join on shop name (pre-sale side drives the result); the two count
# columns come out as id_x (pre-sale) and id_y (day 11).
result2_data = brand_yushou.merge(brand_11sale, on="店名", how="left")
result2_data["sum"] = result2_data["id_x"] + result2_data["id_y"]
result2_data = result2_data.sort_values(by='sum', ascending=False)
result2_data = result2_data.rename(
    columns={'id_x': 'presale', 'id_y': 'sale_on_11', 'sum': 'sum'}
)

from bokeh.models import HoverTool
from bokeh.core.properties import value

# Shops become the categorical x axis; the first two columns are stacked.
lst_brand = result2_data.index.tolist()
lst_type = result2_data.columns[:2].tolist()
colors = ["white", "black"]
result2_data.index.name = "brand"
source = ColumnDataSource(result2_data)

hover = HoverTool(
    tooltips=[
        ("品牌", "@brand"),
        ("双十一当天参与活动的商品数量", "@sale_on_11"),
        ("预售商品数量", "@presale"),
        ("真正参与双十一活动的商品总数", "@sum"),
    ]
)

output_file("project88_08.pic1.html")
p = figure(
    x_range=lst_brand,
    plot_width=900,
    plot_height=350,
    title="各个品牌参与双十一活动的情况",
    tools=[hover, "reset,xwheel_zoom,pan,crosshair"],
)
p.vbar_stack(
    lst_type,
    x="brand",
    source=source,
    width=0.8,
    color=colors,
    alpha=0.7,
    legend=[value(col) for col in lst_type],
    muted_color="black",
    muted_alpha=0.2,
)
show(p)
# --- Price periods, discount ratio and the discount histogram ----------------
# Work on an explicit copy: a new column is added below, and mutating a bare
# column subset of `df` is exactly the pattern pandas warns about.
data2 = df[["id", "title", "店名", "date", "price"]].copy()

# Bucket the day of month into before (5-10), on (11) and after (12-14) day 11.
data2["period"] = pd.cut(data2["date"], [4, 10, 11, 14], labels=["双十一前", "双十一中", "双十一后"])

# Earliest period at which each (id, price) pair occurs, then split the ids
# into those with a single price and those with several.  These three results
# are not referenced again in the visible part of the script.
price = data2[["id", "price", "period"]].groupby(by=["id", "price"]).min()
price.reset_index(inplace=True)
id_count = price["id"].value_counts()
id_type1 = id_count[id_count == 1].index
id_type2 = id_count[id_count != 1].index

# Same grouping, additionally carrying the shop name.
result3_data1 = data2[["id", "price", "period", "店名"]].groupby(["id", "price"]).min()
result3_data1.reset_index(inplace=True)
result3_before11 = result3_data1[result3_data1["period"] == "双十一前"]
result3_at11 = result3_data1[result3_data1["period"] == "双十一中"]

# Pair every pre-11 price (_x) with the day-11 price (_y) of the same id.
# discount = day-11 price / pre-11 price, so a smaller value is a deeper cut.
result3_data2 = pd.merge(result3_before11, result3_at11, on="id", how="left")
result3_data2["discount"] = result3_data2["price_y"] / result3_data2["price_x"]

# Histogram of the discount ratio in 20 bins of width 0.05 over (0, 1].
bokeh_data = result3_data2[["id", "discount"]].dropna()
bokeh_data["discount_range"] = pd.cut(bokeh_data["discount"], bins=np.linspace(0, 1, 21))
# observed=False keeps empty bins, so .iloc[:-1] always removes the top bin
# (0.95, 1.0] rather than whichever bin happens to be the last non-empty one.
bokeh_data2 = (
    bokeh_data[["discount", "id", "discount_range"]]
    .groupby(by="discount_range", observed=False)
    .count()
    .iloc[:-1]
    .copy()
)
# pd.cut labels are Interval objects; Bokeh categorical ranges only accept
# strings, so the bin labels are converted before they reach figure()/the source.
bokeh_data2.index = bokeh_data2.index.astype(str)
bokeh_data2["discount_pre"] = bokeh_data2["discount"] / bokeh_data2["discount"].sum()

output_file("project8_pic2.html")
source1 = ColumnDataSource(bokeh_data2)
lst_discount = bokeh_data2.index.tolist()
hover = HoverTool(tooltips=[("折扣率", "@discount")])
p = figure(x_range=lst_discount, plot_height=400, plot_width=900, title="商品折扣率统计",
           tools=[hover, "reset,xwheel_zoom,pan,crosshair"])
p.line(x="discount_range", y="discount_pre", source=source1, line_color="black", line_dash=[10, 4])
p.circle(x="discount_range", y="discount_pre", source=source1, size=8, color="red", alpha=0.8)
# NOTE(review): `p` is never passed to show(), so this chart is not rendered.
# The next chart is written to the same "project8_pic2.html"; give one of the
# two its own output file before adding show(p) here, otherwise they overwrite
# each other.
output_file("project8_pic2.html")
# --- Average discount ratio per shop (line + markers) ------------------------
# Keep only rows that have both a shop name and a discount value, then
# average the discount within each shop.
bokeh_data1 = result3_data2[["店名_x", "discount"]].dropna()
bokeh_data3 = bokeh_data1.groupby("店名_x")[["discount"]].mean()

# Shops form the categorical x axis.
lst_sp = bokeh_data3.index.tolist()
source2 = ColumnDataSource(bokeh_data3)
hover = HoverTool(tooltips=[("平均折扣率", "@discount")])

p2 = figure(
    x_range=lst_sp,
    plot_height=350,
    plot_width=900,
    title="各店的平均折扣率",
    tools=[hover, "reset,xwheel_zoom,pan,crosshair"],
)
# Dashed connecting line with a red marker on every shop.
p2.line(
    x="店名_x", y="discount", source=source2,
    line_color="black", line_dash=[10, 4],
)
p2.circle(
    x="店名_x", y="discount", source=source2,
    size=8, color="red", alpha=0.8,
)
show(p2)
# --- Discount ratio of every product, one jittered row per shop --------------
from bokeh.transform import jitter

# Distinct shop names (missing ones removed) for the categorical y axis.
brand = result3_data2["店名_x"].dropna().unique().tolist()
hover1 = HoverTool(tooltips=[("平均折扣率", "@discount")])

# Complete rows only, restricted to discount ratios below 0.96.
bokeh_data4 = (
    result3_data2[["id", "discount", "店名_x"]]
    .dropna()
    .loc[lambda frame: frame["discount"] < 0.96]
)
source3 = ColumnDataSource(bokeh_data4)

output_file("project08_pic3.html")
p3 = figure(
    y_range=brand,
    plot_height=900,
    plot_width=900,
    title="各店的平均折扣率",
    tools=[hover1, "box_select,reset,xwheel_zoom,pan,crosshair"],
)
# Spread the points vertically inside each shop's band so they do not overlap.
p3.circle(
    x="discount",
    y=jitter("店名_x", width=0.7, range=p3.y_range),
    source=source3,
    alpha=0.3,
    color="red",
)
show(p3)
# --- Per-shop discount depth and share of discounted products ----------------
# Rows whose discount ratio is below 0.95 count as discounted.
data_zk = result3_data2.loc[result3_data2["discount"] < 0.95]

# Mean discount ratio of the discounted rows, per shop.
result4_zkld = data_zk.groupby("店名_y")["discount"].mean()

# Discounted rows vs. all rows per shop, and the resulting share.
n_dz = data_zk["店名_y"].value_counts()
n_total = result3_data2["店名_y"].value_counts()
result4_dzspbl = pd.DataFrame({"打折商品数": n_dz, "商品总数": n_total})
result4_dzspbl["参与打折的商品比例"] = (
    result4_dzspbl["打折商品数"] / result4_dzspbl["商品总数"]
)
# Shops missing either count are removed.
result4_dzspbl = result4_dzspbl.dropna()

# Combine depth, share and the participation totals; only shops present in
# all three tables survive the inner joins on the index.
result_sum = result2_data.copy()
result4_data = result4_zkld.to_frame().merge(
    result4_dzspbl, left_index=True, right_index=True, how="inner"
)
result4_data = result4_data.merge(
    result_sum, left_index=True, right_index=True, how="inner"
)
# --- Quadrant chart: share of discounted products vs. discount depth ---------
from bokeh.models.annotations import Span, Label, BoxAnnotation

# Explicit copy: columns are renamed and a new one is added below, which must
# not operate on a bare column subset of result4_data.
bokeh_data = result4_data[["discount", "sum", "参与打折的商品比例"]].copy()
bokeh_data.columns = ["discount", "amount", "pre"]
# Marker size scales with the shop's participation total.
bokeh_data["size"] = bokeh_data["amount"] * 0.03
source = ColumnDataSource(bokeh_data)

# The means of both axes split the plane into four quadrants.
x_mean = bokeh_data["pre"].mean()
y_mean = bokeh_data["discount"].mean()

hover = HoverTool(tooltips=[("品牌", "@index"), ("折扣率", "@discount"), ("商品总数", "@amount"), ("参与打折商品数量", "@pre")])
output_file("project08_pic4.html")
p4 = figure(plot_width=900, plot_height=900, title="各个品牌打折套路解析",
            tools=[hover, "reset,xwheel_zoom,pan,crosshair"])
p4.circle_x(x="pre", y="discount", source=source, size="size", fill_color="red",
            line_color="black", fill_alpha=0.6, line_dash=[8, 3])
p4.ygrid.grid_line_dash = [6, 4]
p4.xgrid.grid_line_dash = [6, 4]

# Guide lines at the two means.
x = Span(location=x_mean, dimension="height", line_color="green", line_alpha=0.7, line_width=0.6)
y = Span(location=y_mean, dimension="width", line_color="green", line_alpha=0.7, line_width=0.6)
p4.add_layout(x)
p4.add_layout(y)

# Quadrant shading.  Previously bg3/bg4 repeated the geometry of bg1/bg2
# (bottom=y_mean), so the two upper quadrants were tinted twice and the two
# lower ones not at all.  bg3/bg4 now use top=y_mean and cover the lower half,
# giving a checkerboard of olive and firebrick.
#
# Label texts: `discount` is the day-11 price divided by the pre-11 price, so
# a LOWER y means a DEEPER discount.  The labels at y=0.55 therefore read
# "大打折" and those at y=0.8 "少打折"; the texts of label2 and label4 were
# swapped in the original and are corrected here.
bg1 = BoxAnnotation(bottom=y_mean, right=x_mean, fill_alpha=0.1, fill_color="olive")       # upper left
label1 = Label(x=1.1, y=0.55, text="大量大打折", text_font_size="10pt")
p4.add_layout(bg1)
p4.add_layout(label1)

bg2 = BoxAnnotation(bottom=y_mean, left=x_mean, fill_alpha=0.1, fill_color="firebrick")    # upper right
label2 = Label(x=0.8, y=0.55, text="少量大打折", text_font_size="10pt")
p4.add_layout(bg2)
p4.add_layout(label2)

bg3 = BoxAnnotation(top=y_mean, right=x_mean, fill_alpha=0.1, fill_color="firebrick")      # lower left
label3 = Label(x=1.1, y=0.8, text="大量少打折", text_font_size="10pt")
p4.add_layout(bg3)
p4.add_layout(label3)

bg4 = BoxAnnotation(top=y_mean, left=x_mean, fill_alpha=0.1, fill_color="olive")           # lower right
label4 = Label(x=0.8, y=0.8, text="少量少打折", text_font_size="10pt")
p4.add_layout(bg4)
p4.add_layout(label4)

show(p4)