# python--双十一淘宝商家营销策略分析

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings#忽略红色的警告
warnings.filterwarnings("ignore")

from bokeh.plotting import figure,show,output_file
from bokeh.models import ColumnDataSource

# Load the Double-11 (Singles' Day) Taobao cosmetics data and echo it once.
df = pd.read_excel("双十一淘宝美妆数据.xlsx")
print(df)

# Replace every missing value with 0.
df = df.fillna(0)

# Index the rows by `update_time` (the column itself is kept) and store the
# day of the month separately; `.day` needs a datetime-like index.
df = df.set_index("update_time", drop=False)
df["date"] = df.index.day
# Extract the sale dates and measure how many goods were on sale on
# Double 11 itself (day of month == 11).
data1 = df[["id", "title", "店名", "date"]]

# First and last day of the month on which each item id appears.
d1 = data1.groupby("id")["date"].agg(["min", "max"])

# Ids present on day 11. De-duplicated so an id with several rows on that day
# is counted once and cannot multiply rows in the merge below.
id_11 = data1[data1["date"] == 11]["id"].drop_duplicates()

# One row per id sold on day 11; the constant True acts as a marker.
d2 = pd.DataFrame({"id": id_11, "双十一当天是否售卖": True})

# Left join onto the full id list: ids missing from d2 get NaN in the marker
# column, which is then turned into False.
id_date = pd.merge(d1, d2, left_index=True, right_on="id", how="left")
id_date.fillna(False, inplace=True)

# Share of all goods that were on sale on day 11 (0 when there is no data).
m = len(d1)
m_11 = len(id_11)
m_per = m_11 / m if m else 0.0
print("双十一当天参与活动的商品共%i个,占比为%.2f%%" % (m_11, m_per * 100))

# Classify every item by its sales rhythm around Double 11 (day 11):
#   A: first seen before day 11, last seen after it, and on sale on day 11
#   B: first seen before day 11, last seen on day 11
#   C: first seen on day 11, last seen after it
#   D: only seen on day 11
#   E: last seen before day 11
#   F: seen before and after day 11 but not on day 11
#   G: first seen after day 11
first_day = id_date["min"]
last_day = id_date["max"]
not_sold_on_11 = id_date["双十一当天是否售卖"] == False  # element-wise comparison

# np.select returns the choice of the FIRST condition that matches, so the
# rules are listed from highest to lowest priority: G and E override F, and F
# overrides A-D. A single vectorised assignment replaces chained
# `frame[col][mask] = value` writes, which pandas does not guarantee to
# propagate to the frame.
conditions = [
    (first_day > 11).values,
    (last_day < 11).values,
    not_sold_on_11.values,
    ((first_day == 11) & (last_day == 11)).values,
    ((first_day == 11) & (last_day > 11)).values,
    ((first_day < 11) & (last_day == 11)).values,
    ((first_day < 11) & (last_day > 11)).values,
]
choices = ["G", "E", "F", "D", "C", "B", "A"]
id_date["type"] = np.select(conditions, choices, default="待分类")

# Number of items in each rhythm category
# (same figures as id_date[['type', 'id']].groupby('type').count()).
result1 = id_date["type"].value_counts()
print()

# Order the categories A..G. Selecting a label that is absent with `.loc`
# raises KeyError, so only categories that actually occur are requested.
category_order = ["A", "B", "C", "D", "E", "F", "G"]
result1 = result1.loc[[c for c in category_order if c in result1.index]]

# Pie chart of the category shares.
from bokeh.palettes import brewer
colori = brewer["YlGn"][7]  # seven-colour palette, one colour per category
plt.axis("equal")  # equal axis scaling so the pie is drawn as a circle
# startangle=90 with counterclock=False starts at the top and runs clockwise;
# autopct prints each wedge's percentage.
plt.pie(
    result1,
    labels=result1.index,
    autopct="%.2f%%",
    colors=colori,
    startangle=90,
    radius=1.5,
    counterclock=False,
)

# What happened to the goods that were NOT on sale on Double 11?
id_not11 = id_date[id_date["双十一当天是否售卖"] == False]
# Attach the raw rows of those goods.
df_not11 = id_not11[["id", "type"]]
data_not11 = pd.merge(df_not11, df, on="id", how="left")

# Case 1: temporarily delisted goods, i.e. category "F".
id_con1 = id_date["id"][id_date["type"] == "F"].values

# Case 2: relisted goods. After grouping by (id, title), an id that occurs
# more than once has more than one distinct title.
data_con2 = data_not11[["id", "title", "date"]].groupby(by=["id", "title"]).count()
title_count = data_con2.reset_index()["id"].value_counts()
id_con2 = title_count[title_count > 1].index

# Case 3: presale goods, i.e. the title contains "预售". na=False treats
# non-string titles (missing values were filled with 0) as "no match" so the
# result is a clean boolean mask.
data_con3 = data_not11[data_not11["title"].str.contains("预售", na=False)]
id_con3 = data_con3["id"].value_counts().index

# All three figures are counts, so each is formatted as an integer.
print("未参与双十一当天活动的商品中,有%i个为暂时下架商品,有%i个为重新上架商品,有%i个为预售商品" % (len(id_con1), len(id_con2), len(id_con3)))

# Goods that really took part in Double 11: those on sale on day 11 plus the
# presale goods.
data_11sale = id_11
# hstack concatenates the two 1-D id sequences end to end.
id_11sale_final = np.hstack((data_11sale, id_con3))
result2 = pd.DataFrame({"id": id_11sale_final})

# Raw rows of the goods on sale on day 11, counted per shop.
# NOTE(review): the merge returns one row per matching record in df, so
# count() tallies records rather than distinct ids - confirm this is intended.
x1 = pd.DataFrame({"id": id_11})
x1_df = pd.merge(x1, df, on="id", how="left")
brand_11sale = x1_df[["id", "店名"]].groupby(by="店名").count()

# Same per-shop count for the presale goods.
x2 = pd.DataFrame({"id": id_con3})
x2_df = pd.merge(x2, df, on="id", how="left")
brand_yushou = x2_df[["id", "店名"]].groupby(by="店名").count()

# Outer join so that a shop with only presale goods or only day-11 goods is
# kept; a missing count means "none" and becomes 0 instead of NaN, which
# would otherwise turn the total below into NaN.
result2_data = pd.merge(brand_yushou, brand_11sale, on="店名", how="outer")
result2_data = result2_data.fillna(0).astype(int)

# Total per shop, largest first. Column 0 is the presale count (id_x) and
# column 1 the day-11 count (id_y).
result2_data["sum"] = result2_data.iloc[:, 0] + result2_data.iloc[:, 1]
result2_data.sort_values(by="sum", inplace=True, ascending=False)
result2_data.rename(columns={"id_x": "presale", "id_y": "sale_on_11", "sum": "sum"}, inplace=True)

# Stacked bar chart: presale vs. day-11 counts for every shop.
from bokeh.models import HoverTool
from bokeh.core.properties import value

# Naming the index makes it available to bokeh as the "brand" column.
result2_data.index.name = "brand"

# Basic parameters: shops on the x axis, the first two columns as the stacked
# layers, one colour per layer.
lst_brand = list(result2_data.index)
lst_type = list(result2_data.columns[:2])
colors = ["white", "black"]

source = ColumnDataSource(result2_data)

hover = HoverTool(
    tooltips=[
        ("品牌", "@brand"),
        ("双十一当天参与活动的商品数量", "@sale_on_11"),
        ("预售商品数量", "@presale"),
        ("真正参与双十一活动的商品总数", "@sum"),
    ]
)
output_file("project88_08.pic1.html")

# Empty figure with a categorical x range.
p = figure(
    x_range=lst_brand,
    plot_width=900,
    plot_height=350,
    title="各个品牌参与双十一活动的情况",
    tools=[hover, "reset,xwheel_zoom,pan,crosshair"],
)
# One stacked layer per entry of lst_type, read from `source`.
p.vbar_stack(
    lst_type,
    x="brand",
    source=source,
    width=0.8,
    color=colors,
    alpha=0.7,
    legend=[value(x) for x in lst_type],
    muted_color="black",
    muted_alpha=0.2,
)
show(p)

# Which goods are discounted?
# Tag every record with its period; the bins are (4, 10], (10, 11], (11, 14].
data2 = df[["id", "title", "店名", "date", "price"]].assign(
    period=pd.cut(
        df["date"],
        [4, 10, 11, 14],
        labels=["双十一前", "双十一中", "双十一后"],
    )
)

# One row per (id, price) pair, carrying the earliest period in which that
# price was seen.
price = data2[["id", "price", "period"]].groupby(by=["id", "price"]).min().reset_index()

# An id with a single price never changed price; an id with several did.
id_count = price["id"].value_counts()
single_price = (id_count == 1).values
id_type1 = id_count.index[single_price]   # not discounted
id_type2 = id_count.index[~single_price]  # discounted

# Same grouping again, this time keeping the shop name.
result3_data1 = (
    data2[["id", "price", "period", "店名"]]
    .groupby(["id", "price"])
    .min()
    .reset_index()
)

# Discount ratio: pair each pre-Double-11 price (suffix _x) with the
# Double-11 price of the same id (suffix _y) and divide the latter by the
# former.
result3_before11 = result3_data1[result3_data1["period"] == "双十一前"]
result3_at11 = result3_data1[result3_data1["period"] == "双十一中"]
result3_data2 = result3_before11.merge(result3_at11, on="id", how="left")
result3_data2["discount"] = result3_data2["price_y"] / result3_data2["price_x"]
# Line chart of the discount-ratio distribution.
bokeh_data = result3_data2[["id", "discount"]].dropna()
# np.linspace(0, 1, 21) yields 21 edges, i.e. 20 equal-width bins over (0, 1].
bokeh_data["discount_range"] = pd.cut(bokeh_data["discount"], bins=np.linspace(0, 1, 21))
# observed=False keeps empty bins, so all 20 intervals are present and
# .iloc[:-1] always removes the last one, (0.95, 1.0]. The copy makes the
# result an independent frame that can take a new column.
bokeh_data2 = (
    bokeh_data[["discount", "id", "discount_range"]]
    .groupby(by="discount_range", observed=False)
    .count()
    .iloc[:-1]
    .copy()
)
# Share of each discount interval.
bokeh_data2["discount_pre"] = bokeh_data2["discount"] / bokeh_data2["discount"].sum()
# pd.cut labels bins with Interval objects, but bokeh categorical ranges and
# data-source columns need plain strings.
bokeh_data2.index = bokeh_data2.index.astype(str)

# Plot.
output_file("project8_pic2.html")
source1 = ColumnDataSource(bokeh_data2)
lst_discount = bokeh_data2.index.tolist()
hover = HoverTool(tooltips=[("折扣率", "@discount")])
p = figure(
    x_range=lst_discount,
    plot_height=400,
    plot_width=900,
    title="商品折扣率统计",
    tools=[hover, "reset,xwheel_zoom,pan,crosshair"],
)
# line_dash=[10, 4] draws a dashed line.
p.line(x="discount_range", y="discount_pre", source=source1, line_color="black", line_dash=[10, 4])
p.circle(x="discount_range", y="discount_pre", source=source1, size=8, color="red", alpha=0.8)
# show(p)  # left disabled, as in the original script

# Average discount ratio per shop, drawn as a dashed line with markers.
output_file("project8_pic2.html")

# Keep only rows that have both a shop name and a discount, then average the
# discount per shop.
bokeh_data1 = result3_data2[["店名_x", "discount"]].dropna()
bokeh_data3 = bokeh_data1.groupby("店名_x")[["discount"]].mean()

source2 = ColumnDataSource(bokeh_data3)
hover = HoverTool(tooltips=[("平均折扣率", "@discount")])
lst_sp = list(bokeh_data3.index)

p2 = figure(
    x_range=lst_sp,
    plot_height=350,
    plot_width=900,
    title="各店的平均折扣率",
    tools=[hover, "reset,xwheel_zoom,pan,crosshair"],
)
p2.line(
    x="店名_x",
    y="discount",
    source=source2,
    line_color="black",
    line_dash=[10, 4],
)
p2.circle(
    x="店名_x",
    y="discount",
    source=source2,
    size=8,
    color="red",
    alpha=0.8,
)
show(p2)

from bokeh.transform import jitter

# Jittered scatter of individual discount ratios, one categorical row per shop.
# Shop names in first-seen order, missing values excluded.
brand = list(result3_data2["店名_x"].dropna().unique())
hover1 = HoverTool(tooltips=[("平均折扣率", "@discount")])

# Complete rows only, restricted to discount ratios below 0.96.
bokeh_data4 = (
    result3_data2[["id", "discount", "店名_x"]]
    .dropna()
    .loc[lambda frame: frame["discount"] < 0.96]
)
source3 = ColumnDataSource(bokeh_data4)

output_file("project08_pic3.html")
p3 = figure(
    y_range=brand,
    plot_height=900,
    plot_width=900,
    title="各店的平均折扣率",
    tools=[hover1, "box_select,reset,xwheel_zoom,pan,crosshair"],
)
# jitter spreads the points within each shop's band of the y range.
p3.circle(
    x="discount",
    y=jitter("店名_x", width=0.7, range=p3.y_range),
    source=source3,
    alpha=0.3,
    color="red",
)
show(p3)
# How deep are the discounts?
# Keep only rows with a discount ratio below 0.95 (treated as discounted).
is_discounted = result3_data2["discount"] < 0.95
data_zk = result3_data2[is_discounted]
# Mean discount ratio of the discounted rows, per shop.
result4_zkld = data_zk.groupby(by="店名_y")["discount"].mean()

# Per shop: discounted rows, all rows, and their ratio. Shops lacking either
# figure are dropped.
n_dz = data_zk["店名_y"].value_counts()
n_total = result3_data2["店名_y"].value_counts()
result4_dzspbl = pd.DataFrame({"打折商品数": n_dz, "商品总数": n_total})
result4_dzspbl["参与打折的商品比例"] = (
    result4_dzspbl["打折商品数"] / result4_dzspbl["商品总数"]
)
result4_dzspbl = result4_dzspbl.dropna()

# Per-shop Double-11 participation totals.
result_sum = result2_data.copy()

# Inner joins on the shop index keep only shops present in all three tables.
result4_data = result4_zkld.to_frame().merge(
    result4_dzspbl, left_index=True, right_index=True, how="inner"
)
result4_data = result4_data.merge(
    result_sum, left_index=True, right_index=True, how="inner"
)

# Bubble-chart data: share of discounted goods on x, mean discount ratio on y,
# Double-11 participation total as the bubble size.
from bokeh.models.annotations import Span, Label, BoxAnnotation

bokeh_data = result4_data[["discount", "sum", "参与打折的商品比例"]].rename(
    columns={"sum": "amount", "参与打折的商品比例": "pre"}
)
# Scale the totals down to a usable marker size.
bokeh_data["size"] = 0.03 * bokeh_data["amount"]

# Means of both axes; they position the quadrant guides.
x_mean = bokeh_data["pre"].mean()
y_mean = bokeh_data["discount"].mean()
source = ColumnDataSource(bokeh_data)

# Quadrant chart of the shops' discount strategies.
# "pre" is the share of discounted goods, so the tooltip labels it as a ratio.
hover = HoverTool(tooltips=[("品牌", "@index"), ("折扣率", "@discount"), ("商品总数", "@amount"), ("参与打折商品比例", "@pre")])
output_file("project08_pic4.html")
p4 = figure(plot_width=900, plot_height=900, title="各个品牌打折套路解析", tools=[hover, "reset,xwheel_zoom,pan,crosshair"])
# Scatter: one marker per shop, sized by the "size" column.
p4.circle_x(x="pre", y="discount", source=source, size="size", fill_color="red", line_color="black", fill_alpha=0.6, line_dash=[8, 3])
p4.ygrid.grid_line_dash = [6, 4]
p4.xgrid.grid_line_dash = [6, 4]
# Guide lines through the mean of each axis.
x = Span(location=x_mean, dimension="height", line_color="green", line_alpha=0.7, line_width=0.6)
y = Span(location=y_mean, dimension="width", line_color="green", line_alpha=0.7, line_width=0.6)
p4.add_layout(x)
p4.add_layout(y)

# The four shaded boxes tile the plane around (x_mean, y_mean), with the same
# colour on diagonally opposite boxes. "discount" is the Double-11 price
# divided by the earlier price, so a LOWER y means a DEEPER discount: labels
# at y=0.55 say "大打折" and labels at y=0.8 say "少打折".

# Above y_mean, left of x_mean.
bg1 = BoxAnnotation(bottom=y_mean, right=x_mean, fill_alpha=0.1, fill_color="olive")
label1 = Label(x=1.1, y=0.55, text="大量大打折", text_font_size="10pt")
p4.add_layout(bg1)
p4.add_layout(label1)
# Above y_mean, right of x_mean.
bg2 = BoxAnnotation(bottom=y_mean, left=x_mean, fill_alpha=0.1, fill_color="firebrick")
label2 = Label(x=0.8, y=0.55, text="少量大打折", text_font_size="10pt")
p4.add_layout(bg2)
p4.add_layout(label2)
# Below y_mean, left of x_mean.
bg3 = BoxAnnotation(top=y_mean, right=x_mean, fill_alpha=0.1, fill_color="firebrick")
label3 = Label(x=1.1, y=0.8, text="大量少打折", text_font_size="10pt")
p4.add_layout(bg3)
p4.add_layout(label3)
# Below y_mean, right of x_mean.
bg4 = BoxAnnotation(top=y_mean, left=x_mean, fill_alpha=0.1, fill_color="olive")
label4 = Label(x=0.8, y=0.8, text="少量少打折", text_font_size="10pt")
p4.add_layout(bg4)
p4.add_layout(label4)

show(p4)

# 你可能感兴趣的:(python--双十一淘宝商家营销策略分析)