2019-04-10 某CD网站用户消费行为分析常出错的语句总结

案例及代码来源:https://www.jianshu.com/p/4778bffbfcc6

from datetime import datetime

%matplotlib inline

# Switch plotting to the 'ggplot' style sheet.
# NOTE(review): `plt` is presumably matplotlib.pyplot, imported outside this excerpt — confirm.
plt.style.use('ggplot')

# Parse the `order_dt` column using the format YYYYMMDD and store the result
# in a new `order_date` column.
df['order_date']=pd.to_datetime(df.order_dt,format='%Y%m%d')

# Cast the order dates to month precision (datetime64[M]) and store them in
# a new `month` column.
df['month']=df.order_date.values.astype('datetime64[M]')

# Create a new figure with figsize (12, 4).
plt.figure(figsize=(12,4))

# Select the first panel of a 1-row, 2-column subplot grid.
plt.subplot(121)

# Histogram of `order_amount` using 30 bins.
df.order_amount.hist(bins=30)

# Sorted month values converted to strings, with duplicates removed.
columns_month=df.month.sort_values().astype('str').unique()

# Use those month strings as the column labels of `pivoted_counts`
# (`pivoted_counts` is built outside this excerpt).
pivoted_counts.columns=columns_month

# Pivot to one row per user_id and one column per month, holding the mean
# order_amount; user/month combinations with no data are filled with 0.
pivoted_amount=df.pivot_table(index='user_id',columns='month',values='order_amount',aggfunc='mean').fillna(0)

# Recompute the month labels exactly as above.
columns_month=df.month.sort_values().astype('str').unique()

# Relabel the pivot table's columns with the month strings.
pivoted_amount.columns=columns_month

return pd.Series(status,index=columns_month)

# Tally the status values found in each column of `pivoted_purchase_status`.
# 'unreg' is replaced by NaN first, and value_counts() skips NaN by default,
# so that status is left out of the tallies.
# The original line was missing the closing parenthesis of .apply(...).
# np.nan / Series.value_counts() replace np.NaN / pd.value_counts(), which
# newer NumPy and pandas releases have removed or deprecated.
# NOTE(review): later lines read `purchase_status_counts`, which is never
# assigned in this excerpt — confirm whether this result should use that name.
pivoted_status_counts=pivoted_purchase_status.replace('unreg',np.nan).apply(lambda x: x.value_counts())

# Show the first rows of the status tally table.
pivoted_status_counts.head()

# Fill missing counts with 0, transpose, and draw a stacked area chart.
# NOTE(review): `purchase_status_counts` is not assigned anywhere in this
# excerpt; it may be the same table as `pivoted_status_counts` — confirm.
purchase_status_counts.fillna(0).T.plot.area(figsize=(12,4))

# Divide every row by its own row sum (axis=1 passes one row at a time).
return_rate=purchase_status_counts.apply(lambda x: x/x.sum(),axis=1)

# Plot the row labelled 'return' as a line chart.
return_rate.loc['return'].plot(figsize=(12,6))

# Select the order-level columns into `user_purchase`.
# The original line was cut off in the middle of the column list (unterminated
# string and brackets). 'order_date' completes it: that column is created from
# `order_dt` earlier, and the later merge with suffixes=('', '_min') implies a
# date column shared with `order_date_min`.
user_purchase=df[['user_id','order_products','order_amount','order_date']]

# Difference between `order_date_max` and `order_date_min`, converted to a
# number of days by dividing by a one-day timedelta, shown as a 15-bin
# histogram. Both names are defined outside this excerpt.
((order_date_max-order_date_min)/np.timedelta64(1,'D')).hist(bins=15)

# Inner-join each purchase row with `order_date_min` on user_id; column names
# that clash get the suffix '_min' on the right-hand side.
user_purchase_retention=pd.merge(left=user_purchase,right=order_date_min.reset_index(),how='inner',on='user_id',suffixes=('','_min'))

# Bucket `date_diff` into the intervals given by `bin`.
# NOTE(review): neither the `date_diff` column nor `bin` is created in this
# excerpt, and the name `bin` shadows the Python builtin — confirm upstream.
user_purchase_retention['date_diff_bin']=pd.cut(user_purchase_retention.date_diff,bins=bin)

# One row per user_id, one column per date_diff bucket, holding the summed
# order_amount.
pivoted_retention=user_purchase_retention.pivot_table(index='user_id',columns='date_diff_bin',values='order_amount',aggfunc=sum)

# Mean of each bucket column.
pivoted_retention.mean()

# Convert the summed amounts into 0/1 flags: missing cells become 0, then any
# value greater than 0 becomes 1 and everything else 0.
# fillna has to be *called* with the fill value. The original
# `pivoted_retention.fillna.applymap` looked up `.applymap` on the bound
# method itself and raised AttributeError.
pivoted_retention_trans=pivoted_retention.fillna(0).applymap(lambda x: 1 if x>0 else 0)

# Per column: sum of the flags divided by the count of non-null entries,
# drawn as a bar chart.
(pivoted_retention_trans.sum()/pivoted_retention_trans.count()).plot.bar()

def diff(group):
    """Return each row's ``date_diff`` minus the following row's ``date_diff``.

    Args:
        group: Object exposing a ``date_diff`` Series attribute (for example
            one group of a ``DataFrame.groupby``).

    Returns:
        A Series aligned with ``group.date_diff``. The last row has no
        successor, so its value is NaN.
    """
    # shift(-1) pulls the next row's value up onto the current row.
    d = group.date_diff - group.date_diff.shift(-1)
    # The original returned the undefined name `date_diff` (NameError).
    return d

# Run `diff` on each user_id group of `user_purchase_retention`.
last_diff=user_purchase_retention.groupby('user_id').apply(diff)

# Mean of the per-row differences.
last_diff.mean()

# Histogram of the differences using 20 bins.
last_diff.hist(bins=20)

你可能感兴趣的:(2019-04-10 某CD网站用户消费行为分析常出错的语句总结)