import time

import numpy as np
import pandas as pd
def get_rfm(path='/Users/fangli/Desktop/untitled/shuju.xlsx'):
    """Build the per-customer RFM table and assign each customer a segment.

    Reads the '订单记录' sheet of the workbook at ``path``, drops rows with any
    missing value, keeps only orders whose '订单状态' is '交易成功', and derives
    for every '买家昵称':

    * R - whole days between today (local midnight) and the latest '付款日期'
    * F - number of distinct calendar days on which the buyer paid
    * M - total '实付金额' divided by F

    R and M are scored by quartile, F by fixed thresholds; each score is then
    compared with its column mean to build a three-digit 0/1 code that
    ``classify`` turns into a segment label.

    Args:
        path: Location of the Excel workbook. Defaults to the path that was
            previously hard-coded.

    Returns:
        DataFrame with columns '买家昵称', 'R', 'F', 'M', 'r-scores',
        'f-scores', 'm-scores', 'R是否大于均值', 'F是否大于均值',
        'M是否大于均值', '判断代号' and '客户分层'.

    Raises:
        ValueError: Propagated from ``pd.cut`` when two quartile edges of R or
            M coincide (bin edges must be unique).

    NOTE(review): assumes the selected column positions [0, 1, 2, 3, 4, 8]
    contain '买家昵称', '付款日期', '订单状态' and '实付金额', and that
    '付款日期' is read as a datetime column - confirm against the workbook.
    """
    # Load the data and clean it.
    orders = pd.read_excel(io=path, sheet_name='订单记录', usecols=[0, 1, 2, 3, 4, 8])
    orders = orders.dropna(how='any')
    # .copy() so the helper column below is written to an independent frame
    # instead of a slice of the unfiltered data (avoids chained assignment).
    orders = orders.loc[orders['订单状态'] == '交易成功', :].copy()

    # Calendar day of each payment: several orders by one buyer on the same
    # day count as a single purchase.
    orders['日期'] = orders['付款日期'].astype(str).str[:10]

    per_buyer = orders.groupby('买家昵称').agg(
        last_paid=('付款日期', 'max'),
        F=('日期', 'nunique'),
        total_paid=('实付金额', 'sum'),
    ).reset_index()

    # R: days since the most recent purchase. Use a real Timestamp rather than
    # a '%y-%m-%d' string, whose two-digit year is parsed ambiguously.
    today = pd.Timestamp.now().normalize()
    per_buyer['R'] = (today - per_buyer['last_paid']).dt.days
    # M: average amount spent per purchase day.
    per_buyer['M'] = per_buyer['total_paid'] / per_buyer['F']

    rfm = per_buyer[['买家昵称', 'R', 'F', 'M']].copy()

    # Bin edges: quartiles for R and M, fixed thresholds for F. The last F
    # edge is open-ended so very frequent buyers still receive the top score
    # instead of NaN.
    bins_r = calculate_quartile(rfm['R'].values)
    bins_m = calculate_quartile(rfm['M'].values)
    bins_f = [0, 1.1, 2.1, 3.1, float('inf')]
    scores_r = [4, 3, 2, 1]  # more recent (smaller R) scores higher
    scores_m = [2, 4, 6, 8]
    scores_f = [0.1, 0.25, 0.5, 1]

    # include_lowest=True keeps the minimum value inside the first bin;
    # without it the customer(s) at the minimum would get a NaN score.
    rfm['r-scores'] = pd.cut(
        rfm['R'], bins=bins_r, labels=scores_r, include_lowest=True
    ).astype(float)
    rfm['f-scores'] = pd.cut(
        rfm['F'], bins=bins_f, labels=scores_f, include_lowest=True
    ).astype(float)
    rfm['m-scores'] = pd.cut(
        rfm['M'], bins=bins_m, labels=scores_m, include_lowest=True
    ).astype(float)

    # Turn the True/False "score >= column mean" comparison into 1/0 flags.
    for flag_col, score_col in (
        ('R是否大于均值', 'r-scores'),
        ('F是否大于均值', 'f-scores'),
        ('M是否大于均值', 'm-scores'),
    ):
        rfm[flag_col] = (rfm[score_col] >= rfm[score_col].mean()) * 1

    # Pack the three flags into one code: hundreds = R, tens = F, units = M.
    rfm['判断代号'] = (
        rfm['R是否大于均值'] * 100
        + rfm['F是否大于均值'] * 10
        + rfm['M是否大于均值'] * 1
    )

    # Customer segmentation.
    rfm['客户分层'] = rfm['判断代号'].apply(classify)
    return rfm
def calculate_quartile(array_data):
    """Return the five quartile boundaries of ``array_data``.

    Args:
        array_data: Non-empty one-dimensional sequence or array of numbers.

    Returns:
        A list ``[minimum, 25th percentile, median, 75th percentile, maximum]``.
        The three inner values are Python floats; minimum and maximum keep the
        element type of the input array.

    Raises:
        ValueError: If ``array_data`` is empty.
    """
    values = np.asarray(array_data)
    if values.size == 0:
        raise ValueError('calculate_quartile() requires at least one value')

    lower, upper = np.percentile(values, [25, 75])
    # ndarray.min()/max() run in native code, unlike the builtin min()/max(),
    # which iterate element by element in Python.
    return [
        values.min(),
        float(lower),
        float(np.median(values)),
        float(upper),
        values.max(),
    ]
# Segment label for every R/F/M flag code (hundreds = R, tens = F, units = M).
# M flag 1 marks an "important" (重要) customer, M flag 0 a "general" (一般) one.
_SEGMENT_LABELS = {
    111: '重要价值客户',
    110: '一般价值客户',
    101: '重要发展客户',
    100: '一般发展客户',
    11: '重要保持客户',
    10: '一般保持客户',
    1: '重要挽留客户',
    0: '一般挽留客户',
}


def classify(code):
    """Map a three-digit R/F/M flag code to its customer-segment label.

    Args:
        code: One of 111, 110, 101, 100, 11, 10, 1 or 0.

    Returns:
        The segment label string for ``code``.

    Raises:
        ValueError: If ``code`` is not one of the eight known codes.
    """
    try:
        return _SEGMENT_LABELS[code]
    except KeyError:
        # Fail with a clear message instead of an unbound-local error.
        raise ValueError('unknown RFM code: {!r}'.format(code)) from None
# Build the per-customer RFM table; this runs when the module is executed
# or imported.
data = get_rfm()

# Statistics: number of customers in each segment and their share of the total.
count = (
    data['客户分层']
    .value_counts()
    .rename_axis('客户类型')
    .reset_index(name='人数')
)
count['人数占比'] = count['人数'].div(count['人数'].sum())

# Statistics: amount spent by each segment and its share of the total.
# F * M recovers each customer's total spend, since M is total spend / F.
data['购买总金额'] = data['F'] * data['M']
mon = (
    data.groupby('客户分层')['购买总金额']
    .sum()
    .rename_axis('客户类型')
    .reset_index(name='消费金额')
)
mon['金额占比'] = mon['消费金额'].div(mon['消费金额'].sum())

# Join the two summaries on the segment name and show the report.
result = count.merge(mon, on='客户类型')
print(result)