# RFM 用户价值模型数据处理

import time

import numpy as np
import pandas as pd

def get_rfm(path='/Users/fangli/Desktop/untitled/shuju.xlsx', current_date=None):
    """Build the per-customer RFM table and assign each customer a segment.

    Reads the '订单记录' sheet of the workbook at ``path``, keeps only rows
    whose '订单状态' equals '交易成功', and derives for every '买家昵称':

    * ``R`` - days between ``current_date`` and the latest '付款日期';
    * ``F`` - number of distinct calendar days with at least one order
      (several orders on the same day count once);
    * ``M`` - total '实付金额' divided by ``F``.

    R and M are scored by quartile, F by fixed bins. Each score is compared
    with its column mean to produce a three-digit code, which ``classify``
    maps to a segment label.

    Args:
        path: Location of the Excel workbook. Defaults to the path that was
            previously hard-coded.
        current_date: Reference date for recency. Anything accepted by
            ``pd.to_datetime``; defaults to today at midnight.

    Returns:
        DataFrame with columns '买家昵称', 'R', 'F', 'M', 'r-scores',
        'f-scores', 'm-scores', 'R是否大于均值', 'F是否大于均值',
        'M是否大于均值', '判断代号' and '客户分层'.

    Raises:
        ValueError: From ``pd.cut`` if the quartile edges of R or M are not
            strictly increasing (for example when many customers share the
            same value), or from ``calculate_quartile`` when no successful
            order remains after filtering.
    """
    # Load the order records and keep only complete, successful orders.
    df = pd.read_excel(io=path, sheet_name='订单记录', usecols=[0, 1, 2, 3, 4, 8])
    df = df.dropna(how='any')
    # .copy() so the '日期' column added below is written to an independent frame.
    df = df.loc[df['订单状态'] == '交易成功', :].copy()

    # R: days since the most recent purchase.
    # A full Timestamp is used instead of a '%y-%m-%d' string, because a
    # two-digit year is parsed ambiguously by pd.to_datetime.
    if current_date is None:
        current_date = pd.Timestamp.today().normalize()
    else:
        current_date = pd.to_datetime(current_date)
    r = df.groupby('买家昵称')['付款日期'].max().reset_index()
    r['R'] = (current_date - r['付款日期']).dt.days
    r = r[['买家昵称', 'R']]

    # F: purchase frequency, with all orders a customer places on one day merged.
    df['日期'] = df['付款日期'].astype(str).str[:10]
    f = df.groupby('买家昵称')['日期'].nunique().reset_index()
    f.columns = ['买家昵称', 'F']

    # M: average amount paid per purchase day.
    sum_m = df.groupby('买家昵称')['实付金额'].sum().reset_index()
    sum_m.columns = ['买家昵称', '总支付金额']
    avg_m = pd.merge(sum_m, f, on='买家昵称', how='inner')
    avg_m['M'] = avg_m['总支付金额'] / avg_m['F']

    # Combine R, F and M into a single frame.
    rfm = pd.merge(r, avg_m, on='买家昵称', how='inner')
    rfm = rfm[['买家昵称', 'R', 'F', 'M']].copy()

    # Bin edges: quartiles for R and M, fixed thresholds for F.
    # The last F bin is open-ended so customers with more than 7 purchase
    # days still receive the top score instead of NaN.
    list_r = calculate_quartile(rfm['R'].values)
    list_m = calculate_quartile(rfm['M'].values)
    list_f = [0, 1.1, 2.1, 3.1, float('inf')]

    # A smaller R (more recent) is better, so its scores run high to low.
    scores_r = [4, 3, 2, 1]
    scores_m = [2, 4, 6, 8]
    scores_f = [0.1, 0.25, 0.5, 1]

    # include_lowest=True keeps the minimum value inside the first bin;
    # without it the smallest R / M falls outside every interval and is NaN.
    rfm['r-scores'] = pd.cut(
        rfm['R'], bins=list_r, labels=scores_r, include_lowest=True
    ).astype(float)
    rfm['f-scores'] = pd.cut(
        rfm['F'], bins=list_f, labels=scores_f, include_lowest=True
    ).astype(float)
    rfm['m-scores'] = pd.cut(
        rfm['M'], bins=list_m, labels=scores_m, include_lowest=True
    ).astype(float)

    # Turn the boolean "score >= column mean" comparisons into 0 / 1 flags.
    rfm['R是否大于均值'] = (rfm['r-scores'] >= rfm['r-scores'].mean()) * 1
    rfm['F是否大于均值'] = (rfm['f-scores'] >= rfm['f-scores'].mean()) * 1
    rfm['M是否大于均值'] = (rfm['m-scores'] >= rfm['m-scores'].mean()) * 1

    # Pack the three flags into one code: R in the hundreds, F in the tens, M in the units.
    rfm['判断代号'] = (
        (rfm['R是否大于均值'] * 100)
        + (rfm['F是否大于均值'] * 10)
        + (rfm['M是否大于均值'] * 1)
    )

    # Customer segmentation.
    rfm['客户分层'] = rfm['判断代号'].apply(classify)

    return rfm

# Compute quartile boundaries.
def calculate_quartile(array_data):
    """Return the five quartile boundaries of ``array_data``.

    Args:
        array_data: Non-empty one-dimensional sequence or array of numbers.

    Returns:
        A list ``[minimum, 25th percentile, median, 75th percentile,
        maximum]``. The three inner values are Python floats; the minimum
        and maximum keep the element type of the input.

    Raises:
        ValueError: If ``array_data`` is empty.
    """
    values = np.asarray(array_data)
    if values.size == 0:
        raise ValueError('calculate_quartile() requires at least one value')

    return [
        values.min(),
        float(np.percentile(values, 25)),
        float(np.median(values)),
        float(np.percentile(values, 75)),
        values.max(),
    ]

# Define the customer categories: an M digit of 1 marks an "important"
# customer, an M digit of 0 a "general" customer.
def classify(code):
    """Map a three-digit R/F/M flag code to its customer segment label.

    The code is ``R * 100 + F * 10 + M`` where each of R, F and M is 0 or 1.

    Args:
        code: One of 111, 110, 101, 100, 11, 10, 1 or 0.

    Returns:
        The segment label for ``code``.

    Raises:
        ValueError: If ``code`` is not one of the eight known codes.
    """
    labels = {
        111: '重要价值客户',
        110: '一般价值客户',
        101: '重要发展客户',
        100: '一般发展客户',
        11: '重要保持客户',
        10: '一般保持客户',
        1: '重要挽留客户',
        0: '一般挽留客户',
    }
    try:
        return labels[code]
    except KeyError:
        # Fail with a clear message instead of an unbound-local error.
        raise ValueError('unknown RFM code: %r' % (code,)) from None

data = get_rfm()

# Statistics: number of customers in each segment and that segment's share
# of all customers.
count = data['客户分层'].value_counts().reset_index()
count.columns = ['客户类型', '人数']
count['人数占比'] = count['人数'].div(count['人数'].sum())

# Statistics: total spend in each segment and that segment's share of all
# spend. F * M recovers the total amount each customer paid.
data['购买总金额'] = data['F'].mul(data['M'])
mon = data.groupby('客户分层')['购买总金额'].sum().reset_index()
mon.columns = ['客户类型', '消费金额']
mon['金额占比'] = mon['消费金额'].div(mon['消费金额'].sum())

# Join the two summaries on the segment name and print the result.
result = count.merge(mon, left_on='客户类型', right_on='客户类型')

print(result)

# 你可能感兴趣的:(RFM 用户价值模型数据处理)