Python RFM 分析

python RFM分析_第1张图片


python RFM分析_第2张图片


python RFM分析_第3张图片

import numpy

import pandas

# Load the raw deal records from the CSV export into a DataFrame.
# The steps that follow read its DealDateTime, CustomerID, OrderID and
# Sales columns.
data = pandas.read_csv(filepath_or_buffer='D:\\PDA\\5.7\\data.csv')

python RFM分析_第4张图片

# Convert the DealDateTime column from 'YYYY/MM/DD' text into datetime
# values so that date arithmetic can be done on it.
data['DealDateTime'] = pandas.to_datetime(data['DealDateTime'], format='%Y/%m/%d')

python RFM分析_第5张图片

# Elapsed time between the moment the script runs and each deal, stored as
# a timedelta column. The result therefore depends on the run date.
data['DateDiff'] = pandas.to_datetime('today') - data.DealDateTime

python RFM分析_第6张图片

# Reduce the timedelta column to its whole-day component (an integer count).
data['DateDiff'] = data.DateDiff.dt.days

python RFM分析_第7张图片

# Recency: for every CustomerID, the smallest DateDiff, i.e. the number of
# days since that customer's most recent deal.
#
# The result is a one-column DataFrame ('RecencyAgg') indexed by CustomerID.
# The previous form, .agg({'RecencyAgg': numpy.min}), relied on the
# dict-renaming behaviour of SeriesGroupBy.agg, which raises
# SpecificationError ("nested renamer is not supported") on pandas >= 1.0.
# Aggregating first and naming the column explicitly works on every version.
R_Agg = (
    data.groupby(by=['CustomerID'])['DateDiff']
    .min()
    .to_frame(name='RecencyAgg')
)

python RFM分析_第8张图片

# Frequency: for every CustomerID, the number of OrderID rows recorded.
#
# The result is a one-column DataFrame ('FrequencyAgg') indexed by
# CustomerID. The previous form, .agg({'FrequencyAgg': numpy.size}), relied
# on the dict-renaming behaviour of SeriesGroupBy.agg, which raises
# SpecificationError ("nested renamer is not supported") on pandas >= 1.0.
# GroupBy.size() counts every row in the group, exactly as numpy.size did.
F_Agg = (
    data.groupby(by=['CustomerID'])['OrderID']
    .size()
    .to_frame(name='FrequencyAgg')
)

python RFM分析_第9张图片

# Monetary: for every CustomerID, the total of the Sales column.
#
# The result is a one-column DataFrame ('MonetaryAgg') indexed by
# CustomerID. The previous form, .agg({'MonetaryAgg': numpy.sum}), relied on
# the dict-renaming behaviour of SeriesGroupBy.agg, which raises
# SpecificationError ("nested renamer is not supported") on pandas >= 1.0.
M_Agg = (
    data.groupby(by=['CustomerID'])['Sales']
    .sum()
    .to_frame(name='MonetaryAgg')
)

python RFM分析_第10张图片

# Put the three per-customer aggregates side by side. Each join aligns on
# the index the frames share (CustomerID, from the groupby above).
aggData = R_Agg.join(F_Agg)
aggData = aggData.join(M_Agg)

python RFM分析_第11张图片

# Recency score: split customers into five bins at the 20/40/60/80 %
# quantiles of RecencyAgg and label them 5..1, so the smallest recency
# values receive the highest score.
bins = aggData['RecencyAgg'].quantile(
    q=[0, 0.2, 0.4, 0.6, 0.8, 1],
    interpolation='nearest',
)
# pandas.cut builds right-closed intervals by default, so an edge equal to
# the minimum would exclude it; the lowest edge is reset to 0 instead.
# NOTE(review): this assumes every RecencyAgg value is > 0 -- confirm.
bins[0] = 0
labels = [5, 4, 3, 2, 1]
R_S = pandas.cut(aggData['RecencyAgg'], bins=bins, labels=labels)

# Frequency score: split customers into five bins at the 20/40/60/80 %
# quantiles of FrequencyAgg and label them 1..5, so the largest order
# counts receive the highest score.
bins = aggData.FrequencyAgg.quantile(
    q=[0, 0.2, 0.4, 0.6, 0.8, 1],
    interpolation='nearest',
)
# pandas.cut builds right-closed intervals by default, so the lowest edge is
# reset to 0 to keep the minimum inside the first bin.
# (Stray C-style trailing semicolons removed from the next two statements.)
bins[0] = 0
labels = [1, 2, 3, 4, 5]
F_S = pandas.cut(
    aggData.FrequencyAgg,
    bins, labels=labels,
)

# Monetary score: split customers into five bins at the 20/40/60/80 %
# quantiles of MonetaryAgg and label them 1..5, so the largest totals
# receive the highest score.
bins = aggData['MonetaryAgg'].quantile(
    q=[0, 0.2, 0.4, 0.6, 0.8, 1],
    interpolation='nearest',
)
# Lowest edge reset to 0 because pandas.cut intervals are right-closed.
# NOTE(review): this assumes every MonetaryAgg value is > 0 -- confirm.
bins[0] = 0
labels = [1, 2, 3, 4, 5]
M_S = pandas.cut(aggData['MonetaryAgg'], bins=bins, labels=labels)

# Attach the three individual scores to the per-customer table.
aggData['R_S'], aggData['F_S'], aggData['M_S'] = R_S, F_S, M_S

python RFM分析_第12张图片

# Combine the three scores into one three-digit number: R is the hundreds
# digit, F the tens digit and M the units digit.
aggData['RFM'] = (
    R_S.astype(int) * 100
    + F_S.astype(int) * 10
    + M_S.astype(int)
)

python RFM分析_第13张图片

# Customer level: split the composite RFM value into eight bins at 12.5 %
# quantile steps and label them 1..8 in ascending RFM order.
bins = aggData['RFM'].quantile(
    q=[0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1],
    interpolation='nearest',
)
# Lowest edge reset to 0 so the smallest RFM value falls inside the first
# (right-closed) interval produced by pandas.cut.
bins[0] = 0
labels = [1, 2, 3, 4, 5, 6, 7, 8]
aggData['level'] = pandas.cut(aggData['RFM'], bins=bins, labels=labels)

python RFM分析_第14张图片

# Move the index back into an ordinary column (kept, not dropped) and
# replace it with a default integer index.
aggData = aggData.reset_index(drop=False)

python RFM分析_第15张图片

# Customers ordered by level first and by RFM value within each level, both
# ascending. The flags are written as booleans, which is what sort_values
# documents for `ascending`; the ordering is the same as with [1, 1].
fe = aggData.sort_values(
    by=['level', 'RFM'],
    ascending=[True, True],
)

python RFM分析_第16张图片

# Number of customers in each level, as a one-column DataFrame ('size')
# indexed by level.
#
# The previous form, .agg({'size': numpy.size}), relied on the dict-renaming
# behaviour of SeriesGroupBy.agg, which raises SpecificationError
# ("nested renamer is not supported") on pandas >= 1.0. GroupBy.size()
# counts every row in the group, exactly as numpy.size did.
dd = (
    aggData.groupby(by=['level'])['CustomerID']
    .size()
    .to_frame(name='size')
)

python RFM分析_第17张图片

你可能感兴趣的:(python RFM分析)