使用 Python 聚类算法,实现 RFM 客户分群,进行精准营销!

import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load the GBK-encoded RFM table and collect every column name except
# 'buyer_nick' (presumably the customer identifier -- confirm against the
# CSV); the remaining columns are the ones that get scaled afterwards.
df = pd.read_csv(r'D:\project\RFM\rfm\kate_rfm.csv', encoding='gbk')
column_list = df.columns.tolist()
# list.remove raises ValueError if the column is missing, which surfaces a
# malformed input file immediately.
column_list.remove('buyer_nick')
# Min-max normalization
def standardization(x, Max, Min):
    """Rescale ``x`` linearly so that ``Min`` maps to 0 and ``Max`` maps to 1.

    Args:
        x: Value to rescale.
        Max: Upper end of the reference range.
        Min: Lower end of the reference range.

    Returns:
        ``(x - Min) / (Max - Min)``. When ``Max`` and ``Min`` are equal
        scalars (a constant feature) the result is 0 instead of a division
        by zero.
    """
    span = Max - Min
    # Guard only the scalar case; array-like ranges keep the plain
    # element-wise division they always had.
    if np.ndim(span) == 0 and span == 0:
        # Multiplying by 0.0 (rather than returning a literal) keeps the
        # shape of ``x`` and still propagates NaN inputs.
        return (x - Min) * 0.0
    return (x - Min) / span
# For every column in column_list, add a min-max scaled copy named
# '<column>_x' to df and report progress on stdout.
for feature in column_list:
    values = df[feature]
    high = np.max(values)
    low = np.min(values)
    df[feature + '_x'] = values.apply(standardization, Max=high, Min=low)
    print(feature + ' is ok ')


# Elbow method: fit K-Means for k = 1..9 on the scaled R/F/M columns and
# plot each model's inertia (labelled SSE) against k.
X = range(1, 10)
scaled_features = df[['recency_x', 'frequency_x', 'monetary_x']]
# KMeans.fit returns the fitted estimator, so inertia_ can be read inline.
SSE = [KMeans(n_clusters=k).fit(scaled_features).inertia_ for k in X]

plt.plot(X, SSE, 'o-')
plt.xlabel('k')
plt.ylabel('SSE')
plt.show()

# Fit the final 4-cluster model on the scaled columns, tag every row with
# its cluster id, then tabulate the per-cluster mean of the raw columns.
model = KMeans(n_clusters=4, max_iter=500)
model.fit(df[['recency_x', 'frequency_x', 'monetary_x']])
df['label'] = model.labels_

summary_columns = ['recency', 'frequency', 'monetary', 'label']
# Distinct cluster ids, in set iteration order.
label_list = list(set(df['label'].tolist()))
df_mean = pd.DataFrame(columns=summary_columns)
for cluster_id in label_list:
    in_cluster = df['label'] == cluster_id
    cluster_stats = df.loc[in_cluster, summary_columns].describe()
    # One row per cluster, indexed by the cluster id.
    df_mean.loc[cluster_id] = cluster_stats.loc['mean'].tolist()
# Bare expression: shows the table in a notebook/REPL, no effect in a script.
df_mean

你可能感兴趣的:(机器学习,Python 练习)