银行信用卡客户价值分析(Python数据分析)

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
# Load the credit-card records from a comma-separated, GBK-encoded CSV file.
csv_path = r'credit_card.csv'
data = pd.read_csv(csv_path, sep=',', encoding='gbk')
# Show which columns are available before any processing.
print(data.columns)
# 1. Remove abnormal credit-card records.
# Every filter is applied to the result of the previous one, so the frames
# shrink cumulatively and data6 holds only the rows in which none of the six
# flag columns equals 1. Filtering each time from the unfiltered `data`
# would leave data6 reflecting the last condition alone.
# NOTE(review): assumes the value 1 marks an abnormal record in each of these
# columns — confirm against the data dictionary.
data1 = data[data['逾期'] != 1]
data2 = data1[data1['呆账'] != 1]
data3 = data2[data2['强制停卡记录'] != 1]
data4 = data3[data3['退票'] != 1]
data5 = data4[data4['拒往记录'] != 1]
data6 = data5[data5['瑕疵户'] != 1]
print(data6)
# 2. Build the key customer risk features in three groups:
#    (1) behaviour, (2) economic risk, (3) income risk,
#    then (4) standardise every group with its own StandardScaler.
# NOTE(review): the groups are selected from the unfiltered `data`, not from
# the filtered frame `data6` — confirm this is intended.
action_cols = ['瑕疵户', '逾期', '呆账', '强制停卡记录', '退票', '拒往记录']
jingji_cols = ['借款余额', '个人月收入', '个人月开销', '家庭月收入', '月刷卡额']
income_cols = ['职业', '年龄', '住家']

data_action = data[action_cols]
data_jingji = data[jingji_cols]
data_income = data[income_cols]

# A fresh scaler is fitted per group, in the order action, economic, income.
data_action_std, data_jingji_std, data_income_std = (
    StandardScaler().fit_transform(group)
    for group in (data_action, data_jingji, data_income)
)
# 3. Build the KMeans clustering models (customer segmentation).
def _fit_kmeans(features):
    """Fit a 5-cluster KMeans (random_state=123) on *features*.

    Returns the fitted model together with a Series giving the number of
    samples assigned to each cluster label. The cluster centres remain
    available on the model as ``cluster_centers_``.
    """
    model = KMeans(n_clusters=5, random_state=123).fit(features)
    sizes = pd.Series(model.labels_).value_counts()
    return model, sizes


# Behaviour features.
data_kmeans1, r1 = _fit_kmeans(data_action_std)
print('行为特征每类数目:', r1)

# Economic-risk features.
data_kmeans2, r2 = _fit_kmeans(data_jingji_std)
print('经济风险特征每类数目:', r2)

# Income-risk features.
data_kmeans3, r3 = _fit_kmeans(data_income_std)
print('收入风险特征每类数目:', r3)

你可能感兴趣的:(数据分析,sklearn,Python数据分析,银行信用卡客户价值分析)