%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy import stats
warnings.filterwarnings('ignore')
# Plot styling: seaborn theme, a font able to render Chinese labels, and
# minus signs that display correctly with that font.
plt.style.use("seaborn")
plt.rc('font', family='SimHei', size=13)
plt.rcParams['axes.unicode_minus'] = False

# Load the data set.
data = pd.read_csv(r"./cs_training.csv", encoding='gbk')

# Mapping from the original English feature names to Chinese names.
column = {
    'SeriousDlqin2yrs': '好坏客户',
    'RevolvingUtilizationOfUnsecuredLines': '可用额度比值',
    'age': '年龄',
    'NumberOfTime30-59DaysPastDueNotWorse': '逾期30-59天笔数',
    'DebtRatio': '负债率',
    'MonthlyIncome': '月收入',
    'NumberOfOpenCreditLinesAndLoans': '信贷数量',
    'NumberOfTimes90DaysLate': '逾期90天笔数',
    'NumberRealEstateLoansOrLines': '固定资产贷款量',
    'NumberOfTime60-89DaysPastDueNotWorse': '逾期60-89天笔数',
    'NumberOfDependents': '家属数量',
}
data = data.rename(columns=column)
data.head()
好坏客户 | 可用额度比值 | 年龄 | 逾期30-59天笔数 | 负债率 | 月收入 | 信贷数量 | 逾期90天笔数 | 固定资产贷款量 | 逾期60-89天笔数 | 家属数量 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.766127 | 45 | 2 | 0.802982 | 9120.0 | 13 | 0 | 6 | 0 | 2.0 |
1 | 0 | 0.957151 | 40 | 0 | 0.121876 | 2600.0 | 4 | 0 | 0 | 0 | 1.0 |
2 | 0 | 0.658180 | 38 | 1 | 0.085113 | 3042.0 | 2 | 1 | 0 | 0 | 0.0 |
3 | 0 | 0.233810 | 30 | 0 | 0.036050 | 3300.0 | 5 | 0 | 0 | 0 | 0.0 |
4 | 0 | 0.907239 | 49 | 1 | 0.024926 | 63588.0 | 7 | 0 | 1 | 0 | 0.0 |
from sklearn.ensemble import RandomForestRegressor
# 用随机森林对缺失值预测填充函数
def set_missing(df):
    """Fill missing '月收入' values with random-forest predictions.

    A RandomForestRegressor is fitted on the rows where '月收入' is present,
    using the columns at positions 0-4 and 6-9 as predictors. Its predictions,
    rounded to whole numbers, are written into the rows where '月收入' is
    missing.

    Args:
        df: DataFrame whose column at position 5 is '月收入'. The predictor
            columns are presumably free of missing values, since their raw
            values are passed straight to the regressor -- TODO confirm.

    Returns:
        The same DataFrame object; it is modified in place.

    Raises:
        ValueError: If every row is missing '月收入', leaving nothing to
            train on.
    """
    missing_mask = df['月收入'].isnull()

    # Nothing to impute: return early instead of asking the model to predict
    # on an empty matrix, which scikit-learn rejects.
    if not missing_mask.any():
        return df
    if missing_mask.all():
        raise ValueError("cannot impute '月收入': no rows with a known value to train on")

    # Reorder so the column being imputed comes first, followed by predictors.
    # NOTE(review): position 0 is the label column in this notebook's data, so
    # the label is used as a predictor here -- confirm this is intended.
    process_df = df.iloc[:, [5, 0, 1, 2, 3, 4, 6, 7, 8, 9]]

    # Split into rows with a known value and rows to be imputed (numpy arrays).
    known = process_df[~missing_mask].values
    unknown = process_df[missing_mask].values

    # X holds the predictor values, y the known '月收入' values.
    X = known[:, 1:]
    y = known[:, 0]

    rfr = RandomForestRegressor(random_state=0, n_estimators=200, max_depth=3, n_jobs=-1)
    rfr.fit(X, y)

    # Predict the missing values and write them back into the original frame.
    predicted = rfr.predict(unknown[:, 1:]).round(0)
    df.loc[missing_mask, '月收入'] = predicted
    return df
# Impute the column with many missing values using the random forest, then
# drop the few rows that still contain missing values, then drop duplicates.
data = set_missing(data).dropna().drop_duplicates()
data.info()
Int64Index: 145563 entries, 0 to 149999
Data columns (total 11 columns):
好坏客户 145563 non-null int64
可用额度比值 145563 non-null float64
年龄 145563 non-null int64
逾期30-59天笔数 145563 non-null int64
负债率 145563 non-null float64
月收入 145563 non-null float64
信贷数量 145563 non-null int64
逾期90天笔数 145563 non-null int64
固定资产贷款量 145563 non-null int64
逾期60-89天笔数 145563 non-null int64
家属数量 145563 non-null float64
dtypes: float64(4), int64(7)
memory usage: 13.3 MB
# Keep only rows whose three past-due counters are all below 80 and whose
# age is positive.
past_due_ok = (
    (data['逾期30-59天笔数'] < 80)
    & (data['逾期90天笔数'] < 80)
    & (data['逾期60-89天笔数'] < 80)
)
data = data[past_due_ok & (data['年龄'] > 0)]
col_list = data.columns.values
col_list
array(['好坏客户', '可用额度比值', '年龄', '逾期30-59天笔数', '负债率', '月收入', '信贷数量',
'逾期90天笔数', '固定资产贷款量', '逾期60-89天笔数', '家属数量'], dtype=object)
# Every column except those at positions 0, 3, 7 and 9 is trimmed below.
skipped_positions = (0, 3, 7, 9)
new_col_list = [
    name for position, name in enumerate(col_list)
    if position not in skipped_positions
]
# One-sided trimming: keep rows strictly below each column's 99% quantile.
# The quantile is recomputed on the already-trimmed data for each column.
for item in new_col_list:
    upper_bound = data[item].quantile(0.99)
    data = data[data[item] < upper_bound]
import woe.feature_process as fp
import woe.eval as eval
data.columns
Index(['好坏客户', '可用额度比值', '年龄', '逾期30-59天笔数', '负债率', '月收入', '信贷数量', '逾期90天笔数',
'固定资产贷款量', '逾期60-89天笔数', '家属数量'],
dtype='object')
data.rename(columns={'好坏客户': 'target'}, inplace=True)

# WOE binning, IV computation and WOE transformation.
# data_woe stores the WOE value of every feature. It must be an independent
# copy: a plain `data_woe = data` only creates a second name for the same
# frame, so writing the WOE values would overwrite the raw features in `data`.
data_woe = data.copy()
civ_list = []
n_positive = sum(data['target'])
n_negtive = len(data) - n_positive
# The loop variable is deliberately not called `column`, so the rename
# mapping defined under that name earlier in the notebook is not clobbered.
for feature in list(data.columns[1:]):
    if data[feature].dtypes == 'object':
        civ = fp.proc_woe_discrete(data, feature, n_positive, n_negtive, 0.05 * len(data), alpha=0.05)
    else:
        civ = fp.proc_woe_continuous(data, feature, n_positive, n_negtive, 0.05 * len(data), alpha=0.05)
    civ_list.append(civ)
    # Bins are always computed from the raw values in `data`; only the copy
    # receives the encoded values.
    data_woe[feature] = fp.woe_trans(data[feature], civ)
civ_df = eval.eval_feature_detail(civ_list, 'output_feature_detail_0315.csv')

# Select the variables whose IV exceeds the threshold.
iv_thre = 0.001
iv = civ_df[['var_name', 'iv']].drop_duplicates()
x_columns = iv.var_name[iv.iv > iv_thre]
-------------process continuous variable:可用额度比值-------------
---------------process continuous variable:年龄---------------
-----------process continuous variable:逾期30-59天笔数-----------
--------------process continuous variable:负债率---------------
--------------process continuous variable:月收入---------------
--------------process continuous variable:信贷数量--------------
------------process continuous variable:逾期90天笔数-------------
------------process continuous variable:固定资产贷款量-------------
-----------process continuous variable:逾期60-89天笔数-----------
--------------process continuous variable:家属数量--------------
可用额度比值
年龄
逾期30-59天笔数
负债率
月收入
信贷数量
逾期90天笔数
固定资产贷款量
逾期60-89天笔数
家属数量
civ_df
var_name | split_list | sub_total_sample_num | positive_sample_num | negative_sample_num | sub_total_num_percentage | positive_rate_in_sub_total | woe_list | iv_list | iv | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 可用额度比值 | (-INF,0.0] | 9352 | 281 | 9071 | 0.071213 | 0.030047 | -0.757463 | 0.029626 | 1.097527 |
1 | 可用额度比值 | (0.0,0.04215617400000002] | 29188 | 365 | 28823 | 0.222259 | 0.012505 | -1.652011 | 0.312454 | 1.097527 |
2 | 可用额度比值 | (0.04215617400000002,0.0596119858] | 6962 | 111 | 6851 | 0.053014 | 0.015944 | -1.405599 | 0.059004 | 1.097527 |
3 | 可用额度比值 | (0.0596119858,0.13857709429999995] | 17901 | 375 | 17526 | 0.136312 | 0.020949 | -1.127495 | 0.108464 | 1.097527 |
4 | 可用额度比值 | (0.13857709429999995,0.21535932080000003] | 10113 | 295 | 9818 | 0.077008 | 0.029170 | -0.787977 | 0.034242 | 1.097527 |
5 | 可用额度比值 | (0.21535932080000003,0.30067412204] | 8274 | 296 | 7978 | 0.063004 | 0.035775 | -0.577063 | 0.016386 | 1.097527 |
6 | 可用额度比值 | (0.30067412204,0.3974544458] | 7510 | 371 | 7139 | 0.057187 | 0.049401 | -0.240106 | 0.002970 | 1.097527 |
7 | 可用额度比值 | (0.3974544458,0.5331554074] | 8506 | 586 | 7920 | 0.064771 | 0.068893 | 0.113193 | 0.000872 | 1.097527 |
8 | 可用额度比值 | (0.5331554074,0.74050784496] | 9985 | 996 | 8989 | 0.076033 | 0.099750 | 0.517010 | 0.025541 | 1.097527 |
9 | 可用额度比值 | (0.74050784496,0.90349439404] | 7295 | 1103 | 6192 | 0.055550 | 0.151199 | 0.991796 | 0.084555 | 1.097527 |
10 | 可用额度比值 | (0.90349439404,+INF) | 16238 | 3360 | 12878 | 0.123648 | 0.206922 | 1.373441 | 0.423411 | 1.097527 |
0 | 年龄 | (-INF,32.0] | 13531 | 1392 | 12139 | 0.103035 | 0.102875 | 0.551338 | 0.039964 | 0.046040 |
1 | 年龄 | (32.0,+INF) | 117793 | 6747 | 111046 | 0.896965 | 0.057278 | -0.083827 | 0.006076 | 0.046040 |
0 | 逾期30-59天笔数 | (-INF,0.0] | 111119 | 4279 | 106840 | 0.846144 | 0.038508 | -0.500593 | 0.170989 | 0.606073 |
1 | 逾期30-59天笔数 | (0.0,+INF) | 20205 | 3860 | 16345 | 0.153856 | 0.191042 | 1.273765 | 0.435084 | 0.606073 |
0 | 负债率 | (-INF,0.018495376] | 10665 | 466 | 10199 | 0.081211 | 0.043694 | -0.368839 | 0.009420 | 0.088009 |
1 | 负债率 | (0.018495376,0.087064379] | 8843 | 577 | 8266 | 0.067337 | 0.065249 | 0.054956 | 0.000208 | 0.088009 |
2 | 负债率 | (0.087064379,0.138218834] | 7533 | 449 | 7084 | 0.057362 | 0.059604 | -0.041551 | 0.000097 | 0.088009 |
3 | 负债率 | (0.138218834,0.191269577] | 9148 | 493 | 8655 | 0.069660 | 0.053892 | -0.148363 | 0.001437 | 0.088009 |
4 | 负债率 | (0.191269577,0.229044637] | 6898 | 383 | 6515 | 0.052527 | 0.055523 | -0.116807 | 0.000681 | 0.088009 |
5 | 负债率 | (0.229044637,0.26480176767999997] | 6780 | 313 | 6467 | 0.051628 | 0.046165 | -0.311244 | 0.004370 | 0.088009 |
6 | 负债率 | (0.26480176767999997,0.33095571454] | 12054 | 618 | 11436 | 0.091788 | 0.051269 | -0.201013 | 0.003398 | 0.088009 |
7 | 负债率 | (0.33095571454,0.37664756308] | 7551 | 440 | 7111 | 0.057499 | 0.058270 | -0.065603 | 0.000240 | 0.088009 |
8 | 负债率 | (0.37664756308,0.4237495164599999] | 6696 | 406 | 6290 | 0.050988 | 0.060633 | -0.023343 | 0.000028 | 0.088009 |
9 | 负债率 | (0.4237495164599999,0.54743575044] | 12664 | 918 | 11746 | 0.096433 | 0.072489 | 0.167949 | 0.002929 | 0.088009 |
10 | 负债率 | (0.54743575044,0.7263413320000001] | 9112 | 825 | 8287 | 0.069386 | 0.090540 | 0.409960 | 0.013976 | 0.088009 |
11 | 负债率 | (0.7263413320000001,2.6823588614000204] | 9111 | 1026 | 8085 | 0.069378 | 0.112611 | 0.652677 | 0.039439 | 0.088009 |
12 | 负债率 | (2.6823588614000204,1009.0] | 10925 | 639 | 10286 | 0.083191 | 0.058490 | -0.061614 | 0.000307 | 0.088009 |
13 | 负债率 | (1009.0,+INF) | 13344 | 586 | 12758 | 0.101611 | 0.043915 | -0.363574 | 0.011478 | 0.088009 |
0 | 月收入 | (-INF,1159.0] | 13281 | 877 | 12404 | 0.101132 | 0.066034 | 0.067753 | 0.000478 | 0.114078 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7 | 月收入 | (4831.0,5332.68] | 7201 | 470 | 6731 | 0.054834 | 0.065269 | 0.055274 | 0.000172 | 0.114078 |
8 | 月收入 | (5332.68,5917.0] | 7381 | 432 | 6949 | 0.056205 | 0.058529 | -0.060907 | 0.000203 | 0.114078 |
9 | 月收入 | (5917.0,6667.0] | 8647 | 533 | 8114 | 0.065845 | 0.061640 | -0.005805 | 0.000002 | 0.114078 |
10 | 月收入 | (6667.0,7916.0] | 10306 | 519 | 9787 | 0.078478 | 0.050359 | -0.219886 | 0.003448 | 0.114078 |
11 | 月收入 | (7916.0,8333.0] | 7967 | 327 | 7640 | 0.060667 | 0.041044 | -0.434172 | 0.009484 | 0.114078 |
12 | 月收入 | (8333.0,10300.0] | 10340 | 489 | 9851 | 0.078737 | 0.047292 | -0.285946 | 0.005687 | 0.114078 |
13 | 月收入 | (10300.0,+INF) | 13361 | 474 | 12887 | 0.101741 | 0.035476 | -0.585747 | 0.027165 | 0.114078 |
0 | 信贷数量 | (-INF,3.0] | 18632 | 1864 | 16768 | 0.141878 | 0.100043 | 0.520272 | 0.048333 | 0.067247 |
1 | 信贷数量 | (3.0,4.0] | 10396 | 620 | 9776 | 0.079163 | 0.059638 | -0.040946 | 0.000130 | 0.067247 |
2 | 信贷数量 | (4.0,5.0] | 11689 | 671 | 11018 | 0.089009 | 0.057404 | -0.081496 | 0.000570 | 0.067247 |
3 | 信贷数量 | (5.0,6.0] | 12373 | 651 | 11722 | 0.094217 | 0.052615 | -0.173693 | 0.002635 | 0.067247 |
4 | 信贷数量 | (6.0,7.0] | 12102 | 629 | 11473 | 0.092154 | 0.051975 | -0.186600 | 0.002958 | 0.067247 |
5 | 信贷数量 | (7.0,8.0] | 11422 | 518 | 10904 | 0.086976 | 0.045351 | -0.329890 | 0.008205 | 0.067247 |
6 | 信贷数量 | (8.0,9.0] | 10219 | 568 | 9651 | 0.077815 | 0.055583 | -0.115675 | 0.000990 | 0.067247 |
7 | 信贷数量 | (9.0,10.0] | 8745 | 488 | 8257 | 0.066591 | 0.055803 | -0.111481 | 0.000788 | 0.067247 |
8 | 信贷数量 | (10.0,11.0] | 7431 | 405 | 7026 | 0.056585 | 0.054501 | -0.136466 | 0.000993 | 0.067247 |
9 | 信贷数量 | (11.0,13.0] | 11199 | 615 | 10584 | 0.085278 | 0.054916 | -0.128456 | 0.001330 | 0.067247 |
10 | 信贷数量 | (13.0,+INF) | 17116 | 1110 | 16006 | 0.130334 | 0.064852 | 0.048416 | 0.000312 | 0.067247 |
0 | 逾期90天笔数 | (-INF,0.0] | 124488 | 5426 | 119062 | 0.947946 | 0.043587 | -0.371422 | 0.111376 | 0.800610 |
1 | 逾期90天笔数 | (0.0,+INF) | 6836 | 2713 | 4123 | 0.052054 | 0.396870 | 2.298494 | 0.689234 | 0.800610 |
0 | 固定资产贷款量 | (-INF,0.0] | 49471 | 3805 | 45666 | 0.376710 | 0.076914 | 0.231982 | 0.022454 | 0.043142 |
1 | 固定资产贷款量 | (0.0,1.0] | 48153 | 2429 | 45724 | 0.366673 | 0.050443 | -0.218124 | 0.015867 | 0.043142 |
2 | 固定资产贷款量 | (1.0,2.0] | 28413 | 1538 | 26875 | 0.216358 | 0.054130 | -0.143694 | 0.004196 | 0.043142 |
3 | 固定资产贷款量 | (2.0,+INF) | 5287 | 367 | 4920 | 0.040259 | 0.069416 | 0.121318 | 0.000625 | 0.043142 |
0 | 逾期60-89天笔数 | (-INF,0.0] | 125162 | 6053 | 119109 | 0.953078 | 0.048361 | -0.262465 | 0.058584 | 0.515526 |
1 | 逾期60-89天笔数 | (0.0,+INF) | 6162 | 2086 | 4076 | 0.046922 | 0.338526 | 2.047152 | 0.456942 | 0.515526 |
0 | 家属数量 | (-INF,0.0] | 79954 | 4351 | 75603 | 0.608830 | 0.054419 | -0.138070 | 0.010928 | 0.028199 |
1 | 家属数量 | (0.0,1.0] | 24473 | 1683 | 22790 | 0.186356 | 0.068770 | 0.111276 | 0.002423 | 0.028199 |
2 | 家属数量 | (1.0,2.0] | 18117 | 1377 | 16740 | 0.137957 | 0.076006 | 0.219126 | 0.007295 | 0.028199 |
3 | 家属数量 | (2.0,3.0] | 8780 | 728 | 8052 | 0.066858 | 0.082916 | 0.313645 | 0.007553 | 0.028199 |
66 rows × 10 columns
x_columns
0 可用额度比值
0 年龄
0 逾期30-59天笔数
0 负债率
0 月收入
0 信贷数量
0 逾期90天笔数
0 固定资产贷款量
0 逾期60-89天笔数
0 家属数量
Name: var_name, dtype: object
iv
var_name | iv | |
---|---|---|
0 | 可用额度比值 | 1.097527 |
0 | 年龄 | 0.046040 |
0 | 逾期30-59天笔数 | 0.606073 |
0 | 负债率 | 0.088009 |
0 | 月收入 | 0.114078 |
0 | 信贷数量 | 0.067247 |
0 | 逾期90天笔数 | 0.800610 |
0 | 固定资产贷款量 | 0.043142 |
0 | 逾期60-89天笔数 | 0.515526 |
0 | 家属数量 | 0.028199 |
data_woe.head()
target | 可用额度比值 | 年龄 | 逾期30-59天笔数 | 负债率 | 月收入 | 信贷数量 | 逾期90天笔数 | 固定资产贷款量 | 逾期60-89天笔数 | 家属数量 | |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 1.373441 | -0.083827 | -0.500593 | -0.041551 | 0.461028 | -0.040946 | -0.371422 | 0.231982 | -0.262465 | 0.111276 |
2 | 0 | 0.517010 | -0.083827 | 1.273765 | 0.054956 | 0.461028 | 0.520272 | 2.298494 | 0.231982 | -0.262465 | -0.138070 |
3 | 0 | -0.577063 | 0.551338 | -0.500593 | 0.054956 | 0.461028 | -0.081496 | -0.371422 | 0.231982 | -0.262465 | -0.138070 |
5 | 0 | -0.787977 | -0.083827 | -0.500593 | -0.065603 | 0.243014 | 0.520272 | -0.371422 | -0.218124 | -0.262465 | 0.111276 |
7 | 0 | 0.991796 | -0.083827 | -0.500593 | -0.116807 | 0.243014 | -0.329890 | -0.371422 | 0.231982 | -0.262465 | -0.138070 |
信用评分卡模型在国外是一种成熟的预测方法,在信用风险评估以及金融风险控制领域得到了比较广泛的使用。其原理是先将模型变量用WOE编码方式离散化,再运用logistic回归模型(一种用于二分类目标变量的广义线性模型)进行建模。下面将目标变量为1的样本记为违约用户,目标变量为0的样本记为正常用户,采用sklearn中的LogisticRegression进行建模
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# 模型评估
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.metrics import auc
# Extract features and label, then split into train and test sets.
col_names = data_woe.columns.values
label_name, feature_names = col_names[0], col_names[1:]
y = data_woe[label_name]      # label column
X = data_woe[feature_names]   # feature columns
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
lr = LogisticRegression(C=1000.0, random_state=0)
result = lr.fit(X_train, y_train)
result
LogisticRegression(C=1000.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=0, solver='warn', tol=0.0001, verbose=0,
warm_start=False)
# Predict class labels for the held-out test set
y_pred = lr.predict(X_test)
y_pred
array([0, 0, 0, ..., 0, 0, 0], dtype=int64)
# Predicted probability of being a bad customer (second column of
# predict_proba), rounded to 5 decimals
prob_pred = [round(p_bad, 5) for p_bad in lr.predict_proba(X_test)[:, 1]]
# Accuracy of the predicted labels
accuracy_score(y_test, y_pred)
0.9387532362048835
# The classes are imbalanced; the author considers the PR curve a poor fit
# here and evaluates with the ROC curve instead.
# roc_curve returns the false-positive rates, true-positive rates and the
# thresholds, with label 1 treated as the positive class.
FPR, TPR, thresholds = metrics.roc_curve(y_test, prob_pred, pos_label=1)
# Area under the ROC curve
metrics.auc(FPR, TPR)
0.8499778184241903
# Plot the ROC curve together with the dashed diagonal reference line.
roc_auc = metrics.auc(FPR, TPR)
plt.plot(FPR, TPR, 'b', label='AUC = %0.2f' % roc_auc)  # ROC curve
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('真正率')
plt.xlabel('假正率')
plt.show()
![ROC曲线](output_33_0.png)
从上图可知,AUC值为0.85,说明该模型的预测效果还是不错的,正确率较高
odds为bad用户(违约)概率 $p$ 与good用户(正常)概率 $1-p$ 的比值
$$\operatorname{odds}=\frac{p}{1-p}$$
评分卡设定的分值刻度可以通过将分值表示为比率对数的线性表达式来定义。公式如下:
$$score_{总}=A-B\ln(odds)$$
常数 $A$ 和 $B$ 通常被称为补偿和刻度,它们的值可以通过将两个已知或者假设的分值带入 $score_{总}=A-B\ln(odds)$ 中得到。通常,需要两个假设:
首先,设定比率为 $odds$ 的特定点的分值为 $P_{0}$。然后,比率为 $2\,odds$ 的点分值为 $P_{0}-PDO$,带入可以得到
$$B=\frac{PDO}{\ln(2)}$$
$$A=P_{0}+B\ln\left(odds\right)$$
import math
# PDO is the number of points over which the odds double, P0 is the expected
# score at the chosen reference odds, and B is the scale factor.
PDO, P0 = 20, 600
B = PDO / math.log(2)
B
28.85390081777927
# A is the offset: P0 plus B times the natural log of 1/60
A = P0 + B * math.log(1 / 60)
A
481.8621880878296
最终,评分卡的分值可以写成下列形式:
$$Score=A-B\left(\beta_{0}+\beta_{1} x_{1}+\cdots+\beta_{p} x_{p}\right)$$
变量 $x_{1},\cdots,x_{p}$ 为各自变量对应的WOE值,$\beta_{0},\cdots,\beta_{p}$ 为逻辑斯蒂回归方程的系数
# Coefficients of the fitted logistic regression: the intercept first,
# followed by the weights in result.coef_[0].
coef_list = [result.intercept_[0]] + list(result.coef_[0])
# 计算信用评分
def credit_socre(data, coef, offset=None, factor=None):
    """Compute a scorecard score for every row of a numeric feature frame.

    Each score is
    ``offset - factor * (coef[0] + sum_j data[i, j] * coef[j + 1])``,
    evaluated for all rows at once instead of cell by cell.

    Args:
        data: DataFrame of numeric feature values, one row per sample.
        coef: Sequence holding the intercept followed by one weight per
            column of ``data``. Extra trailing entries are ignored.
        offset: Additive constant of the score scale. Defaults to the
            module-level ``A``.
        factor: Multiplier applied to the linear predictor. Defaults to the
            module-level ``B``.

    Returns:
        A list with one score per row, in row order.

    Raises:
        IndexError: If ``data`` has rows and ``coef`` has fewer than
            ``data.shape[1] + 1`` entries.
    """
    # Fall back to the notebook-level scale constants when none are given.
    if offset is None:
        offset = A
    if factor is None:
        factor = B

    n_rows, n_features = data.shape
    if n_rows == 0:
        return []
    if len(coef) < n_features + 1:
        raise IndexError("coef must hold the intercept plus one weight per column of data")

    weights = np.asarray(coef[:n_features + 1], dtype=float)
    features = np.asarray(data, dtype=float)
    # Linear predictor for all rows: intercept plus the weighted feature sum.
    linear_predictor = weights[0] + features.dot(weights[1:])
    return (offset - factor * linear_predictor).tolist()
# Score every row from its WOE-encoded features (all columns after the label).
score_list = credit_socre(data_woe.iloc[:, 1:], coef_list)
data_woe.insert(11, 'credit_score', score_list)
# Preview the first and last rows. DataFrame.append was removed in pandas 2.0;
# pd.concat builds the same preview on old and new versions.
pd.concat([data_woe.head(), data_woe.tail()])
# Insert the credit score into the original data.
# NOTE(review): this only attaches scores to raw feature values if `data` and
# `data_woe` are distinct objects; if `data_woe` is just another name for
# `data`, this adds a second score column to the WOE frame instead. The
# column name 'credit_socre' is kept as-is in case later cells rely on it.
data.insert(11, 'credit_socre', score_list)
pd.concat([data.head(), data.tail()])