%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
import os
import re
import gc
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# SimHei is a CJK font; presumably chosen so Chinese labels render in figures.
plt.rcParams['font.sans-serif']=['SimHei']
# Draw the minus sign as an ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus']=False
# Display all columns and rows, and up to 100 characters per cell.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 100)
def diff_max_min(x):
    """Return the spread (maximum minus minimum) of ``x``.

    Passed to ``groupby(...).agg(...)`` later in this file as a custom
    aggregation.

    Args:
        x: Any object exposing ``max()`` and ``min()``, e.g. a pandas
            Series or a numpy array.

    Returns:
        ``x.max() - x.min()``.
    """
    return x.max() - x.min()
# Directories holding the product-B train/test files and the auxiliary tables.
path1 = 'B产品/'
path2 = '其他数据表/'

# Read the training set and preview its first two rows.
train = pd.read_csv(f'{path1}train_b.csv')
train.head(n=2)
|
id |
core_cust_id |
prod_code |
a2 |
a3 |
y |
0 |
70e7f0465877447aa44c8d3120d0414c |
9cb1f66b15 |
SSTJMZKF001 |
3 |
2021-07-01 |
0 |
1 |
b069c78512614452a7e815d231d8c580 |
9a5deb2794 |
SSTJMZKF001 |
3 |
2021-08-01 |
0 |
train.shape[0]
339516
# Read the test set and preview its first two rows.
# (The two statements were fused onto one line, which is a syntax error.)
test = pd.read_csv(path1 + 'test_b.csv')
test.head(2)
|
id |
core_cust_id |
prod_code |
c2 |
c3 |
0 |
2360e70c585a4d8a922ad2590e0bf494 |
a030075b9 |
SSTJMZKF001 |
3 |
2021-10-01 |
1 |
8c565852b73b4a5fafc19b88a5ad8899 |
a030075b9 |
SSTJMZKF002 |
3 |
2021-10-01 |
# Give the test set the same column names as the training set
# (c2 -> a2, c3 -> a3) so the two frames can be stacked.
test.rename(columns=dict(c2='a2', c3='a3'), inplace=True)
len(test)
51461
训练集339516, 测试集51461, 总共:390977
df
# Tag every row with its origin so the two sets can be told apart after they
# are stacked into a single frame.
train['type'] = 'train'
test['type'] = 'test'
df = pd.concat([train, test])

# Preview the first two and the last two rows of the combined frame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and yields the same rows in the same order.
pd.concat([df.head(2), df.tail(2)])
|
id |
core_cust_id |
prod_code |
a2 |
a3 |
y |
type |
0 |
70e7f0465877447aa44c8d3120d0414c |
9cb1f66b15 |
SSTJMZKF001 |
3 |
2021-07-01 |
0.0 |
train |
1 |
b069c78512614452a7e815d231d8c580 |
9a5deb2794 |
SSTJMZKF001 |
3 |
2021-08-01 |
0.0 |
train |
51459 |
323ed9139ecb4a5286f61e58ce3e3bab |
4d885e237d |
SSTJMZKF002 |
3 |
2021-10-01 |
NaN |
test |
51460 |
0330a1471d6b4f868392eaa450110db3 |
7c195ef1fa |
SSTJMZKF001 |
3 |
2021-10-01 |
NaN |
test |
df.shape[0]
390977
B产品表
# Read auxiliary table "h" (one row per prod_code) and preview two rows.
h = pd.read_csv(f'{path2}h.csv')
h.head(n=2)
|
prod_code |
h1 |
h2 |
h3 |
h4 |
h5 |
h6 |
h7 |
h8 |
0 |
ZYGR2016286 |
0 |
1 |
1 |
2 |
0 |
0 |
NaN |
209912 |
1 |
ZYGR2015103 |
0 |
1 |
1 |
2 |
1 |
0 |
NaN |
209912 |
# Replace the anonymised h1..h8 headers with descriptive names.
# (The three statements were fused onto one line, which is a syntax error.)
h_columns = ['prod_code', '计价类型', '周期类型', '模式', '风险等级', '是否允许变更分红方式', '产品品种', '模式2', '数据日期']
h.columns = h_columns
h.head(2)
|
prod_code |
计价类型 |
周期类型 |
模式 |
风险等级 |
是否允许变更分红方式 |
产品品种 |
模式2 |
数据日期 |
0 |
ZYGR2016286 |
0 |
1 |
1 |
2 |
0 |
0 |
NaN |
209912 |
1 |
ZYGR2015103 |
0 |
1 |
1 |
2 |
1 |
0 |
NaN |
209912 |
h.info()
RangeIndex: 2696 entries, 0 to 2695Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 prod_code 2696 non-null object 1 计价类型 2696 non-null int64 2 周期类型 2696 non-null int64 3 模式 2696 non-null int64 4 风险等级 2696 non-null int64 5 是否允许变更分红方式 2696 non-null int64 6 产品品种 2696 non-null int64 7 模式2 65 non-null float64 8 数据日期 2696 non-null int64 dtypes: float64(1), int64(7), object(1)memory usage: 189.7+ KB
h.nunique()
prod_code 2696计价类型 2周期类型 2模式 2风险等级 3是否允许变更分红方式 2产品品种 1模式2 7数据日期 3dtype: int64
# Drop the two columns that carry little information: per the nunique()/info()
# output above, '产品品种' has a single distinct value and '模式2' is non-null
# in only 65 of 2696 rows.
h.drop(columns=['产品品种', '模式2'], inplace=True)
# Read auxiliary table "l", give its columns descriptive names and preview it.
# (The four statements were fused onto one line, which is a syntax error.)
l = pd.read_csv(path2 + 'l.csv')
l_columns = ['prod_code', '募集方式', '管理方式', '业务模式', '收益特点', '期限', '投资模式', '数据日期']
l.columns = l_columns
l.head(2)
|
prod_code |
募集方式 |
管理方式 |
业务模式 |
收益特点 |
期限 |
投资模式 |
数据日期 |
0 |
YXFB2017031 |
0.0 |
2.0 |
3.0 |
1 |
NaN |
0 |
209912 |
1 |
ZYGR2016014 |
1.0 |
2.0 |
1.0 |
2 |
4.0 |
1 |
209912 |
l.info()
RangeIndex: 3002 entries, 0 to 3001
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 prod_code 3002 non-null object
1 募集方式 2502 non-null float64
2 管理方式 2502 non-null float64
3 业务模式 2502 non-null float64
4 收益特点 3002 non-null int64
5 期限 567 non-null float64
6 投资模式 3002 non-null int64
7 数据日期 3002 non-null int64
dtypes: float64(4), int64(3), object(1)
memory usage: 187.8+ KB
l.nunique()
prod_code 3002
募集方式 2
管理方式 2
业务模式 2
收益特点 2
期限 5
投资模式 2
数据日期 3
dtype: int64
# Outer-join the two product tables on product code and data date so that
# products present in only one of them are kept (missing attributes become NaN).
# (The two statements were fused onto one line, which is a syntax error.)
b_prod = pd.merge(h, l, on=['prod_code', '数据日期'], how='outer')
b_prod.head(3)
|
prod_code |
计价类型 |
周期类型 |
模式 |
风险等级 |
是否允许变更分红方式 |
数据日期 |
募集方式 |
管理方式 |
业务模式 |
收益特点 |
期限 |
投资模式 |
0 |
ZYGR2016286 |
0.0 |
1.0 |
1.0 |
2.0 |
0.0 |
209912 |
0.0 |
2.0 |
1.0 |
2 |
NaN |
1 |
1 |
ZYGR2015103 |
0.0 |
1.0 |
1.0 |
2.0 |
1.0 |
209912 |
1.0 |
2.0 |
1.0 |
2 |
6.0 |
1 |
2 |
YQ2017167 |
0.0 |
1.0 |
1.0 |
1.0 |
0.0 |
209912 |
0.0 |
2.0 |
1.0 |
2 |
NaN |
0 |
# Store the data-date column with object dtype instead of int64, then print
# the frame summary.
b_prod['数据日期'] = b_prod['数据日期'].astype(object)
b_prod.info()
Int64Index: 3002 entries, 0 to 3001Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 prod_code 3002 non-null object 1 计价类型 2696 non-null float64 2 周期类型 2696 non-null float64 3 模式 2696 non-null float64 4 风险等级 2696 non-null float64 5 是否允许变更分红方式 2696 non-null float64 6 数据日期 3002 non-null object 7 募集方式 2502 non-null float64 8 管理方式 2502 non-null float64 9 业务模式 2502 non-null float64 10 收益特点 3002 non-null int64 11 期限 567 non-null float64 12 投资模式 3002 non-null int64 dtypes: float64(9), int64(2), object(2)memory usage: 328.3+ KB
df + 产品表
gc.collect()
13611
B产品流水表
# Read both transaction-log extracts and stack them. thousands=',' makes
# read_csv treat commas as thousands separators when parsing numbers.
# The concatenation keeps each file's own row index, so index labels repeat.
o1 = pd.read_csv('o_bc.csv', thousands=',')
o = pd.concat([pd.read_csv('o.csv', thousands=','), o1])

# Descriptive column names for the transaction log.
o_columns = [
    '流水号', '业务代码', '渠道标识', 'core_cust_id', 'prod_code', '净值', '申请金额',
    '交易状态', '资金状态', '总金额', '超额管理费', 'trade_date',
]
o.columns = o_columns
o.head(n=2)
|
流水号 |
业务代码 |
渠道标识 |
core_cust_id |
prod_code |
净值 |
申请金额 |
交易状态 |
资金状态 |
总金额 |
超额管理费 |
trade_date |
0 |
ALC202107210000007437420 |
2 |
1 |
b1a66424c4 |
SSTJMZKF002 |
1.0085 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210721 |
1 |
ALC202107140000007373365 |
2 |
1 |
e733784b55 |
SSTJMZKF002 |
1.0071 |
18110.0 |
3 |
3 |
110.0 |
110.0 |
20210714 |
# Convert the integer trade dates to strings, then show the covered date range
# as an (earliest, latest) pair.
o['trade_date'] = o['trade_date'].astype(str)
(o['trade_date'].min(), o['trade_date'].max())
('20210104', '20211130')
# trade_date holds compact 'YYYYMMDD' strings (e.g. '20210721'), so the
# matching format is '%Y%m%d'. The previous '%Y-%m-%d' does not match these
# strings; with strict format matching (pandas >= 2.0) and errors='coerce'
# every value would silently become NaT.
o['datetime'] = pd.to_datetime(o['trade_date'], errors='coerce', format='%Y%m%d')
o.head()
|
流水号 |
业务代码 |
渠道标识 |
core_cust_id |
prod_code |
净值 |
申请金额 |
交易状态 |
资金状态 |
总金额 |
超额管理费 |
trade_date |
datetime |
record_month |
record_day |
record_dayofyear |
record_weekofyear |
record_weekday |
record_quarter |
record_is_wknd |
0 |
ALC202107210000007437420 |
2 |
1 |
b1a66424c4 |
SSTJMZKF002 |
1.0085 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210721 |
2021-07-21 |
7 |
21 |
202 |
29 |
2 |
3 |
0 |
1 |
ALC202107140000007373365 |
2 |
1 |
e733784b55 |
SSTJMZKF002 |
1.0071 |
18110.0 |
3 |
3 |
110.0 |
110.0 |
20210714 |
2021-07-14 |
7 |
14 |
195 |
28 |
2 |
3 |
0 |
2 |
LC2021022300000006329444 |
8 |
5 |
1ef76cc3c0 |
YZFB0032 |
-999.0000 |
110.0 |
3 |
6 |
110.0 |
110.0 |
20210223 |
2021-02-23 |
2 |
23 |
54 |
8 |
1 |
1 |
0 |
3 |
LC2021051300000006917384 |
8 |
5 |
72c91d39c4 |
201811140151 |
1.0000 |
110.0 |
3 |
6 |
110.0 |
110.0 |
20210513 |
2021-05-13 |
5 |
13 |
133 |
19 |
3 |
2 |
0 |
4 |
LC2021062400000007193297 |
10 |
5 |
72c91d39c4 |
DXTY0289 |
1.0000 |
110.0 |
3 |
0 |
110.0 |
110.0 |
20210624 |
2021-06-24 |
6 |
24 |
175 |
25 |
3 |
2 |
0 |
o.info()
Int64Index: 53839 entries, 0 to 11346Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 流水号 53839 non-null object 1 业务代码 53839 non-null int64 2 渠道标识 53839 non-null int64 3 core_cust_id 53839 non-null object 4 prod_code 53839 non-null object 5 净值 53839 non-null float64 6 申请金额 53839 non-null float64 7 交易状态 53839 non-null int64 8 资金状态 53839 non-null int64 9 总金额 53839 non-null float64 10 超额管理费 53839 non-null float64 11 trade_date 53839 non-null int64 dtypes: float64(4), int64(5), object(3)memory usage: 5.3+ MB
o.nunique()
流水号 53839业务代码 9渠道标识 4core_cust_id 15683prod_code 114净值 106申请金额 798交易状态 5资金状态 5总金额 1970超额管理费 1839trade_date 228dtype: int64
o.nunique()
流水号 42492业务代码 9渠道标识 4core_cust_id 13678prod_code 112净值 87申请金额 741交易状态 5资金状态 5总金额 1123超额管理费 960trade_date 188dtype: int64
B产品毫无作用
df+产品流水:待进一步处理
o.head(1)
# For each month 1..11 collect the number of purchase records (if_buy == 1)
# and the total number of transaction records, printing one summary per month.
# (The whole loop was fused onto one line, which is a syntax error.)
num_b = []
nums_b = []
for i in range(1, 12):
    num = o[(o['record_month'] == i) & (o['if_buy'] == 1)].shape[0]
    num_b.append(num)
    nums = o[(o['record_month'] == i)].shape[0]
    nums_b.append(nums)
    print('{}月的购买数量为{}, 总共流水数为{}'.format(i, num, nums))
1月的购买数量为0, 总共流水数为14032月的购买数量为0, 总共流水数为12133月的购买数量为1276, 总共流水数为57304月的购买数量为4053, 总共流水数为74065月的购买数量为3060, 总共流水数为70096月的购买数量为2535, 总共流水数为52597月的购买数量为2688, 总共流水数为51588月的购买数量为3235, 总共流水数为47559月的购买数量为2877, 总共流水数为455910月的购买数量为3224, 总共流水数为556911月的购买数量为2654, 总共流水数为5778
# Plot purchases and total records per month. The [2:] slices start at month 3,
# skipping months 1 and 2, for which the printed summary shows zero purchases.
# (The three statements were fused onto one line, which is a syntax error.)
x_b = list(range(1, 12))
plt.plot(x_b[2:], num_b[2:])
plt.plot(x_b[2:], nums_b[2:])
[]
![3月至11月购买数量与流水总数折线图](output_45_1.png)
# Number of purchases per (customer, product) pair in month 8, followed by the
# distribution of those per-pair counts.
# (The two statements were fused onto one line, which is a syntax error.)
# NOTE(review): this rebinds `st`, which the imports at the top of the file use
# as the alias for scipy.stats; the name is kept here in case later cells rely
# on it, but a distinct variable name would be safer.
st = (
    o[o['record_month'] == 8]
    .groupby(['core_cust_id', 'prod_code'])['if_buy']
    .sum()
    .reset_index()
)
st['if_buy'].value_counts()
1 24180 9712 2723 464 195 37 19 113 115 1Name: if_buy, dtype: int64
2418+272+46+19+3+4
2762
# Keep only the columns needed for the purchase analysis.
# 'record_month' used to be listed twice, which produced two columns with the
# same label (so o2['record_month'] returned a DataFrame instead of a Series);
# it is now selected once.
o2 = o[['流水号', 'core_cust_id', 'prod_code', '净值', 'apply_amt', '总金额', '资金状态',
        'record_month', 'trade_date', 'if_buy']]
o2.head(2)
|
流水号 |
core_cust_id |
prod_code |
净值 |
apply_amt |
总金额 |
资金状态 |
record_month |
trade_date |
record_month |
if_buy |
0 |
ALC202107210000007437420 |
b1a66424c4 |
SSTJMZKF002 |
1.0085 |
75110.0 |
110.0 |
3 |
7 |
20210721 |
7 |
1 |
1 |
ALC202107140000007373365 |
e733784b55 |
SSTJMZKF002 |
1.0071 |
18110.0 |
110.0 |
3 |
7 |
20210714 |
7 |
1 |
|
流水号 |
core_cust_id |
prod_code |
净值 |
apply_amt |
总金额 |
资金状态 |
record_month |
trade_date |
record_month |
if_buy |
0 |
ALC202107210000007437420 |
b1a66424c4 |
SSTJMZKF002 |
1.0085 |
75110.0 |
110.0 |
3 |
7 |
20210721 |
7 |
1 |
1 |
ALC202107140000007373365 |
e733784b55 |
SSTJMZKF002 |
1.0071 |
18110.0 |
110.0 |
3 |
7 |
20210714 |
7 |
1 |
2 |
LC2021022300000006329444 |
1ef76cc3c0 |
YZFB0032 |
-999.0000 |
110.0 |
110.0 |
6 |
2 |
20210223 |
2 |
0 |
3 |
LC2021051300000006917384 |
72c91d39c4 |
201811140151 |
1.0000 |
110.0 |
110.0 |
6 |
5 |
20210513 |
5 |
0 |
4 |
LC2021062400000007193297 |
72c91d39c4 |
DXTY0289 |
1.0000 |
110.0 |
110.0 |
0 |
6 |
20210624 |
6 |
0 |
# Order the log by customer and product, then replace the (repeating) row
# labels with a fresh 0..n-1 index without keeping the old labels as a column.
o.sort_values(by=['core_cust_id', 'prod_code'], inplace=True)
o.reset_index(drop=True, inplace=True)

# Number of purchase records in month 8.
len(o[(o['record_month'] == 8) & (o['if_buy'] == 1)])
3235
# Month-8 purchase records reduced to one row per (customer, product) pair.
# The de-duplication is chained instead of applied in place on the filtered
# selection, which avoids pandas' SettingWithCopy pattern (otherwise hidden by
# the global warnings filter). The resulting rows are the same.
oo = o[(o['record_month'] == 8) & (o['if_buy'] == 1)].drop_duplicates(
    ['core_cust_id', 'prod_code']
)
oo.shape[0]
2762
oo[oo['record_day']==30].shape[0]
149
用户对产品的交易次数
oo.drop_duplicates(['core_cust_id', 'prod_code', 'apply_amt', '净值']).shape[0]
2762
oo[:5]
|
流水号 |
业务代码 |
渠道标识 |
core_cust_id |
prod_code |
净值 |
apply_amt |
交易状态 |
资金状态 |
总金额 |
超额管理费 |
trade_date |
datetime |
record_month |
record_day |
record_dayofyear |
record_weekofyear |
record_weekday |
record_quarter |
record_is_wknd |
deal_bus |
if_buy |
deal_bus_channel |
deal_bus_fund |
deal_bus_c_f |
43 |
ALC202108110000007585651 |
2 |
1 |
1087ee18a7 |
SSTJMZKF002 |
1.0117 |
142610.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
92 |
ALC202108300000007699259 |
2 |
1 |
1087ee2dc3 |
SSTJMZKF001 |
1.0208 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
96 |
ALC202108240000007666267 |
2 |
1 |
1087ee2e87 |
SSTJMZKF001 |
1.0200 |
154610.0 |
3 |
3 |
110.0 |
110.0 |
20210824 |
2021-08-24 |
8 |
24 |
236 |
34 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
135 |
ALC202108030000007537694 |
2 |
1 |
1087ee5402 |
SSTJMZKF001 |
1.0178 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
151 |
ALC202108190000007632217 |
2 |
1 |
1087ee55fd |
SSTJMZKF001 |
1.0200 |
1350110.0 |
3 |
3 |
110.0 |
110.0 |
20210819 |
2021-08-19 |
8 |
19 |
231 |
33 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
o[(o['record_month']==8) & (o['if_buy']==1)][:100]
|
流水号 |
业务代码 |
渠道标识 |
core_cust_id |
prod_code |
净值 |
apply_amt |
交易状态 |
资金状态 |
总金额 |
超额管理费 |
trade_date |
datetime |
record_month |
record_day |
record_dayofyear |
record_weekofyear |
record_weekday |
record_quarter |
record_is_wknd |
deal_bus |
if_buy |
deal_bus_channel |
deal_bus_fund |
deal_bus_c_f |
43 |
ALC202108110000007585651 |
2 |
1 |
1087ee18a7 |
SSTJMZKF002 |
1.0117 |
142610.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
44 |
ALC202108180000007627651 |
2 |
1 |
1087ee18a7 |
SSTJMZKF002 |
1.0127 |
90110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
92 |
ALC202108300000007699259 |
2 |
1 |
1087ee2dc3 |
SSTJMZKF001 |
1.0208 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
96 |
ALC202108240000007666267 |
2 |
1 |
1087ee2e87 |
SSTJMZKF001 |
1.0200 |
154610.0 |
3 |
3 |
110.0 |
110.0 |
20210824 |
2021-08-24 |
8 |
24 |
236 |
34 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
135 |
ALC202108030000007537694 |
2 |
1 |
1087ee5402 |
SSTJMZKF001 |
1.0178 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
151 |
ALC202108190000007632217 |
2 |
1 |
1087ee55fd |
SSTJMZKF001 |
1.0200 |
1350110.0 |
3 |
3 |
110.0 |
110.0 |
20210819 |
2021-08-19 |
8 |
19 |
231 |
33 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
165 |
ALC202108040000007543279 |
2 |
1 |
1087ee578a |
SSTJMZKF001 |
1.0186 |
1399610.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
174 |
ALC202108060000007559516 |
2 |
1 |
1087ee578a |
SSTJMZKF002 |
1.0117 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210806 |
2021-08-06 |
8 |
6 |
218 |
31 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
178 |
ALC202108240000007666537 |
2 |
1 |
1087ee61b5 |
SSTJMZKF002 |
1.0132 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210824 |
2021-08-24 |
8 |
24 |
236 |
34 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
182 |
ALC202108120000007593777 |
2 |
1 |
1087ee62dc |
SSTJMZKF001 |
1.0194 |
45110.0 |
3 |
3 |
110.0 |
110.0 |
20210812 |
2021-08-12 |
8 |
12 |
224 |
32 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
183 |
ALC202108160000007609755 |
2 |
1 |
1087ee62dc |
SSTJMZKF002 |
1.0127 |
45110.0 |
3 |
3 |
110.0 |
110.0 |
20210816 |
2021-08-16 |
8 |
16 |
228 |
33 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
202 |
ALC202108300000007694893 |
2 |
1 |
1087ee6dd1 |
SSTJMZKF001 |
1.0208 |
300110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
204 |
ALC202108310000007709314 |
2 |
1 |
1087ee6dd1 |
SSTJMZKF002 |
1.0140 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
227 |
ALC202108030000007536410 |
2 |
1 |
1087ee7987 |
SSTJMZKF001 |
1.0178 |
315110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
241 |
ALC202108180000007626876 |
2 |
1 |
1087ee8222 |
SSTJMZKF002 |
1.0127 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
251 |
ALC202108260000007678005 |
2 |
1 |
1087ee8541 |
SSTJMZKF002 |
1.0140 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210826 |
2021-08-26 |
8 |
26 |
238 |
34 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
258 |
ALC202108120000007594293 |
2 |
1 |
10ff23a3aa |
SSTJMZKF001 |
1.0194 |
123110.0 |
3 |
3 |
110.0 |
110.0 |
20210812 |
2021-08-12 |
8 |
12 |
224 |
32 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
263 |
ALC202108020000007519975 |
2 |
1 |
10ff23a3aa |
SSTJMZKF002 |
1.0109 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
287 |
ALC202108270000007685686 |
2 |
2 |
10ff23b1ba |
SSTJMZKF002 |
1.0140 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210827 |
2021-08-27 |
8 |
27 |
239 |
34 |
4 |
3 |
1 |
32 |
1 |
322 |
323 |
3223 |
323 |
ALC202108160000007609399 |
2 |
1 |
10ff23d864 |
SSTJMZKF001 |
1.0194 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210816 |
2021-08-16 |
8 |
16 |
228 |
33 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
327 |
ALC202108100000007579204 |
2 |
1 |
10ff23d869 |
SSTJMZKF001 |
1.0186 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
330 |
ALC202108020000007517266 |
2 |
1 |
10ff23d869 |
SSTJMZKF001 |
1.0178 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
368 |
ALC202108200000007640207 |
2 |
1 |
10ff23f166 |
SSTJMZKF001 |
1.0200 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210820 |
2021-08-20 |
8 |
20 |
232 |
33 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
373 |
ALC202108030000007538763 |
2 |
1 |
10ff23f28f |
SSTJMZKF001 |
1.0178 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
375 |
ALC202108030000007538770 |
2 |
1 |
10ff23f28f |
SSTJMZKF001 |
1.0178 |
7610.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
391 |
ALC202108170000007617318 |
2 |
1 |
10ff23f6dd |
SSTJMZKF001 |
1.0194 |
90110.0 |
3 |
3 |
110.0 |
110.0 |
20210817 |
2021-08-17 |
8 |
17 |
229 |
33 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
395 |
ALC202108270000007682839 |
2 |
1 |
10ff23f6dd |
SSTJMZKF002 |
1.0140 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210827 |
2021-08-27 |
8 |
27 |
239 |
34 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
400 |
ALC202108030000007536210 |
2 |
1 |
10ff23f7a3 |
SSTJMZKF001 |
1.0178 |
58610.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
402 |
ALC202108180000007625004 |
2 |
1 |
10ff23f7a3 |
SSTJMZKF002 |
1.0127 |
184610.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
425 |
ALC202108110000007584513 |
2 |
1 |
10ff2407a7 |
SSTJMZKF002 |
1.0117 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
428 |
ALC202108200000007635938 |
2 |
1 |
10ff2407a7 |
SSTJMZKF002 |
1.0132 |
85610.0 |
3 |
3 |
110.0 |
110.0 |
20210820 |
2021-08-20 |
8 |
20 |
232 |
33 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
430 |
ALC202108100000007576554 |
2 |
1 |
10ff2407a7 |
SSTJMZKF002 |
1.0117 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
433 |
ALC202108020000007494374 |
2 |
1 |
10ff240b90 |
SSTJMZKF002 |
1.0109 |
81110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
437 |
ALC202108270000007685958 |
2 |
1 |
10ff2410a3 |
SSTJMZKF002 |
1.0140 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210827 |
2021-08-27 |
8 |
27 |
239 |
34 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
442 |
ALC202108100000007576353 |
2 |
1 |
10ff24161f |
SSTJMZKF001 |
1.0186 |
37610.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
447 |
ALC202108100000007576360 |
2 |
1 |
10ff24161f |
SSTJMZKF002 |
1.0117 |
45110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
562 |
ALC202108020000007520147 |
2 |
1 |
10ff244ada |
SSTJMZKF002 |
1.0109 |
16610.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
563 |
ALC202108020000007520133 |
2 |
1 |
10ff244ada |
SSTJMZKF002 |
1.0109 |
228110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
594 |
ALC202108040000007544729 |
2 |
1 |
10ff244d35 |
SSTJMZKF002 |
1.0109 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
598 |
ALC202108230000007661626 |
2 |
1 |
10ff244f8c |
SSTJMZKF001 |
1.0200 |
495110.0 |
3 |
3 |
110.0 |
110.0 |
20210823 |
2021-08-23 |
8 |
23 |
235 |
34 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
612 |
ALC202108060000007553782 |
2 |
1 |
10ff24562c |
SSTJMZKF002 |
1.0117 |
225110.0 |
3 |
3 |
110.0 |
110.0 |
20210806 |
2021-08-06 |
8 |
6 |
218 |
31 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
626 |
ALC202108100000007576431 |
2 |
1 |
10ff2459b7 |
SSTJMZKF002 |
1.0117 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
639 |
ALC202108230000007655132 |
2 |
1 |
10ff246e67 |
SSTJMZKF001 |
1.0200 |
90110.0 |
3 |
3 |
110.0 |
110.0 |
20210823 |
2021-08-23 |
8 |
23 |
235 |
34 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
658 |
ALC202108200000007636530 |
2 |
1 |
10ff246ecc |
SSTJMZKF002 |
1.0132 |
27110.0 |
3 |
3 |
110.0 |
110.0 |
20210820 |
2021-08-20 |
8 |
20 |
232 |
33 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
661 |
ALC202108160000007608465 |
2 |
1 |
10ff24724b |
SSTJMZKF001 |
1.0194 |
31610.0 |
3 |
3 |
110.0 |
110.0 |
20210816 |
2021-08-16 |
8 |
16 |
228 |
33 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
663 |
ALC202108180000007629341 |
2 |
1 |
10ff247761 |
SSTJMZKF002 |
1.0127 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
665 |
ALC202108180000007629074 |
2 |
1 |
10ff247761 |
SSTJMZKF002 |
1.0127 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
678 |
ALC202108030000007536327 |
2 |
1 |
10ff247f99 |
SSTJMZKF001 |
1.0178 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
703 |
ALC202108020000007522986 |
2 |
1 |
10ff24888f |
SSTJMZKF002 |
1.0109 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
707 |
ALC202108310000007706589 |
2 |
1 |
10ff2488f4 |
SSTJMZKF001 |
1.0208 |
225110.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
726 |
ALC202108030000007537438 |
2 |
1 |
10ff24895a |
SSTJMZKF001 |
1.0178 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
768 |
ALC202108240000007667112 |
2 |
2 |
10ff2491f2 |
SSTJMZKF001 |
1.0200 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210824 |
2021-08-24 |
8 |
24 |
236 |
34 |
1 |
3 |
0 |
32 |
1 |
322 |
323 |
3223 |
776 |
ALC202108060000007552881 |
2 |
1 |
10ff249643 |
SSTJMZKF001 |
1.0186 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210806 |
2021-08-06 |
8 |
6 |
218 |
31 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
788 |
ALC202108250000007672921 |
2 |
1 |
10ff24a190 |
SSTJMZKF001 |
1.0208 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210825 |
2021-08-25 |
8 |
25 |
237 |
34 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
833 |
ALC202108030000007537223 |
2 |
1 |
10ff24b51b |
SSTJMZKF001 |
1.0178 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210803 |
2021-08-03 |
8 |
3 |
215 |
31 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
864 |
ALC202108160000007612176 |
2 |
1 |
10ff24ba93 |
SSTJMZKF002 |
1.0127 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210816 |
2021-08-16 |
8 |
16 |
228 |
33 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
870 |
ALC202108200000007642125 |
2 |
1 |
10ff24bbc3 |
SSTJMZKF002 |
1.0132 |
135110.0 |
3 |
3 |
110.0 |
110.0 |
20210820 |
2021-08-20 |
8 |
20 |
232 |
33 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
871 |
ALC202108190000007632855 |
2 |
1 |
10ff24bbc3 |
SSTJMZKF002 |
1.0132 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210819 |
2021-08-19 |
8 |
19 |
231 |
33 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
912 |
ALC202108180000007628292 |
2 |
1 |
10ff24d523 |
SSTJMZKF002 |
1.0127 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
915 |
ALC202108200000007641380 |
2 |
1 |
10ff24d718 |
SSTJMZKF002 |
1.0132 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210820 |
2021-08-20 |
8 |
20 |
232 |
33 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
916 |
ALC202108050000007549306 |
2 |
2 |
10ff24d9ce |
SSTJMZKF002 |
1.0117 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210805 |
2021-08-05 |
8 |
5 |
217 |
31 |
3 |
3 |
0 |
32 |
1 |
322 |
323 |
3223 |
934 |
ALC202108020000007517268 |
2 |
1 |
10ff24ec2b |
SSTJMZKF002 |
1.0109 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
943 |
ALC202108250000007672948 |
2 |
2 |
10ff24f397 |
SSTJMZKF002 |
1.0132 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210825 |
2021-08-25 |
8 |
25 |
237 |
34 |
2 |
3 |
0 |
32 |
1 |
322 |
323 |
3223 |
974 |
ALC202108040000007546238 |
2 |
1 |
10ff24fbca |
SSTJMZKF002 |
1.0109 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
984 |
ALC202108040000007542238 |
2 |
1 |
10ff24fc96 |
SSTJMZKF002 |
1.0109 |
285110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
985 |
ALC202108180000007629157 |
2 |
1 |
10ff24fc96 |
SSTJMZKF002 |
1.0127 |
135110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1030 |
ALC202108310000007706732 |
2 |
1 |
10ff25097c |
SSTJMZKF001 |
1.0208 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1082 |
ALC202108170000007617418 |
2 |
1 |
10ff333597 |
SSTJMZKF001 |
1.0194 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210817 |
2021-08-17 |
8 |
17 |
229 |
33 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1083 |
ALC202108180000007628072 |
2 |
1 |
10ff33372b |
SSTJMZKF001 |
1.0200 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1097 |
ALC202108170000007618242 |
2 |
1 |
10ff335ab9 |
SSTJMZKF002 |
1.0127 |
240110.0 |
3 |
3 |
110.0 |
110.0 |
20210817 |
2021-08-17 |
8 |
17 |
229 |
33 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1117 |
ALC202108110000007587848 |
2 |
1 |
10ff3385aa |
SSTJMZKF002 |
1.0117 |
16610.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1163 |
ALC202108100000007581893 |
2 |
1 |
10ff34318e |
SSTJMZKF001 |
1.0186 |
120110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1164 |
ALC202108050000007550230 |
2 |
1 |
10ff34318e |
SSTJMZKF001 |
1.0186 |
84110.0 |
3 |
3 |
110.0 |
110.0 |
20210805 |
2021-08-05 |
8 |
5 |
217 |
31 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1186 |
ALC202108230000007656206 |
2 |
1 |
10ff34525c |
SSTJMZKF001 |
1.0200 |
60110.0 |
3 |
3 |
110.0 |
110.0 |
20210823 |
2021-08-23 |
8 |
23 |
235 |
34 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1222 |
ALC202108120000007593807 |
2 |
1 |
12dbfa1167 |
SSTJMZKF002 |
1.0127 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210812 |
2021-08-12 |
8 |
12 |
224 |
32 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1398 |
ALC202108300000007697805 |
2 |
2 |
12dbfa64f9 |
SSTJMZKF001 |
1.0208 |
45110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
322 |
323 |
3223 |
1405 |
ALC202108310000007701049 |
2 |
1 |
12dbfa681e |
SSTJMZKF002 |
1.0140 |
22610.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1411 |
ALC202108310000007709598 |
2 |
1 |
12dbfa681e |
SSTJMZKF002 |
1.0140 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1416 |
ALC202108020000007531945 |
2 |
1 |
12dbfa6880 |
SSTJMZKF001 |
1.0178 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1483 |
ALC202108020000007530552 |
2 |
1 |
13532fa758 |
SSTJMZKF002 |
1.0109 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210802 |
2021-08-02 |
8 |
2 |
214 |
31 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1484 |
ALC202108110000007586048 |
2 |
1 |
13532faad9 |
SSTJMZKF002 |
1.0117 |
300110.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1501 |
ALC202108270000007683188 |
2 |
1 |
13532fb886 |
SSTJMZKF002 |
1.0140 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210827 |
2021-08-27 |
8 |
27 |
239 |
34 |
4 |
3 |
1 |
32 |
1 |
321 |
323 |
3213 |
1502 |
ALC202108300000007694268 |
2 |
1 |
13532fb886 |
SSTJMZKF002 |
1.0140 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1507 |
ALC202108040000007545078 |
2 |
1 |
13532fb886 |
SSTJMZKF002 |
1.0109 |
48110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1510 |
ALC202108040000007545374 |
2 |
1 |
13532fb886 |
SSTJMZKF002 |
1.0109 |
27110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1521 |
ALC202108050000007550087 |
2 |
1 |
13532fc054 |
SSTJMZKF001 |
1.0186 |
450110.0 |
3 |
3 |
110.0 |
110.0 |
20210805 |
2021-08-05 |
8 |
5 |
217 |
31 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1611 |
ALC202108310000007708497 |
2 |
1 |
13532fed49 |
SSTJMZKF001 |
1.0208 |
37610.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1639 |
ALC202108300000007700121 |
2 |
1 |
13532ff706 |
SSTJMZKF001 |
1.0208 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1641 |
ALC202108240000007663925 |
2 |
1 |
13532ff706 |
SSTJMZKF001 |
1.0200 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210824 |
2021-08-24 |
8 |
24 |
236 |
34 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1647 |
ALC202108040000007542107 |
2 |
1 |
13532ff706 |
SSTJMZKF002 |
1.0109 |
15110.0 |
3 |
3 |
110.0 |
110.0 |
20210804 |
2021-08-04 |
8 |
4 |
216 |
31 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1650 |
ALC202108110000007588997 |
2 |
1 |
13532ff706 |
SSTJMZKF002 |
1.0117 |
75110.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1652 |
ALC202108050000007551715 |
2 |
1 |
13532ff70a |
SSTJMZKF001 |
1.0186 |
52610.0 |
3 |
3 |
110.0 |
110.0 |
20210805 |
2021-08-05 |
8 |
5 |
217 |
31 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1653 |
ALC202108260000007678351 |
2 |
1 |
13532ff70a |
SSTJMZKF001 |
1.0208 |
135110.0 |
3 |
3 |
110.0 |
110.0 |
20210826 |
2021-08-26 |
8 |
26 |
238 |
34 |
3 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1659 |
ALC202108110000007585615 |
2 |
1 |
13532ff70a |
SSTJMZKF002 |
1.0117 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210811 |
2021-08-11 |
8 |
11 |
223 |
32 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1665 |
ALC202108300000007696531 |
2 |
1 |
13532ff70b |
SSTJMZKF001 |
1.0208 |
150110.0 |
3 |
3 |
110.0 |
110.0 |
20210830 |
2021-08-30 |
8 |
30 |
242 |
35 |
0 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1686 |
ALC202108180000007628699 |
2 |
1 |
13532ffd4b |
SSTJMZKF002 |
1.0127 |
450110.0 |
3 |
3 |
110.0 |
110.0 |
20210818 |
2021-08-18 |
8 |
18 |
230 |
33 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1704 |
ALC202108310000007704681 |
2 |
1 |
13533003f0 |
SSTJMZKF001 |
1.0208 |
55610.0 |
3 |
3 |
110.0 |
110.0 |
20210831 |
2021-08-31 |
8 |
31 |
243 |
35 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1706 |
ALC202108100000007576098 |
2 |
1 |
13533003f0 |
SSTJMZKF001 |
1.0186 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1716 |
ALC202108250000007670596 |
2 |
1 |
13533003f0 |
SSTJMZKF002 |
1.0132 |
67610.0 |
3 |
3 |
110.0 |
110.0 |
20210825 |
2021-08-25 |
8 |
25 |
237 |
34 |
2 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
1717 |
ALC202108100000007576092 |
2 |
1 |
13533003f0 |
SSTJMZKF002 |
1.0117 |
30110.0 |
3 |
3 |
110.0 |
110.0 |
20210810 |
2021-08-10 |
8 |
10 |
222 |
32 |
1 |
3 |
0 |
32 |
1 |
321 |
323 |
3213 |
# Columns aggregated with mean/max/min/std/median/sum/range in the loop below.
# (This assignment and the next were fused onto one line, which is a syntax error.)
o_col_all = ['净值', '总金额', '超额管理费', 'apply_amt']
# Columns for which only the number of distinct values is computed.
o_cols = ['业务代码', '渠道标识', '资金状态', '交易状态', 'deal_bus', 'deal_bus_channel', 'deal_bus_fund',
          'deal_bus_c_f']

# 'YYYYMM' month key taken from the first six characters of the trade date.
o['date'] = o['trade_date'].apply(lambda x: x[:6])

# Sample month (value of df['a3']) -> the calendar month immediately before it.
dict_ = {'2021-07-01': '202106', '2021-08-01': '202107', '2021-09-01': '202108', '2021-10-01': '202109'}
# Sample month -> the two calendar months immediately before it.
dict_1 = {'2021-07-01': ['202105', '202106'], '2021-08-01': ['202106', '202107'],
          '2021-09-01': ['202107', '202108'], '2021-10-01': ['202108', '202109']}

# Work on a copy so the aggregation does not touch the original log.
data = o.copy()
dfs = []
split_month = ['redu_1', 'redu_2', 'redu_b', 'redu_b1', 'redu_b2']
for month in sorted(df['a3'].unique()):
print(month)
tmp_df = df[df['a3'] == month]
stat = data[data['date'] == dict_[month]].groupby('core_cust_id')['prod_code'].count().reset_index()
stat.columns = ['core_cust_id', 'uid_count_1']
stat['pid_nunique_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')['prod_code'].agg(
'nunique').values
stat['pid_mean_count_1'] = stat['uid_count_1'] / stat['pid_nunique_1']
stat['buy_num_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'sum').values
stat['buy_rate_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'mean').values
for col in o_col_all:
stat[f'{col}_apply_mean_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg('mean').values
stat[f'{col}_apply_max_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
'max').values
stat[f'{col}_apply_min_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
'min').values
stat[f'{col}_apply_std_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
'std').values
stat[f'{col}_apply_median_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
'median').values
stat[f'{col}_apply_sum_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg('sum').values
stat[f'{col}_apply_max_min_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
diff_max_min).values
for col in o_cols:
stat[f'{col}_apply_nunique_1'] = data[data['date'] == dict_[month]].groupby('core_cust_id')[col].agg(
'nunique').values
stat1 = data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[
'prod_code'].count().reset_index()
stat1.columns = ['core_cust_id', f'uid_count_2']
stat1['pid_nunique_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[
'prod_code'].agg('nunique').values
stat1['pid_mean_count_2'] = stat1['uid_count_2'] / stat1['pid_nunique_2']
stat1['buy_num_2'] = data[data['date'] == dict_1[month][0]].groupby('core_cust_id')['if_buy'].agg(
'sum').values
stat1['buy_rate_2'] = data[data['date'] == dict_1[month][0]].groupby('core_cust_id')['if_buy'].agg(
'mean').values
for col in o_col_all:
stat1[f'{col}_apply_mean_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'mean').values
stat1[f'{col}_apply_max_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'max').values
stat1[f'{col}_apply_min_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'min').values
stat1[f'{col}_apply_std_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'std').values
stat1[f'{col}_apply_median_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'median').values
stat1[f'{col}_apply_sum_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'sum').values
stat1[f'{col}_apply_max_min_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
diff_max_min).values
for col in o_cols:
stat1[f'{col}_apply_nunique_2'] = \
data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby('core_cust_id')[col].agg(
'nunique').values
stat2 = data[data['date'] <= dict_[month]].groupby('core_cust_id')['prod_code'].count().reset_index()
stat2.columns = ['core_cust_id', f'uid_count_b']
stat2['pid_nunique_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')['prod_code'].agg('nunique').values
stat2['pid_mean_count_b'] = stat2['uid_count_b'] / stat2['pid_nunique_b']
stat2['buy_num_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'sum').values
stat2['buy_rate_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'mean').values
for col in o_col_all:
stat2[f'{col}_apply_mean_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
'mean').values
stat2[f'{col}_apply_max_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
'max').values
stat2[f'{col}_apply_min_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
'min').values
stat2[f'{col}_apply_std_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
'std').values
stat2[f'{col}_apply_median_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
'median').values
stat2[f'{col}_apply_sum_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg('sum').values
stat2[f'{col}_apply_max_min_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg(
diff_max_min).values
for col in o_cols:
stat2[f'{col}_apply_nunique_b'] = data[data['date'] <= dict_[month]].groupby('core_cust_id')[col].agg('nunique').values
stat3 = data[data['date'] < dict_[month]].groupby('core_cust_id')['prod_code'].count().reset_index()
stat3.columns = ['core_cust_id', 'uid_count_b1']
stat3['pid_nunique_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')['prod_code'].agg(
'nunique').values
stat3['pid_mean_count_b1'] = stat3['uid_count_b1'] / stat3['pid_nunique_b1']
stat3['buy_num_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'sum').values
stat3['buy_rate_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')['if_buy'].agg(
'mean').values
for col in o_col_all:
stat3[f'{col}_apply_mean_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
'mean').values
stat3[f'{col}_apply_max_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
'max').values
stat3[f'{col}_apply_min_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
'min').values
stat3[f'{col}_apply_std_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
'std').values
stat3[f'{col}_apply_median_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
'median').values
stat3[f'{col}_apply_sum_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg('sum').values
stat3[f'{col}_apply_max_min_b1'] = data[data['date'] < dict_[month]].groupby('core_cust_id')[col].agg(
diff_max_min).values
for col in o_cols:
stat3[f'{col}_apply_nunique_b1'] = data[
data['date'] < dict_[month]].groupby('core_cust_id')[col].agg('nunique').values
stat4 = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')['prod_code'].count().reset_index()
stat4.columns = ['core_cust_id', 'uid_count_b2']
stat4['pid_nunique_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')['prod_code'].agg(
'nunique').values
stat4['pid_mean_count_b2'] = stat4['uid_count_b2'] / stat4['pid_nunique_b2']
stat4['buy_num_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')['if_buy'].agg(
'sum').values
stat4['buy_rate_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')['if_buy'].agg(
'mean').values
for col in o_col_all:
stat4[f'{col}_apply_mean_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'mean').values
stat4[f'{col}_apply_max_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'max').values
stat4[f'{col}_apply_min_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'min').values
stat4[f'{col}_apply_std_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'std').values
stat4[f'{col}_apply_median_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'median').values
stat4[f'{col}_apply_sum_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'sum').values
stat4[f'{col}_apply_max_min_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
diff_max_min).values
for col in o_cols:
stat4[f'{col}_apply_nunique_b2'] = data[data['date'] < dict_1[month][0]].groupby('core_cust_id')[col].agg(
'nunique').values
stat_5 = data[data['date'] < dict_[month]].groupby(['core_cust_id', 'prod_code'])['流水号'].count().reset_index()
stat_5.columns = ['core_cust_id', 'prod_code', 'flow_redu_b1']
stat_5['cp_buy_num_b1'] = data[data['date'] < dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'sum').values
stat_5['cp_buy_rate_b1'] = data[data['date'] < dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'mean').values
stat_6 = data[data['date'] <= dict_[month]].groupby(['core_cust_id', 'prod_code'])['流水号'].count().reset_index()
stat_6.columns = ['core_cust_id', 'prod_code', 'flow_redu_b']
stat_6['cp_buy_num_b'] = data[data['date'] <= dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'sum').values
stat_6['cp_buy_rate_b'] = data[data['date'] <= dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'mean').values
stat_7 = data[data['date'] < dict_1[month][0]].groupby(['core_cust_id', 'prod_code'])[
'流水号'].count().reset_index()
stat_7.columns = ['core_cust_id', 'prod_code', 'flow_redu_b2']
stat_7['cp_buy_num_b2'] = data[data['date'] < dict_1[month][0]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'sum').values
stat_7['cp_buy_rate_b2'] = data[data['date'] < dict_1[month][0]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'mean').values
stat_8 = data[data['date'] == dict_[month]].groupby(['core_cust_id', 'prod_code'])['流水号'].count().reset_index()
stat_8.columns = ['core_cust_id', 'prod_code', 'flow_redu_1']
stat_8['cp_buy_num_1'] = data[data['date'] == dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'sum').values
stat_8['cp_buy_rate_1'] = data[data['date'] == dict_[month]].groupby(['core_cust_id', 'prod_code'])['if_buy'].agg(
'mean').values
stat_9 = data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby(
['core_cust_id', 'prod_code'])['流水号'].count().reset_index()
stat_9.columns = ['core_cust_id', 'prod_code', 'flow_redu_2']
stat_9['cp_buy_num_2'] = data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby(
['core_cust_id', 'prod_code'])['if_buy'].agg('sum').values
stat_9['cp_buy_rate_2'] = data[(data['date'] == dict_1[month][0]) & (data['date'] == dict_1[month][1])].groupby(
['core_cust_id', 'prod_code'])['if_buy'].agg('mean').values
tmp_df = tmp_df.merge(stat, on=['core_cust_id'], how='left')
tmp_df = tmp_df.merge(stat1, on=['core_cust_id'], how='left')
tmp_df = tmp_df.merge(stat2, on=['core_cust_id'], how='left')
tmp_df = tmp_df.merge(stat3, on=['core_cust_id'], how='left')
tmp_df = tmp_df.merge(stat4, on=['core_cust_id'], how='left')
tmp_df = tmp_df.merge(stat_5, on=['core_cust_id', 'prod_code'], how='left')
tmp_df = tmp_df.merge(stat_6, on=['core_cust_id', 'prod_code'], how='left')
tmp_df = tmp_df.merge(stat_7, on=['core_cust_id', 'prod_code'], how='left')
tmp_df = tmp_df.merge(stat_8, on=['core_cust_id', 'prod_code'], how='left')
tmp_df = tmp_df.merge(stat_9, on=['core_cust_id', 'prod_code'], how='left')
freq_lis = ['业务代码', '渠道标识', '资金状态', '交易状态']
dfs.append(tmp_df)
df = pd.concat(dfs).reset_index(drop=True)
2021-07-01
2021-08-01
2021-09-01
2021-10-01
# Show the number of distinct values in every column of df.
df.nunique()
id 390977
core_cust_id 212637
prod_code 2
a2 1
a3 4
y 2
type 2
uid_count_1 22
pid_nunique_1 6
pid_mean_count_1 33
净值_apply_mean_1 702
净值_apply_max_1 42
净值_apply_min_1 43
净值_apply_std_1 812
净值_apply_median_1 202
净值_apply_sum_1 835
净值_apply_max_min_1 159
总金额_apply_mean_1 910
总金额_apply_max_1 689
总金额_apply_min_1 504
总金额_apply_std_1 489
总金额_apply_median_1 724
总金额_apply_sum_1 992
总金额_apply_max_min_1 385
超额管理费_apply_mean_1 796
超额管理费_apply_max_1 585
超额管理费_apply_min_1 443
超额管理费_apply_std_1 416
超额管理费_apply_median_1 641
超额管理费_apply_sum_1 846
超额管理费_apply_max_min_1 311
apply_amt_apply_mean_1 703
apply_amt_apply_max_1 377
apply_amt_apply_min_1 311
apply_amt_apply_std_1 693
apply_amt_apply_median_1 434
apply_amt_apply_sum_1 1037
apply_amt_apply_max_min_1 259
业务代码_apply_nunique_1 6
渠道标识_apply_nunique_1 2
资金状态_apply_nunique_1 5
交易状态_apply_nunique_1 3
deal_bus_apply_nunique_1 8
deal_bus_channel_apply_nunique_1 8
deal_bus_fund_apply_nunique_1 8
deal_bus_c_f_apply_nunique_1 8
uid_count_2 0
pid_nunique_2 0
pid_mean_count_2 0
净值_apply_mean_2 0
净值_apply_max_2 0
净值_apply_min_2 0
净值_apply_std_2 0
净值_apply_median_2 0
净值_apply_sum_2 0
净值_apply_max_min_2 0
总金额_apply_mean_2 0
总金额_apply_max_2 0
总金额_apply_min_2 0
总金额_apply_std_2 0
总金额_apply_median_2 0
总金额_apply_sum_2 0
总金额_apply_max_min_2 0
超额管理费_apply_mean_2 0
超额管理费_apply_max_2 0
超额管理费_apply_min_2 0
超额管理费_apply_std_2 0
超额管理费_apply_median_2 0
超额管理费_apply_sum_2 0
超额管理费_apply_max_min_2 0
apply_amt_apply_mean_2 0
apply_amt_apply_max_2 0
apply_amt_apply_min_2 0
apply_amt_apply_std_2 0
apply_amt_apply_median_2 0
apply_amt_apply_sum_2 0
apply_amt_apply_max_min_2 0
业务代码_apply_nunique_2 0
渠道标识_apply_nunique_2 0
资金状态_apply_nunique_2 0
交易状态_apply_nunique_2 0
deal_bus_apply_nunique_2 0
deal_bus_channel_apply_nunique_2 0
deal_bus_fund_apply_nunique_2 0
deal_bus_c_f_apply_nunique_2 0
uid_count_b 48
pid_nunique_b 14
pid_mean_count_b 146
净值_apply_mean_b 4033
净值_apply_max_b 76
净值_apply_min_b 62
净值_apply_std_b 5427
净值_apply_median_b 453
净值_apply_sum_b 4794
净值_apply_max_min_b 414
总金额_apply_mean_b 1790
总金额_apply_max_b 839
总金额_apply_min_b 11
总金额_apply_std_b 1902
总金额_apply_median_b 366
总金额_apply_sum_b 1936
总金额_apply_max_min_b 835
超额管理费_apply_mean_b 1438
超额管理费_apply_max_b 689
超额管理费_apply_min_b 1
超额管理费_apply_std_b 1589
超额管理费_apply_median_b 294
超额管理费_apply_sum_b 1529
超额管理费_apply_max_min_b 689
apply_amt_apply_mean_b 2578
apply_amt_apply_max_b 618
apply_amt_apply_min_b 443
apply_amt_apply_std_b 4182
apply_amt_apply_median_b 725
apply_amt_apply_sum_b 3875
apply_amt_apply_max_min_b 585
业务代码_apply_nunique_b 7
渠道标识_apply_nunique_b 3
资金状态_apply_nunique_b 5
交易状态_apply_nunique_b 4
deal_bus_apply_nunique_b 10
deal_bus_channel_apply_nunique_b 10
deal_bus_fund_apply_nunique_b 11
deal_bus_c_f_apply_nunique_b 11
uid_count_b1 43
pid_nunique_b1 13
pid_mean_count_b1 131
净值_apply_mean_b1 3199
净值_apply_max_b1 69
净值_apply_min_b1 54
净值_apply_std_b1 4263
净值_apply_median_b1 376
净值_apply_sum_b1 3804
净值_apply_max_min_b1 340
总金额_apply_mean_b1 1170
总金额_apply_max_b1 621
总金额_apply_min_b1 11
总金额_apply_std_b1 1239
总金额_apply_median_b1 252
总金额_apply_sum_b1 1272
总金额_apply_max_min_b1 617
超额管理费_apply_mean_b1 888
超额管理费_apply_max_b1 481
超额管理费_apply_min_b1 1
超额管理费_apply_std_b1 967
超额管理费_apply_median_b1 189
超额管理费_apply_sum_b1 948
超额管理费_apply_max_min_b1 481
apply_amt_apply_mean_b1 2205
apply_amt_apply_max_b1 594
apply_amt_apply_min_b1 435
apply_amt_apply_std_b1 3452
apply_amt_apply_median_b1 685
apply_amt_apply_sum_b1 3329
apply_amt_apply_max_min_b1 550
业务代码_apply_nunique_b1 7
渠道标识_apply_nunique_b1 3
资金状态_apply_nunique_b1 5
交易状态_apply_nunique_b1 4
deal_bus_apply_nunique_b1 10
deal_bus_channel_apply_nunique_b1 10
deal_bus_fund_apply_nunique_b1 10
deal_bus_c_f_apply_nunique_b1 10
uid_count_b2 36
pid_nunique_b2 13
pid_mean_count_b2 114
净值_apply_mean_b2 2407
净值_apply_max_b2 61
净值_apply_min_b2 46
净值_apply_std_b2 3137
净值_apply_median_b2 284
净值_apply_sum_b2 2857
净值_apply_max_min_b2 268
总金额_apply_mean_b2 581
总金额_apply_max_b2 332
总金额_apply_min_b2 11
总金额_apply_std_b2 621
总金额_apply_median_b2 134
总金额_apply_sum_b2 658
总金额_apply_max_min_b2 328
超额管理费_apply_mean_b2 347
超额管理费_apply_max_b2 203
超额管理费_apply_min_b2 1
超额管理费_apply_std_b2 381
超额管理费_apply_median_b2 77
超额管理费_apply_sum_b2 393
超额管理费_apply_max_min_b2 203
apply_amt_apply_mean_b2 1893
apply_amt_apply_max_b2 562
apply_amt_apply_min_b2 422
apply_amt_apply_std_b2 2781
apply_amt_apply_median_b2 653
apply_amt_apply_sum_b2 2815
apply_amt_apply_max_min_b2 493
业务代码_apply_nunique_b2 7
渠道标识_apply_nunique_b2 3
资金状态_apply_nunique_b2 5
交易状态_apply_nunique_b2 4
deal_bus_apply_nunique_b2 10
deal_bus_channel_apply_nunique_b2 10
deal_bus_fund_apply_nunique_b2 10
deal_bus_c_f_apply_nunique_b2 10
flow_redu_b1 26
flow_redu_b 28
flow_redu_b2 22
flow_redu_1 17
flow_redu_2 0
dtype: int64
390977
# Run a full garbage-collection pass; the displayed value is its return value.
gc.collect()
68
客户风险表
产品风险等级和客户风险等级的大小关系
# Load 'e_bc.csv' and preview its first two rows.
e1 = pd.read_csv('e_bc.csv')
e1.head(2)
|
core_cust_id |
e1 |
e2 |
0 |
9361c799f2 |
3 |
20211113 |
1 |
9cb2061f63 |
2 |
20211014 |
|
core_cust_id |
e1 |
e2 |
0 |
d4931873cb |
3 |
20200608 |
1 |
af52580627 |
3 |
20200330 |
# Stack the two risk tables row-wise; the index is not reset, so index labels
# from both inputs are kept as they are.
e = pd.concat([e, e1])
e.info()
Int64Index: 351573 entries, 0 to 35456
Data columns (total 3 columns):
 #   Column        Non-Null Count   Dtype
---  ------        --------------   -----
 0   core_cust_id  351573 non-null  object
 1   e1            351573 non-null  int64
 2   e2            351573 non-null  int64
dtypes: int64(2), object(1)
memory usage: 10.7+ MB
# Number of rows left after dropping fully duplicated rows.
len(e.drop_duplicates())
351523
# Turn e2 into text, keep its first six characters as a month key, and count
# the rows per month in chronological order.
e['e2'] = e['e2'].astype(str)
e['date'] = e['e2'].str[:6]
e['date'].value_counts().sort_index()
201303 30201304 3201305 9201306 7201307 12201308 19201309 25201310 30201311 28201312 52201401 43201402 31201403 65201404 54201405 100201406 108201407 80201408 101201409 209201410 199201411 205201412 213201502 1201503 2201504 18201505 78201506 137201507 358201508 823201509 1202201510 1127201511 1080201512 810201601 564201602 731201603 1204201604 829201605 646201606 526201607 515201608 547201609 819201610 944201611 1052201612 932201701 628201702 2213201703 1850201704 1110201705 1138201706 1304201707 1381201708 2389201709 2127201710 1489201711 3243201712 2703201801 1683201802 2160201803 2901201804 2376201805 2377201806 2250201807 2876201808 4684201809 4648201810 4484201811 4459201812 5921201901 4310201902 3310201903 4164201904 3573201905 3974201906 3316201907 3585201908 3639201909 4491201910 4556201911 3937201912 6040202001 6065202002 2832202003 5075202004 8605202005 8177202006 7860202007 8949202008 8705202009 10822202010 8405202011 8608202012 10566202101 7893202102 5814202103 8806202104 9652202105 8180202106 9567202107 13409202108 12828202109 17441202110 20209202111 15248Name: date, dtype: int64
- 登记和更新风险等级的人数,与购买有关系吗?
- 与df合并后查看一下在各产品中的表现
2021-07-01
2021-08-01
2021-09-01
2021-10-01
# Preview the first two rows of df.
df.head(2)
|
id |
core_cust_id |
prod_code |
a2 |
a3 |
y |
type |
uid_count_1 |
pid_nunique_1 |
pid_mean_count_1 |
净值_apply_mean_1 |
净值_apply_max_1 |
净值_apply_min_1 |
净值_apply_std_1 |
净值_apply_median_1 |
净值_apply_sum_1 |
净值_apply_max_min_1 |
总金额_apply_mean_1 |
总金额_apply_max_1 |
总金额_apply_min_1 |
总金额_apply_std_1 |
总金额_apply_median_1 |
总金额_apply_sum_1 |
总金额_apply_max_min_1 |
超额管理费_apply_mean_1 |
超额管理费_apply_max_1 |
超额管理费_apply_min_1 |
超额管理费_apply_std_1 |
超额管理费_apply_median_1 |
超额管理费_apply_sum_1 |
超额管理费_apply_max_min_1 |
apply_amt_apply_mean_1 |
apply_amt_apply_max_1 |
apply_amt_apply_min_1 |
apply_amt_apply_std_1 |
apply_amt_apply_median_1 |
apply_amt_apply_sum_1 |
apply_amt_apply_max_min_1 |
业务代码_apply_nunique_1 |
渠道标识_apply_nunique_1 |
资金状态_apply_nunique_1 |
交易状态_apply_nunique_1 |
deal_bus_apply_nunique_1 |
deal_bus_channel_apply_nunique_1 |
deal_bus_fund_apply_nunique_1 |
deal_bus_c_f_apply_nunique_1 |
uid_count_2 |
pid_nunique_2 |
pid_mean_count_2 |
净值_apply_mean_2 |
净值_apply_max_2 |
净值_apply_min_2 |
净值_apply_std_2 |
净值_apply_median_2 |
净值_apply_sum_2 |
净值_apply_max_min_2 |
总金额_apply_mean_2 |
总金额_apply_max_2 |
总金额_apply_min_2 |
总金额_apply_std_2 |
总金额_apply_median_2 |
总金额_apply_sum_2 |
总金额_apply_max_min_2 |
超额管理费_apply_mean_2 |
超额管理费_apply_max_2 |
超额管理费_apply_min_2 |
超额管理费_apply_std_2 |
超额管理费_apply_median_2 |
超额管理费_apply_sum_2 |
超额管理费_apply_max_min_2 |
apply_amt_apply_mean_2 |
apply_amt_apply_max_2 |
apply_amt_apply_min_2 |
apply_amt_apply_std_2 |
apply_amt_apply_median_2 |
apply_amt_apply_sum_2 |
apply_amt_apply_max_min_2 |
业务代码_apply_nunique_2 |
渠道标识_apply_nunique_2 |
资金状态_apply_nunique_2 |
交易状态_apply_nunique_2 |
deal_bus_apply_nunique_2 |
deal_bus_channel_apply_nunique_2 |
deal_bus_fund_apply_nunique_2 |
deal_bus_c_f_apply_nunique_2 |
uid_count_b |
pid_nunique_b |
pid_mean_count_b |
净值_apply_mean_b |
净值_apply_max_b |
净值_apply_min_b |
净值_apply_std_b |
净值_apply_median_b |
净值_apply_sum_b |
净值_apply_max_min_b |
总金额_apply_mean_b |
总金额_apply_max_b |
总金额_apply_min_b |
总金额_apply_std_b |
总金额_apply_median_b |
总金额_apply_sum_b |
总金额_apply_max_min_b |
超额管理费_apply_mean_b |
超额管理费_apply_max_b |
超额管理费_apply_min_b |
超额管理费_apply_std_b |
超额管理费_apply_median_b |
超额管理费_apply_sum_b |
超额管理费_apply_max_min_b |
apply_amt_apply_mean_b |
apply_amt_apply_max_b |
apply_amt_apply_min_b |
apply_amt_apply_std_b |
apply_amt_apply_median_b |
apply_amt_apply_sum_b |
apply_amt_apply_max_min_b |
业务代码_apply_nunique_b |
渠道标识_apply_nunique_b |
资金状态_apply_nunique_b |
交易状态_apply_nunique_b |
deal_bus_apply_nunique_b |
deal_bus_channel_apply_nunique_b |
deal_bus_fund_apply_nunique_b |
deal_bus_c_f_apply_nunique_b |
uid_count_b1 |
pid_nunique_b1 |
pid_mean_count_b1 |
净值_apply_mean_b1 |
净值_apply_max_b1 |
净值_apply_min_b1 |
净值_apply_std_b1 |
净值_apply_median_b1 |
净值_apply_sum_b1 |
净值_apply_max_min_b1 |
总金额_apply_mean_b1 |
总金额_apply_max_b1 |
总金额_apply_min_b1 |
总金额_apply_std_b1 |
总金额_apply_median_b1 |
总金额_apply_sum_b1 |
总金额_apply_max_min_b1 |
超额管理费_apply_mean_b1 |
超额管理费_apply_max_b1 |
超额管理费_apply_min_b1 |
超额管理费_apply_std_b1 |
超额管理费_apply_median_b1 |
超额管理费_apply_sum_b1 |
超额管理费_apply_max_min_b1 |
apply_amt_apply_mean_b1 |
apply_amt_apply_max_b1 |
apply_amt_apply_min_b1 |
apply_amt_apply_std_b1 |
apply_amt_apply_median_b1 |
apply_amt_apply_sum_b1 |
apply_amt_apply_max_min_b1 |
业务代码_apply_nunique_b1 |
渠道标识_apply_nunique_b1 |
资金状态_apply_nunique_b1 |
交易状态_apply_nunique_b1 |
deal_bus_apply_nunique_b1 |
deal_bus_channel_apply_nunique_b1 |
deal_bus_fund_apply_nunique_b1 |
deal_bus_c_f_apply_nunique_b1 |
uid_count_b2 |
pid_nunique_b2 |
pid_mean_count_b2 |
净值_apply_mean_b2 |
净值_apply_max_b2 |
净值_apply_min_b2 |
净值_apply_std_b2 |
净值_apply_median_b2 |
净值_apply_sum_b2 |
净值_apply_max_min_b2 |
总金额_apply_mean_b2 |
总金额_apply_max_b2 |
总金额_apply_min_b2 |
总金额_apply_std_b2 |
总金额_apply_median_b2 |
总金额_apply_sum_b2 |
总金额_apply_max_min_b2 |
超额管理费_apply_mean_b2 |
超额管理费_apply_max_b2 |
超额管理费_apply_min_b2 |
超额管理费_apply_std_b2 |
超额管理费_apply_median_b2 |
超额管理费_apply_sum_b2 |
超额管理费_apply_max_min_b2 |
apply_amt_apply_mean_b2 |
apply_amt_apply_max_b2 |
apply_amt_apply_min_b2 |
apply_amt_apply_std_b2 |
apply_amt_apply_median_b2 |
apply_amt_apply_sum_b2 |
apply_amt_apply_max_min_b2 |
业务代码_apply_nunique_b2 |
渠道标识_apply_nunique_b2 |
资金状态_apply_nunique_b2 |
交易状态_apply_nunique_b2 |
deal_bus_apply_nunique_b2 |
deal_bus_channel_apply_nunique_b2 |
deal_bus_fund_apply_nunique_b2 |
deal_bus_c_f_apply_nunique_b2 |
flow_redu_b1 |
flow_redu_b |
flow_redu_b2 |
flow_redu_1 |
flow_redu_2 |
risk_count |
risk_change |
risk_level_mean |
risk_max |
risk_min |
risk_count_b |
risk_change_b |
risk_level_mean_b |
risk_max_b |
risk_min_b |
客户风险等级 |
date |
risk_diff_date |
0 |
70e7f0465877447aa44c8d3120d0414c |
9cb1f66b15 |
SSTJMZKF001 |
3 |
2021-07-01 |
0.0 |
train |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
2.0 |
2.0 |
1.5 |
2.0 |
1.0 |
2.0 |
201910 |
197.0 |
1 |
0df607cd59144c9fa0ddd0863372a0de |
c446c41e48 |
SSTJMZKF002 |
3 |
2021-07-01 |
0.0 |
train |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
B产品的产品表与df合并后都只有一个值,所以客户风险承受能力就是客户风险等级
# Row count of df.
df.shape[0]
390977
客户信息
# Load 'd_bc.csv' and preview its first two rows.
d_bc = pd.read_csv('d_bc.csv')
d_bc.head(2)
|
core_cust_id |
d1 |
d2 |
d3 |
0 |
9cb2061f63 |
1 |
4.0 |
60 |
1 |
e538d4d33a |
1 |
4.0 |
28 |
# Load 'd.csv'.
d = pd.read_csv('d.csv')
|
core_cust_id |
d1 |
d2 |
d3 |
0 |
d4931873cb |
1 |
4.0 |
40 |
1 |
af52580627 |
2 |
3.0 |
32 |
# Stack the two customer-info tables row-wise (index not reset) and print a
# summary of the result.
d = pd.concat([d, d_bc])
d.info()
Int64Index: 271919 entries, 0 to 7863
Data columns (total 4 columns):
 #   Column        Non-Null Count   Dtype
---  ------        --------------   -----
 0   core_cust_id  271919 non-null  object
 1   d1            271919 non-null  int64
 2   d2            236613 non-null  float64
 3   d3            271919 non-null  int64
dtypes: float64(1), int64(2), object(1)
memory usage: 10.4+ MB
# Bar chart of row counts per d1 value, split by d2.
sns.countplot(x='d1', hue='d2', data=d)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-BdhNiP2a-1643361700228)(output_94_1.png)]
from scipy.stats import *
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-C0GzDFdr-1643361700229)(output_95_1.png)]
sns.distplot(d[d['d1']==2]['d3'],hist=False, fit=skewnorm)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-gYKp3dpM-1643361700230)(output_96_1.png)]
d[(d['d3']==55) & (d['d1']==1)].shape[0], d[d['d3']==55].shape[0]
(2062, 4075)
d[(d['d3']==72) & (d['d1']==1)].shape[0], d[d['d3']==72].shape[0]
(1095, 2204)
# Age-band flag: 1 for customers younger than 56 or older than 72, 2 for the
# 56-72 band. The previous test `x<56 and x>72` can never be true, so every
# row received 2 and the feature carried no information.
# NOTE(review): 55 and 72 are the ages inspected just above; confirm which side
# of each boundary they should fall on.
d['age_gap'] = np.where((d['d3'] < 56) | (d['d3'] > 72), 1, 2)
# Ratio of age (d3) to d2; NaN wherever d2 is missing.
d['age_grade'] = d['d3'] / d['d2']
d['d3'].max(), d['d3'].min()
(101, 18)
d['d3'].value_counts()
32     9067
31     8902
33     8408
34     8097
35     7656
30     7631
39     7417
36     6998
29     6957
37     6935
38     6900
40     6380
28     6263
27     5639
42     5637
43     5508
48     5460
58     5438
50     5399
49     5319
44     5304
51     5274
45     5234
46     5109
47     5108
41     5055
26     4996
53     4992
52     4806
57     4316
56     4222
59     4209
25     4203
55     4075
64     3902
54     3837
67     3641
63     3426
24     3418
66     3401
65     3389
68     3179
61     3136
62     2994
69     2946
23     2621
70     2595
60     2522
71     2347
72     2204
22     1989
73     1886
74     1780
21     1614
75     1475
20     1290
76     1229
77     1133
78      980
79      870
19      791
80      711
81      612
82      604
83      505
84      426
85      356
86      275
87      220
18      216
89      135
88      129
90       72
91       55
92       44
93       17
94       13
95        7
96        4
97        3
99        3
98        1
100       1
101       1
Name: d3, dtype: int64
# Frequency-encode age (d3): how many customers share each age value.
age_counts = d['d3'].value_counts().reset_index()
age_counts.columns = ['d3', 'age_频度']
d = d.merge(age_counts, on=['d3'], how='left')

# Equal-width bins of the age frequency at several resolutions; labels=False
# stores the integer bin index instead of an interval.
for n_bins in (3, 5, 7, 9, 11):
    d[f'age_freq{n_bins}'] = pd.cut(d['age_频度'], n_bins, labels=False)

# Frequency-encode d2 the same way. value_counts() skips NaN, so rows with a
# missing d2 end up with a NaN frequency after the left merge.
cust_grade_counts = d['d2'].value_counts().reset_index()
cust_grade_counts.columns = ['d2', 'cust_grade_频度']
d = d.merge(cust_grade_counts, on=['d2'], how='left')
d.head()
|
core_cust_id |
d1 |
d2 |
d3 |
age_gap |
age_grade |
age_频度 |
age_freq3 |
age_freq5 |
age_freq7 |
age_freq9 |
age_freq11 |
cust_grade_频度 |
0 |
d4931873cb |
1 |
4.0 |
40 |
2 |
10.000000 |
6380 |
2 |
3 |
4 |
6 |
7 |
171960.0 |
1 |
af52580627 |
2 |
3.0 |
32 |
2 |
10.666667 |
9067 |
2 |
4 |
6 |
8 |
10 |
41350.0 |
2 |
8a11960fe9 |
1 |
NaN |
58 |
2 |
NaN |
5438 |
1 |
2 |
4 |
5 |
6 |
NaN |
3 |
cfeaff22c6 |
2 |
4.0 |
53 |
2 |
13.250000 |
4992 |
1 |
2 |
3 |
4 |
6 |
171960.0 |
4 |
17fb48e6ce |
1 |
4.0 |
34 |
2 |
8.500000 |
8097 |
2 |
4 |
6 |
8 |
9 |
171960.0 |
df = df.merge(d, on='core_cust_id', how='left')
df.shape[0]
390977
gc.collect()
91
APP点击行为
r1 = pd.read_csv('r_bc.csv')
r = pd.read_csv('r.csv')
r = pd.concat([r, r1])
r.head(2)
|
r1 |
core_cust_id |
r3 |
prod_code |
r5 |
0 |
20210825_11277467902 |
a15a1d681a |
2 |
91318017 |
2021-08-25 14:18:10 |
1 |
20210824_11229966502 |
a15a1d681a |
1 |
GRHLA20211386 |
2021-08-24 14:55:49 |
r['r5'].min(), r['r5'].max()
('2021-01-01 00:06:51', '2021-10-24 23:59:59')
r.info()
Int64Index: 1054997 entries, 0 to 277921
Data columns (total 5 columns):
 #   Column        Non-Null Count    Dtype
---  ------        --------------    -----
 0   r1            1054997 non-null  object
 1   core_cust_id  1054997 non-null  object
 2   r3            1054997 non-null  int64
 3   prod_code     1054990 non-null  object
 4   r5            1054997 non-null  object
dtypes: int64(1), object(4)
memory usage: 48.3+ MB
r['datetime'] = pd.to_datetime(r['r5'],errors='coerce', format='%Y-%m-%d %H:%M:%S')
r.head(3)
|
r1 |
core_cust_id |
r3 |
prod_code |
r5 |
datetime |
click_month |
click_day |
click_hour |
click_minute |
click_second |
click_dayofyear |
click_weekofyear |
click_weekday |
click_quarter |
click_is_wknd |
0 |
20210825_11277467902 |
a15a1d681a |
2 |
91318017 |
2021-08-25 14:18:10 |
2021-08-25 14:18:10 |
8 |
25 |
14 |
18 |
10 |
237 |
34 |
2 |
3 |
0 |
1 |
20210824_11229966502 |
a15a1d681a |
1 |
GRHLA20211386 |
2021-08-24 14:55:49 |
2021-08-24 14:55:49 |
8 |
24 |
14 |
55 |
49 |
236 |
34 |
1 |
3 |
0 |
2 |
20210824_11234138402 |
a15a1d681a |
1 |
GRHLA20211386 |
2021-08-24 16:17:15 |
2021-08-24 16:17:15 |
8 |
24 |
16 |
17 |
15 |
236 |
34 |
1 |
3 |
0 |
r.info()
Int64Index: 1054997 entries, 0 to 277921
Data columns (total 16 columns):
 #   Column            Non-Null Count    Dtype
---  ------            --------------    -----
 0   r1                1054997 non-null  object
 1   core_cust_id      1054997 non-null  object
 2   r3                1054997 non-null  int64
 3   prod_code         1054990 non-null  object
 4   r5                1054997 non-null  object
 5   datetime          1054997 non-null  datetime64[ns]
 6   click_month       1054997 non-null  int32
 7   click_day         1054997 non-null  int32
 8   click_hour        1054997 non-null  int32
 9   click_minute      1054997 non-null  int32
 10  click_second      1054997 non-null  int32
 11  click_dayofyear   1054997 non-null  int32
 12  click_weekofyear  1054997 non-null  int32
 13  click_weekday     1054997 non-null  int32
 14  click_quarter     1054997 non-null  int32
 15  click_is_wknd     1054997 non-null  int64
dtypes: datetime64[ns](1), int32(9), int64(2), object(4)
memory usage: 100.6+ MB
sns.countplot(x='click_month', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-3fAoiLjH-1643361700231)(output_120_1.png)]
sns.countplot(x='click_day', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-DGb1fJ9N-1643361700232)(output_121_1.png)]
sns.countplot(x='click_hour', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-h1pxP64s-1643361700233)(output_122_1.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-yCOe4XRn-1643361700234)(output_123_1.png)]
sns.countplot(x='click_weekofyear', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-j69M5jTj-1643361700234)(output_124_1.png)]
sns.countplot(x='click_weekday', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-VnrTuads-1643361700235)(output_125_1.png)]
sns.countplot(x='click_is_wknd', data=r)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-n7j5bRp2-1643361700236)(output_126_1.png)]
r['r3'].nunique()
2
合并r表:
r.columns
Index(['r1', 'core_cust_id', 'r3', 'prod_code', 'r5', 'click_day', 'click_hour', 'click_minute', 'click_second', 'click_time', 'click_month', 'click_d', 'datetime', 'weekofyear_r', 'weekday_r', 'quarter_r', 'is_wknd_r'], dtype='object')
- 点击类型r3: 是否有变化–nunique有几个值
- click_hour: 众数mode,min,max,median,nunique
- click_month: 众数mode,min,max,median,nunique,月均次数
- click_d(每个月中的哪一天): 众数mode,min,max,median,nunique,日均次数
- weekofyear: 众数mode,min,max,median,nunique,周均次数
- weekday: 众数mode,min,max,median,nunique,周内日均次数
- quarter: 众数mode,min,max,median,nunique,季度均次数
- is_wknd: 众数mode,sum,median,
- 以上所有需要补充sum和mean么?
r['r5'].max(), r['r5'].min()
('2021-10-24 23:59:59', '2021-01-01 00:06:51')
# Month key ('YYYY-MM') of every click; the history cut-offs below compare on it.
r['date'] = r['r5'].str[:7]
# Sample month (a3) -> latest click month included in the "_b" history window.
dict_ = {'2021-07-01':'2021-06', '2021-08-01':'2021-07', '2021-09-01':'2021-08', '2021-10-01':'2021-09'}
# Sample month (a3) -> latest click month included in the "_b1" window (one month earlier).
dict_1 = {'2021-07-01':'2021-05', '2021-08-01':'2021-06', '2021-09-01':'2021-07', '2021-10-01':'2021-08'}
r_col = [ 'click_hour', 'click_month', 'click_d', 'weekofyear_r', 'weekday_r', 'quarter_r']


def _click_stats(clicks, keys, count_col, prefix, suffix, with_product_nunique=False):
    """Aggregate click history per `keys` group.

    clicks: slice of `r` limited to the history window.
    keys: list of grouping columns.
    count_col: column whose non-null values are counted as "actions".
    prefix / suffix: feature-name prefix ('uid' or 'cp') and window suffix.
    with_product_nunique: also emit the number of distinct prod_code values.

    Returns one row per group holding the key columns followed by the features.
    The groupby is built once and reused, so every aggregate comes back in the
    same group order and can be attached positionally with `.values`.
    """
    grouped = clicks.groupby(keys)
    count_name = f'{prefix}_click_action_count{suffix}'
    stats = grouped[count_col].count().reset_index()
    stats.columns = list(keys) + [count_name]
    if with_product_nunique:
        stats[f'pid_click_action_nunique{suffix}'] = grouped['prod_code'].agg('nunique').values
    stats[f'{prefix}_click_r3_nunique{suffix}'] = grouped['r3'].agg('nunique').values
    stats[f'{prefix}_click_iswknd_sum{suffix}'] = grouped['is_wknd_r'].agg('sum').values
    stats[f'{prefix}_click_iswknd_median{suffix}'] = grouped['is_wknd_r'].agg('median').values
    for col in r_col:
        for how in ('median', 'sum', 'max', 'min', 'nunique'):
            stats[f'{prefix}_click_{col}_{how}{suffix}'] = grouped[col].agg(how).values
        # Average number of actions per distinct value of `col`.
        stats[f'{prefix}_click_{col}_smean{suffix}'] = (
            stats[count_name] / stats[f'{prefix}_click_{col}_nunique{suffix}']
        )
    return stats


dfs = []
for month in sorted(df['a3'].unique()):
    print(month)
    tmp_df = df[df['a3'] == month]
    # Filter each history window once instead of once per aggregate.
    clicks_b = r[r['date'] <= dict_[month]]
    clicks_b1 = r[r['date'] <= dict_1[month]]

    # Per-customer click statistics.
    stat_1 = _click_stats(clicks_b, ['core_cust_id'], 'prod_code', 'uid', '_b',
                          with_product_nunique=True)
    stat_2 = _click_stats(clicks_b1, ['core_cust_id'], 'prod_code', 'uid', '_b1',
                          with_product_nunique=True)
    tmp_df = tmp_df.merge(stat_1, on='core_cust_id', how='left')
    tmp_df = tmp_df.merge(stat_2, on='core_cust_id', how='left')

    # Per (customer, product) click statistics.
    stat_3 = _click_stats(clicks_b, ['core_cust_id', 'prod_code'], 'r1', 'cp', '_b')
    stat_4 = _click_stats(clicks_b1, ['core_cust_id', 'prod_code'], 'r1', 'cp', '_b1')
    tmp_df = tmp_df.merge(stat_3, on=['core_cust_id', 'prod_code'], how='left')
    tmp_df = tmp_df.merge(stat_4, on=['core_cust_id', 'prod_code'], how='left')
    dfs.append(tmp_df)
df = pd.concat(dfs).reset_index(drop=True)
2021-07-01
2021-08-01
2021-09-01
2021-10-01
gc.collect()
31
df.shape[0]
390977
账户交易流水
# Load the account transaction ledger; thousands=',' lets pandas parse numbers
# written with comma thousands separators.
s = pd.read_csv(path2 + 's.csv', thousands=',')
s.head(2)
|
s1 |
s2 |
s3 |
s4 |
s5 |
s6 |
s7 |
0 |
01004320210415202104151FT21105818179741 |
4 |
NaN |
75617.810 |
2021-04-15 |
9809df0ffe |
2021-04-15 |
1 |
01004320210630202106301FT21181810270081 |
4 |
NaN |
75635.105 |
2021-06-30 |
9809df0ffe |
2021-06-30 |
- s2 交易类型代码
- s3 借方客户编号
- s4 借方金额
- s5 借方交易日期
- s6 贷方客户编号
- s7 处理日期
s.nunique()
s.info()
# Month keys ('YYYY-MM') sliced from the transaction date (s5) and the
# processing date (s7).
s['deal_date'] = s['s5'].str[:7]
s['handle_date'] = s['s7'].str[:7]
# Plain-list copies of the two month columns (not used in the visible part of the notebook).
l5 = s['deal_date'].tolist()
l7 = s['handle_date'].tolist()
- 将s3借方:花钱吃饭;s6贷方:发工资到银行卡,分开再合并
- s3对应的s4为负值,s6对应的s4为正值
- s1可以删除用rank重新排序
# Debit side: rows that carry a debit customer id (s3).
# .copy() makes s1 an independent frame so the in-place edits below are never
# applied to (or warned about as) a slice of s.
s1 = s[s['s3'].notnull()].copy()
s1.drop('s6', axis=1, inplace=True)
# Store debit amounts as negative values so they can be mixed with credits.
s1['s4'] = s1['s4']*(-1)
不要交易号,因为有一些数据同一天发生交易的金额一样,所以判断可能是付款取消,再次付款导致
# Key the debit rows by customer, then collapse repeated
# (type, customer, amount, day) entries to their last occurrence.
s1 = s1.rename(columns={'s3': 'core_cust_id'})
s1 = s1.drop_duplicates(subset=['s2', 'core_cust_id', 's4', 's5'], keep='last')
s1.head(2)
|
s1 |
s2 |
core_cust_id |
s4 |
s5 |
s7 |
deal_date |
handle_date |
190 |
01004320210815202108151FT21227030881371 |
6 |
4b3451caa2 |
-12110.0 |
2021-08-15 |
2021-08-15 |
2021-08 |
2021-08 |
200 |
01004320210809202108091FT21221429469501 |
6 |
a030fed1b |
-150110.0 |
2021-08-09 |
2021-08-09 |
2021-08 |
2021-08 |
# Credit side: rows that carry a credit customer id (s6), keyed by that
# customer, with repeated (type, amount, day, customer) entries collapsed to
# their last occurrence.
s2 = (
    s[s['s6'].notnull()]
    .drop('s3', axis=1)
    .rename(columns={'s6': 'core_cust_id'})
    .drop_duplicates(subset=['s2', 's4', 's5', 'core_cust_id'], keep='last')
)
s2.head(2)
|
s1 |
s2 |
s4 |
s5 |
core_cust_id |
s7 |
deal_date |
handle_date |
0 |
01004320210415202104151FT21105818179741 |
4 |
75617.810 |
2021-04-15 |
9809df0ffe |
2021-04-15 |
2021-04 |
2021-04 |
1 |
01004320210630202106301FT21181810270081 |
4 |
75635.105 |
2021-06-30 |
9809df0ffe |
2021-06-30 |
2021-06 |
2021-06 |
# Stack the debit and credit rows into one ledger with a fresh 0..n-1 index.
ss = pd.concat([s1, s2], ignore_index=True)
ss.head(2)
|
s1 |
s2 |
core_cust_id |
s4 |
s5 |
s7 |
deal_date |
handle_date |
0 |
01004320210815202108151FT21227030881371 |
6 |
4b3451caa2 |
-12110.0 |
2021-08-15 |
2021-08-15 |
2021-08 |
2021-08 |
1 |
01004320210809202108091FT21221429469501 |
6 |
a030fed1b |
-150110.0 |
2021-08-09 |
2021-08-09 |
2021-08 |
2021-08 |
ss.shape
(6017813, 8)
gc.collect()
13384
# 1 when the transaction month and the processing month differ, otherwise 0.
# The previous version compared the boolean result with the string 'False',
# which never matches, so the feature was 0 on every row.
ss['interval_month'] = (ss['deal_date'] != ss['handle_date']).astype(int)
ss.head(2)
|
s1 |
s2 |
core_cust_id |
s4 |
s5 |
s7 |
deal_date |
handle_date |
interval_month |
0 |
01004320210815202108151FT21227030881371 |
6 |
4b3451caa2 |
-12110.0 |
2021-08-15 |
2021-08-15 |
2021-08 |
2021-08 |
0 |
1 |
01004320210809202108091FT21221429469501 |
6 |
a030fed1b |
-150110.0 |
2021-08-09 |
2021-08-09 |
2021-08 |
2021-08 |
0 |
# Calendar features of the transaction date (s5); unparseable dates become NaT.
ss['datetime_s'] = pd.to_datetime(ss['s5'], errors='coerce')
deal_dt = ss['datetime_s'].dt
# Integer calendar parts, with 0 standing in for missing dates.
for part in ('dayofyear', 'weekofyear', 'weekday', 'quarter'):
    ss[f'{part}_s'] = getattr(deal_dt, part).fillna(0).astype("int")
# NOTE(review): dayofweek is Monday=0..Sunday=6, so `// 4` yields 1 for Friday
# as well as Saturday/Sunday, and missing dates stay NaN here — confirm both
# are intended.
ss['is_wknd_s'] = deal_dt.dayofweek // 4
display(ss.head(2))
|
s1 |
s2 |
core_cust_id |
s4 |
s5 |
s7 |
deal_date |
handle_date |
interval_month |
datetime_s |
dayofyear_s |
weekofyear_s |
weekday_s |
quarter_s |
is_wknd_s |
0 |
01004320210815202108151FT21227030881371 |
6 |
4b3451caa2 |
-12110.0 |
2021-08-15 |
2021-08-15 |
2021-08 |
2021-08 |
0 |
2021-08-15 |
227 |
32 |
6 |
3 |
1 |
1 |
01004320210809202108091FT21221429469501 |
6 |
a030fed1b |
-150110.0 |
2021-08-09 |
2021-08-09 |
2021-08 |
2021-08 |
0 |
2021-08-09 |
221 |
32 |
0 |
3 |
0 |
ss['s5'].max(), ss['s5'].min()
('2021-09-30', '2021-01-01')
# Month key ('YYYY-MM') of the processing date (s7); every window below is cut on it.
ss['date'] = ss['s7'].str[:7]
# Sample month (a3) -> the month immediately before it.
dict_ = {'2021-07-01':'2021-06', '2021-08-01':'2021-07', '2021-09-01':'2021-08', '2021-10-01':'2021-09'}
# Sample month (a3) -> the two months immediately before it.
dict_1 = {'2021-07-01':['2021-05', '2021-06'], '2021-08-01':['2021-06', '2021-07'],
          '2021-09-01':['2021-07', '2021-08'], '2021-10-01':['2021-08', '2021-09']}


def _deal_stats(deals, suffix):
    """Summarise transaction amounts (s4) per customer.

    deals: slice of `ss` limited to one time window.
    suffix: appended to every feature name to identify the window.

    Returns one row per core_cust_id with the transaction count, the number of
    distinct amounts, their ratio, and mean/sum/max/min/std/median/range of s4.
    """
    grouped = deals.groupby('core_cust_id')
    amounts = grouped['s4']
    stats = grouped['s1'].count().reset_index()
    stats.columns = ['core_cust_id', f'deal_count{suffix}']
    stats[f'deal_nunique{suffix}'] = amounts.agg('nunique').values
    stats[f'single_deal{suffix}'] = stats[f'deal_count{suffix}'] / stats[f'deal_nunique{suffix}']
    stats[f'deal_mean{suffix}'] = amounts.agg('mean').values
    for how in ('sum', 'max', 'min', 'std', 'median'):
        stats[f'borrow_{how}{suffix}'] = amounts.agg(how).values
    # Same value as aggregating with diff_max_min (max - min), without running
    # a Python function once per group.
    stats[f'deal_diff_max_min{suffix}'] = stats[f'borrow_max{suffix}'] - stats[f'borrow_min{suffix}']
    return stats


dfs = []
for month in sorted(df['a3'].unique()):
    print(month)
    tmp_df = df[df['a3'] == month]
    last_month = dict_[month]
    last_two_months = dict_1[month]

    # Previous month only.
    stat_1 = _deal_stats(ss[ss['date'] == last_month], '')
    # Previous two months. The old mask required the date to equal both months
    # at once (`==` combined with `&`), which selects nothing and left every
    # "_2" feature empty.
    stat_2 = _deal_stats(ss[ss['date'].isin(last_two_months)], '_2')
    # Everything strictly before the previous month.
    stat_3 = _deal_stats(ss[ss['date'] < last_month], '_b1')
    # Everything strictly before the two-month window.
    stat_4 = _deal_stats(ss[ss['date'] < last_two_months[0]], '_b2')

    for stats in (stat_1, stat_2, stat_3, stat_4):
        tmp_df = tmp_df.merge(stats, on='core_cust_id', how='left')
    dfs.append(tmp_df)
df = pd.concat(dfs).reset_index(drop=True)
2021-07-01
2021-08-01
2021-09-01
2021-10-01
gc.collect()
70
df.shape[0]
390977
F资产信息表
f = pd.read_csv(path2 + 'f.csv', thousands=',')
f.head(2)
|
core_cust_id |
f1 |
f2 |
f3 |
f4 |
f5 |
f6 |
f7 |
f8 |
f9 |
f10 |
f11 |
f12 |
f13 |
f14 |
f15 |
f16 |
f17 |
f18 |
f19 |
f20 |
f21 |
f22 |
0 |
48e055617a |
2020-11-26 |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
20210830 |
1 |
48e055617a |
2020-11-26 |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
20210730 |
# Names of the twenty asset columns f2..f21.
drop_col = [f'f{i}' for i in range(2, 22)]
# Keep only the last row of every distinct f2..f21 value combination.
# NOTE(review): core_cust_id is not part of the subset, so rows of different
# customers with identical values (e.g. all NaN) are collapsed as well —
# confirm this is intended.
f = f.drop_duplicates(subset=drop_col, keep='last')
f.head(2)
|
core_cust_id |
f1 |
f2 |
f3 |
f4 |
f5 |
f6 |
f7 |
f8 |
f9 |
f10 |
f11 |
f12 |
f13 |
f14 |
f15 |
f16 |
f17 |
f18 |
f19 |
f20 |
f21 |
f22 |
37 |
28fa6f2d3 |
2018-07-21 |
157.280 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
125.93 |
110.0 |
110.0 |
110.0 |
110.0 |
20210930 |
38 |
28fa6f2d3 |
2018-07-21 |
143.195 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
177.5 |
110.0 |
110.0 |
110.0 |
110.0 |
118.37 |
110.0 |
110.0 |
110.0 |
110.0 |
20210830 |
# Parse the start date (f1) and the snapshot date (f22, written as YYYYMMDD).
f['f_start_datetime'] = pd.to_datetime(f['f1'],errors='coerce')
f['f_end_datetime'] = pd.to_datetime(f['f22'].astype(str), format='%Y%m%d', errors='coerce')
# Whole days between the two dates. The visible code read f['f_diff_time']
# before anything created it; the difference is rebuilt here so the cell runs
# on its own (e.g. 2018-07-21 -> 2021-09-30 gives 1167, as in the output below).
f['f_diff_time'] = (f['f_end_datetime'] - f['f_start_datetime']).dt.days
f.head(2)
|
core_cust_id |
f1 |
f2 |
f3 |
f4 |
f5 |
f6 |
f7 |
f8 |
f9 |
f10 |
f11 |
f12 |
f13 |
f14 |
f15 |
f16 |
f17 |
f18 |
f19 |
f20 |
f21 |
f22 |
f_start_datetime |
f_end_datetime |
f_diff_time |
37 |
28fa6f2d3 |
2018-07-21 |
157.280 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
125.93 |
110.0 |
110.0 |
110.0 |
110.0 |
20210930 |
2018-07-21 |
2021-09-30 |
1167 |
38 |
28fa6f2d3 |
2018-07-21 |
143.195 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
177.5 |
110.0 |
110.0 |
110.0 |
110.0 |
118.37 |
110.0 |
110.0 |
110.0 |
110.0 |
20210830 |
2018-07-21 |
2021-08-30 |
1136 |
f['core_cust_id'].nunique()
30490
f['f22'].value_counts()
20210930 28081
20210830 25696
20210730 24054
20210630 23063
Name: f22, dtype: int64
|
core_cust_id |
f1 |
f2 |
f3 |
f4 |
f5 |
f6 |
f7 |
f8 |
f9 |
f10 |
f11 |
f12 |
f13 |
f14 |
f15 |
f16 |
f17 |
f18 |
f19 |
f20 |
f21 |
f22 |
f_start_datetime |
f_end_datetime |
f_diff_time |
year_f |
month_f |
day_f |
weekofyear_f |
weekday_f |
quarter_f |
is_wknd_f |
37 |
28fa6f2d3 |
2018-07-21 |
157.280 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
125.93 |
110.0 |
110.0 |
110.0 |
110.0 |
20210930 |
2018-07-21 |
2021-09-30 |
1167 |
2018 |
7 |
21 |
29 |
5 |
3 |
1 |
38 |
28fa6f2d3 |
2018-07-21 |
143.195 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
177.5 |
110.0 |
110.0 |
110.0 |
110.0 |
118.37 |
110.0 |
110.0 |
110.0 |
110.0 |
20210830 |
2018-07-21 |
2021-08-30 |
1136 |
2018 |
7 |
21 |
29 |
5 |
3 |
1 |
f['f22'] = f['f22'].astype('str')
f.head(2)
|
core_cust_id |
f1 |
f2 |
f3 |
f4 |
f5 |
f6 |
f7 |
f8 |
f9 |
f10 |
f11 |
f12 |
f13 |
f14 |
f15 |
f16 |
f17 |
f18 |
f19 |
f20 |
f21 |
f22 |
f_start_datetime |
f_end_datetime |
f_diff_time |
year_f |
month_f |
day_f |
weekofyear_f |
weekday_f |
quarter_f |
is_wknd_f |
37 |
28fa6f2d3 |
2018-07-21 |
157.280 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
125.93 |
110.0 |
110.0 |
110.0 |
110.0 |
20210930 |
2018-07-21 |
2021-09-30 |
1167 |
2018 |
7 |
21 |
29 |
5 |
3 |
1 |
38 |
28fa6f2d3 |
2018-07-21 |
143.195 |
110.0 |
110.0 |
110.0 |
110.0 |
185.0 |
110.0 |
110.0 |
110.0 |
110.0 |
177.5 |
110.0 |
110.0 |
110.0 |
110.0 |
118.37 |
110.0 |
110.0 |
110.0 |
110.0 |
20210830 |
2018-07-21 |
2021-08-30 |
1136 |
2018 |
7 |
21 |
29 |
5 |
3 |
1 |
f.info()
Int64Index: 100894 entries, 37 to 1038595
Data columns (total 33 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 core_cust_id 100894 non-null object
1 f1 100894 non-null object
2 f2 58531 non-null float64
3 f3 42937 non-null float64
4 f4 34636 non-null float64
5 f5 12879 non-null float64
6 f6 20731 non-null float64
7 f7 58531 non-null float64
8 f8 42937 non-null float64
9 f9 34636 non-null float64
10 f10 12879 non-null float64
11 f11 20731 non-null float64
12 f12 58531 non-null float64
13 f13 42937 non-null float64
14 f14 34636 non-null float64
15 f15 12879 non-null float64
16 f16 20731 non-null float64
17 f17 58531 non-null float64
18 f18 42937 non-null float64
19 f19 34636 non-null float64
20 f20 12879 non-null float64
21 f21 20731 non-null float64
22 f22 100894 non-null object
23 f_start_datetime 100894 non-null datetime64[ns]
24 f_end_datetime 100894 non-null datetime64[ns]
25 f_diff_time 100894 non-null int64
26 year_f 100894 non-null int32
27 month_f 100894 non-null int32
28 day_f 100894 non-null int32
29 weekofyear_f 100894 non-null int32
30 weekday_f 100894 non-null int32
31 quarter_f 100894 non-null int32
32 is_wknd_f 100894 non-null int64
dtypes: datetime64[ns](2), float64(20), int32(6), int64(2), object(3)
memory usage: 23.9+ MB
- f2 定期季日均 — f3 大额存单季日均 — f4 A 理财产品季日均 — f5 代销资管季日均 — f6 代销基金季日均
- f7 定期时点余额 — f8 大额存单时点余额 — f9 A 理财产品时点余额 — f10 代销资管时点余额 — f11 代销基金时点余额
- f12 定期月日均 — f13 大额存单月日均 — f14 A 理财产品月日均 — f15 代销资管月日均 — f16 代销基金月日均
- f17 定期年日均 — f18 大额存单年日均 — f19 A 理财产品年日均 — f20 代销资管年日均 — f21 代销基金年日均
f['date'] = f['f22'].apply(lambda x: x[:6])
2021-07-012021-08-012021-09-012021-10-01
gc.collect()
15464
# Snapshot date (f22) -> the sample month (a3) it is joined to: each snapshot
# is attached to the samples of the following month.
snapshot_to_sample_month = {
    '20210630': '2021-07-01',
    '20210730': '2021-08-01',
    '20210830': '2021-09-01',
    '20210930': '2021-10-01',
}
date_feature_cols = ['core_cust_id', 'f22', 'f_diff_time', 'year_f', 'month_f', 'day_f',
                     'weekofyear_f', 'weekday_f', 'quarter_f', 'is_wknd_f']
# Work on an explicit copy so relabelling f22 leaves f untouched.
ff = f[date_feature_cols].copy()
ff['f22'] = ff['f22'].replace(snapshot_to_sample_month)
ff = ff.rename(columns={'f22' : 'a3'})
# Attach the date features to the samples by customer and sample month.
df = pd.merge(df, ff, on=['core_cust_id', 'a3'], how='left')
df.shape[0]
390977
gc.collect()
34
df.nunique()
id 390977
core_cust_id 212637
prod_code 2
a2 1
a3 4
y 2
type 2
uid_count_1 22
pid_nunique_1 6
pid_mean_count_1 33
净值_apply_mean_1 702
净值_apply_max_1 42
净值_apply_min_1 43
净值_apply_std_1 812
净值_apply_median_1 202
净值_apply_sum_1 835
净值_apply_max_min_1 159
总金额_apply_mean_1 910
总金额_apply_max_1 689
总金额_apply_min_1 504
总金额_apply_std_1 489
总金额_apply_median_1 724
总金额_apply_sum_1 992
总金额_apply_max_min_1 385
超额管理费_apply_mean_1 796
超额管理费_apply_max_1 585
超额管理费_apply_min_1 443
超额管理费_apply_std_1 416
超额管理费_apply_median_1 641
超额管理费_apply_sum_1 846
超额管理费_apply_max_min_1 311
apply_amt_apply_mean_1 703
apply_amt_apply_max_1 377
apply_amt_apply_min_1 311
apply_amt_apply_std_1 693
apply_amt_apply_median_1 434
apply_amt_apply_sum_1 1037
apply_amt_apply_max_min_1 259
业务代码_apply_nunique_1 6
渠道标识_apply_nunique_1 2
资金状态_apply_nunique_1 5
交易状态_apply_nunique_1 3
deal_bus_apply_nunique_1 8
deal_bus_channel_apply_nunique_1 8
deal_bus_fund_apply_nunique_1 8
deal_bus_c_f_apply_nunique_1 8
uid_count_2 0
pid_nunique_2 0
pid_mean_count_2 0
净值_apply_mean_2 0
净值_apply_max_2 0
净值_apply_min_2 0
净值_apply_std_2 0
净值_apply_median_2 0
净值_apply_sum_2 0
净值_apply_max_min_2 0
总金额_apply_mean_2 0
总金额_apply_max_2 0
总金额_apply_min_2 0
总金额_apply_std_2 0
总金额_apply_median_2 0
总金额_apply_sum_2 0
总金额_apply_max_min_2 0
超额管理费_apply_mean_2 0
超额管理费_apply_max_2 0
超额管理费_apply_min_2 0
超额管理费_apply_std_2 0
超额管理费_apply_median_2 0
超额管理费_apply_sum_2 0
超额管理费_apply_max_min_2 0
apply_amt_apply_mean_2 0
apply_amt_apply_max_2 0
apply_amt_apply_min_2 0
apply_amt_apply_std_2 0
apply_amt_apply_median_2 0
apply_amt_apply_sum_2 0
apply_amt_apply_max_min_2 0
业务代码_apply_nunique_2 0
渠道标识_apply_nunique_2 0
资金状态_apply_nunique_2 0
交易状态_apply_nunique_2 0
deal_bus_apply_nunique_2 0
deal_bus_channel_apply_nunique_2 0
deal_bus_fund_apply_nunique_2 0
deal_bus_c_f_apply_nunique_2 0
uid_count_b 48
pid_nunique_b 14
pid_mean_count_b 146
净值_apply_mean_b 4033
净值_apply_max_b 76
净值_apply_min_b 62
净值_apply_std_b 5427
净值_apply_median_b 453
净值_apply_sum_b 4794
净值_apply_max_min_b 414
总金额_apply_mean_b 1790
总金额_apply_max_b 839
总金额_apply_min_b 11
总金额_apply_std_b 1902
总金额_apply_median_b 366
总金额_apply_sum_b 1936
总金额_apply_max_min_b 835
超额管理费_apply_mean_b 1438
超额管理费_apply_max_b 689
超额管理费_apply_min_b 1
超额管理费_apply_std_b 1589
超额管理费_apply_median_b 294
超额管理费_apply_sum_b 1529
超额管理费_apply_max_min_b 689
apply_amt_apply_mean_b 2578
apply_amt_apply_max_b 618
apply_amt_apply_min_b 443
apply_amt_apply_std_b 4182
apply_amt_apply_median_b 725
apply_amt_apply_sum_b 3875
apply_amt_apply_max_min_b 585
业务代码_apply_nunique_b 7
渠道标识_apply_nunique_b 3
资金状态_apply_nunique_b 5
交易状态_apply_nunique_b 4
deal_bus_apply_nunique_b 10
deal_bus_channel_apply_nunique_b 10
deal_bus_fund_apply_nunique_b 11
deal_bus_c_f_apply_nunique_b 11
uid_count_b1 43
pid_nunique_b1 13
pid_mean_count_b1 131
净值_apply_mean_b1 3199
净值_apply_max_b1 69
净值_apply_min_b1 54
净值_apply_std_b1 4263
净值_apply_median_b1 376
净值_apply_sum_b1 3804
净值_apply_max_min_b1 340
总金额_apply_mean_b1 1170
总金额_apply_max_b1 621
总金额_apply_min_b1 11
总金额_apply_std_b1 1239
总金额_apply_median_b1 252
总金额_apply_sum_b1 1272
总金额_apply_max_min_b1 617
超额管理费_apply_mean_b1 888
超额管理费_apply_max_b1 481
超额管理费_apply_min_b1 1
超额管理费_apply_std_b1 967
超额管理费_apply_median_b1 189
超额管理费_apply_sum_b1 948
超额管理费_apply_max_min_b1 481
apply_amt_apply_mean_b1 2205
apply_amt_apply_max_b1 594
apply_amt_apply_min_b1 435
apply_amt_apply_std_b1 3452
apply_amt_apply_median_b1 685
apply_amt_apply_sum_b1 3329
apply_amt_apply_max_min_b1 550
业务代码_apply_nunique_b1 7
渠道标识_apply_nunique_b1 3
资金状态_apply_nunique_b1 5
交易状态_apply_nunique_b1 4
deal_bus_apply_nunique_b1 10
deal_bus_channel_apply_nunique_b1 10
deal_bus_fund_apply_nunique_b1 10
deal_bus_c_f_apply_nunique_b1 10
uid_count_b2 36
pid_nunique_b2 13
pid_mean_count_b2 114
净值_apply_mean_b2 2407
净值_apply_max_b2 61
净值_apply_min_b2 46
净值_apply_std_b2 3137
净值_apply_median_b2 284
净值_apply_sum_b2 2857
净值_apply_max_min_b2 268
总金额_apply_mean_b2 581
总金额_apply_max_b2 332
总金额_apply_min_b2 11
总金额_apply_std_b2 621
总金额_apply_median_b2 134
总金额_apply_sum_b2 658
总金额_apply_max_min_b2 328
超额管理费_apply_mean_b2 347
超额管理费_apply_max_b2 203
超额管理费_apply_min_b2 1
超额管理费_apply_std_b2 381
超额管理费_apply_median_b2 77
超额管理费_apply_sum_b2 393
超额管理费_apply_max_min_b2 203
apply_amt_apply_mean_b2 1893
apply_amt_apply_max_b2 562
apply_amt_apply_min_b2 422
apply_amt_apply_std_b2 2781
apply_amt_apply_median_b2 653
apply_amt_apply_sum_b2 2815
apply_amt_apply_max_min_b2 493
业务代码_apply_nunique_b2 7
渠道标识_apply_nunique_b2 3
资金状态_apply_nunique_b2 5
交易状态_apply_nunique_b2 4
deal_bus_apply_nunique_b2 10
deal_bus_channel_apply_nunique_b2 10
deal_bus_fund_apply_nunique_b2 10
deal_bus_c_f_apply_nunique_b2 10
flow_redu_b1 26
flow_redu_b 28
flow_redu_b2 22
flow_redu_1 17
flow_redu_2 0
risk_count 3
risk_change 2
risk_level_mean 10
risk_max 5
risk_min 5
risk_count_b 14
risk_change_b 5
risk_level_mean_b 80
risk_max_b 5
risk_min_b 5
客户风险等级 5
date 98
risk_diff_date 123
性别 2
客户等级 4
年龄 84
age_gap 1
age_grade 221
age_bin10 10
age_bin15 15
age_bin20 20
age_bin25 25
age_bin30 30
age_频度 80
age_freq3 3
age_freq5 5
age_freq7 7
age_freq9 9
age_freq11 11
cust_grade_频度 4
prod_age_count 2
prod_age_max 2
prod_age_min 1
prod_age_sum 2
prod_age_std 2
prod_age_median 2
prod_age_nunique 2
uid_click_action_count_b 436
pid_click_action_nunique_b 169
uid_click_r3_nunique_b 2
uid_click_iswknd_sum_b 193
uid_click_iswknd_median_b 3
uid_click_click_hour_median_b 47
uid_click_click_hour_sum_b 2518
uid_click_click_hour_max_b 24
uid_click_click_hour_min_b 24
uid_click_click_hour_nunique_b 24
uid_click_click_hour_smean_b 1610
uid_click_click_month_median_b 15
uid_click_click_month_sum_b 1392
uid_click_click_month_max_b 7
uid_click_click_month_min_b 7
uid_click_click_month_nunique_b 7
uid_click_click_month_smean_b 1003
uid_click_click_d_median_b 61
uid_click_click_d_sum_b 2904
uid_click_click_d_max_b 31
uid_click_click_d_min_b 31
uid_click_click_d_nunique_b 31
uid_click_click_d_smean_b 1833
uid_click_weekofyear_r_median_b 85
uid_click_weekofyear_r_sum_b 3387
uid_click_weekofyear_r_max_b 32
uid_click_weekofyear_r_min_b 32
uid_click_weekofyear_r_nunique_b 32
uid_click_weekofyear_r_smean_b 1849
uid_click_weekday_r_median_b 13
uid_click_weekday_r_sum_b 845
uid_click_weekday_r_max_b 7
uid_click_weekday_r_min_b 7
uid_click_weekday_r_nunique_b 7
uid_click_weekday_r_smean_b 683
uid_click_quarter_r_median_b 5
uid_click_quarter_r_sum_b 753
uid_click_quarter_r_max_b 3
uid_click_quarter_r_min_b 3
uid_click_quarter_r_nunique_b 3
uid_click_quarter_r_smean_b 624
uid_click_action_count_b1 429
pid_click_action_nunique_b1 166
uid_click_r3_nunique_b1 2
uid_click_iswknd_sum_b1 195
uid_click_iswknd_median_b1 3
uid_click_click_hour_median_b1 47
uid_click_click_hour_sum_b1 2513
uid_click_click_hour_max_b1 24
uid_click_click_hour_min_b1 24
uid_click_click_hour_nunique_b1 24
uid_click_click_hour_smean_b1 1615
uid_click_click_month_median_b1 15
uid_click_click_month_sum_b1 1352
uid_click_click_month_max_b1 7
uid_click_click_month_min_b1 7
uid_click_click_month_nunique_b1 7
uid_click_click_month_smean_b1 1012
uid_click_click_d_median_b1 61
uid_click_click_d_sum_b1 2895
uid_click_click_d_max_b1 31
uid_click_click_d_min_b1 31
uid_click_click_d_nunique_b1 31
uid_click_click_d_smean_b1 1819
uid_click_weekofyear_r_median_b1 84
uid_click_weekofyear_r_sum_b1 3261
uid_click_weekofyear_r_max_b1 32
uid_click_weekofyear_r_min_b1 32
uid_click_weekofyear_r_nunique_b1 32
uid_click_weekofyear_r_smean_b1 1822
uid_click_weekday_r_median_b1 13
uid_click_weekday_r_sum_b1 839
uid_click_weekday_r_max_b1 7
uid_click_weekday_r_min_b1 7
uid_click_weekday_r_nunique_b1 7
uid_click_weekday_r_smean_b1 671
uid_click_quarter_r_median_b1 5
uid_click_quarter_r_sum_b1 738
uid_click_quarter_r_max_b1 3
uid_click_quarter_r_min_b1 3
uid_click_quarter_r_nunique_b1 3
uid_click_quarter_r_smean_b1 630
cp_click_action_count_b 150
cp_click_r3_nunique_b 1
cp_click_iswknd_sum_b 72
cp_click_iswknd_median_b 3
cp_click_click_hour_median_b 47
cp_click_click_hour_sum_b 913
cp_click_click_hour_max_b 24
cp_click_click_hour_min_b 24
cp_click_click_hour_nunique_b 24
cp_click_click_hour_smean_b 494
cp_click_click_month_median_b 12
cp_click_click_month_sum_b 508
cp_click_click_month_max_b 6
cp_click_click_month_min_b 6
cp_click_click_month_nunique_b 6
cp_click_click_month_smean_b 290
cp_click_click_d_median_b 61
cp_click_click_d_sum_b 1083
cp_click_click_d_max_b 31
cp_click_click_d_min_b 31
cp_click_click_d_nunique_b 31
cp_click_click_d_smean_b 559
cp_click_weekofyear_r_median_b 51
cp_click_weekofyear_r_sum_b 1312
cp_click_weekofyear_r_max_b 24
cp_click_weekofyear_r_min_b 24
cp_click_weekofyear_r_nunique_b 23
cp_click_weekofyear_r_smean_b 498
cp_click_weekday_r_median_b 13
cp_click_weekday_r_sum_b 293
cp_click_weekday_r_max_b 7
cp_click_weekday_r_min_b 7
cp_click_weekday_r_nunique_b 7
cp_click_weekday_r_smean_b 267
cp_click_quarter_r_median_b 5
cp_click_quarter_r_sum_b 264
cp_click_quarter_r_max_b 3
cp_click_quarter_r_min_b 3
cp_click_quarter_r_nunique_b 3
cp_click_quarter_r_smean_b 209
cp_click_action_count_b1 138
cp_click_r3_nunique_b1 1
cp_click_iswknd_sum_b1 67
cp_click_iswknd_median_b1 3
cp_click_click_hour_median_b1 47
cp_click_click_hour_sum_b1 871
cp_click_click_hour_max_b1 24
cp_click_click_hour_min_b1 24
cp_click_click_hour_nunique_b1 24
cp_click_click_hour_smean_b1 464
cp_click_click_month_median_b1 12
cp_click_click_month_sum_b1 471
cp_click_click_month_max_b1 6
cp_click_click_month_min_b1 6
cp_click_click_month_nunique_b1 6
cp_click_click_month_smean_b1 281
cp_click_click_d_median_b1 61
cp_click_click_d_sum_b1 1029
cp_click_click_d_max_b1 31
cp_click_click_d_min_b1 31
cp_click_click_d_nunique_b1 31
cp_click_click_d_smean_b1 524
cp_click_weekofyear_r_median_b1 51
cp_click_weekofyear_r_sum_b1 1224
cp_click_weekofyear_r_max_b1 24
cp_click_weekofyear_r_min_b1 24
cp_click_weekofyear_r_nunique_b1 22
cp_click_weekofyear_r_smean_b1 473
cp_click_weekday_r_median_b1 13
cp_click_weekday_r_sum_b1 279
cp_click_weekday_r_max_b1 7
cp_click_weekday_r_min_b1 7
cp_click_weekday_r_nunique_b1 7
cp_click_weekday_r_smean_b1 250
cp_click_quarter_r_median_b1 5
cp_click_quarter_r_sum_b1 247
cp_click_quarter_r_max_b1 3
cp_click_quarter_r_min_b1 3
cp_click_quarter_r_nunique_b1 3
cp_click_quarter_r_smean_b1 198
deal_count 277
deal_nunique 202
single_deal 2232
deal_mean 73099
borrow_sum 74470
borrow_max 32457
borrow_min 24461
borrow_std 67088
borrow_median 36804
deal_diff_max_min 48290
deal_count_2 0
deal_nunique_2 0
single_deal_2 0
deal_mean_2 0
borrow_sum_2 0
borrow_max_2 0
borrow_min_2 0
borrow_std_2 0
borrow_median_2 0
deal_diff_max_min_2 0
deal_count_b1 980
deal_nunique_b1 581
single_deal_b1 9244
deal_mean_b1 122604
borrow_sum_b1 124481
borrow_max_b1 37325
borrow_min_b1 20383
borrow_std_b1 122418
borrow_median_b1 41283
deal_diff_max_min_b1 65336
deal_count_b2 869
deal_nunique_b2 517
single_deal_b2 7963
deal_mean_b2 114017
borrow_sum_b2 115903
borrow_max_b2 34360
borrow_min_b2 19338
borrow_std_b2 113487
borrow_median_b2 39598
deal_diff_max_min_b2 61295
f2_mean 7351
f3_mean 2446
f5_mean 1975
f6_mean 6853
f7_mean 3326
f8_mean 1113
f10_mean 1721
f11_mean 6341
f12_mean 4592
f13_mean 1637
f15_mean 1836
f16_mean 6640
f17_mean 15999
f18_mean 4912
f20_mean 2684
f21_mean 7338
f2_nunique 2
f2_max 7351
f2_min 7351
f2_sum 7352
f2_std 0
f2_median 7351
f2_diff_max_min 1
f3_nunique 2
f3_max 2446
f3_min 2446
f3_sum 2447
f3_std 0
f3_median 2446
f3_diff_max_min 1
f5_nunique 2
f5_max 1975
f5_min 1975
f5_sum 1976
f5_std 0
f5_median 1975
f5_diff_max_min 1
f6_nunique 2
f6_max 6853
f6_min 6853
f6_sum 6854
f6_std 0
f6_median 6853
f6_diff_max_min 1
f7_nunique 2
f7_max 3326
f7_min 3326
f7_sum 3327
f7_std 0
f7_median 3326
f7_diff_max_min 1
f8_nunique 2
f8_max 1113
f8_min 1113
f8_sum 1114
f8_std 0
f8_median 1113
f8_diff_max_min 1
f10_nunique 2
f10_max 1721
f10_min 1721
f10_sum 1722
f10_std 0
f10_median 1721
f10_diff_max_min 1
f11_nunique 2
f11_max 6341
f11_min 6341
f11_sum 6342
f11_std 0
f11_median 6341
f11_diff_max_min 1
f12_nunique 2
f12_max 4592
f12_min 4592
f12_sum 4593
f12_std 0
f12_median 4592
f12_diff_max_min 1
f13_nunique 2
f13_max 1637
f13_min 1637
f13_sum 1638
f13_std 0
f13_median 1637
f13_diff_max_min 1
f15_nunique 2
f15_max 1836
f15_min 1836
f15_sum 1837
f15_std 0
f15_median 1836
f15_diff_max_min 1
f16_nunique 2
f16_max 6640
f16_min 6640
f16_sum 6641
f16_std 0
f16_median 6640
f16_diff_max_min 1
f17_nunique 2
f17_max 15999
f17_min 15999
f17_sum 16000
f17_std 0
f17_median 15999
f17_diff_max_min 1
f18_nunique 2
f18_max 4912
f18_min 4912
f18_sum 4913
f18_std 0
f18_median 4912
f18_diff_max_min 1
f20_nunique 2
f20_max 2684
f20_min 2684
f20_sum 2685
f20_std 0
f20_median 2684
f20_diff_max_min 1
f21_nunique 2
f21_max 7338
f21_min 7338
f21_sum 7339
f21_std 0
f21_median 7338
f21_diff_max_min 1
f2_mean_b 8488
f3_mean_b 2931
f5_mean_b 1747
f6_mean_b 4647
f7_mean_b 2807
f8_mean_b 1086
f10_mean_b 1438
f11_mean_b 4193
f12_mean_b 4616
f13_mean_b 2079
f15_mean_b 1523
f16_mean_b 4424
f17_mean_b 12085
f18_mean_b 3745
f20_mean_b 2061
f21_mean_b 4836
f2_nunique_b 4
f2_max_b 5463
f2_min_b 5238
f2_sum_b 9220
f2_std_b 4754
f2_median_b 7300
f2_diff_max_min_b 3690
f3_nunique_b 4
f3_max_b 1960
f3_min_b 2026
f3_sum_b 3688
f3_std_b 1134
f3_median_b 2569
f3_diff_max_min_b 859
f5_nunique_b 4
f5_max_b 1535
f5_min_b 1219
f5_sum_b 1848
f5_std_b 846
f5_median_b 1668
f5_diff_max_min_b 789
f6_nunique_b 4
f6_max_b 4010
f6_min_b 3169
f6_sum_b 4701
f6_std_b 2584
f6_median_b 4466
f6_diff_max_min_b 2347
f7_nunique_b 4
f7_max_b 2571
f7_min_b 2179
f7_sum_b 3851
f7_std_b 889
f7_median_b 2634
f7_diff_max_min_b 783
f8_nunique_b 4
f8_max_b 1037
f8_min_b 1024
f8_sum_b 1961
f8_std_b 70
f8_median_b 1050
f8_diff_max_min_b 53
f10_nunique_b 4
f10_max_b 1362
f10_min_b 1038
f10_sum_b 1566
f10_std_b 639
f10_median_b 1395
f10_diff_max_min_b 607
f11_nunique_b 4
f11_max_b 3469
f11_min_b 3154
f11_sum_b 4247
f11_std_b 2328
f11_median_b 4063
f11_diff_max_min_b 2213
f12_nunique_b 4
f12_max_b 3507
f12_min_b 3047
f12_sum_b 5552
f12_std_b 2104
f12_median_b 4121
f12_diff_max_min_b 1669
f13_nunique_b 4
f13_max_b 1477
f13_min_b 1517
f13_sum_b 2887
f13_std_b 676
f13_median_b 1834
f13_diff_max_min_b 445
f15_nunique_b 4
f15_max_b 1409
f15_min_b 1124
f15_sum_b 1645
f15_std_b 700
f15_median_b 1487
f15_diff_max_min_b 678
f16_nunique_b 4
f16_max_b 3737
f16_min_b 3267
f16_sum_b 4478
f16_std_b 2420
f16_median_b 4289
f16_diff_max_min_b 2322
f17_nunique_b 4
f17_max_b 10601
f17_min_b 7600
f17_sum_b 12232
f17_std_b 7247
f17_median_b 11760
f17_diff_max_min_b 6906
f18_nunique_b 4
f18_max_b 3342
f18_min_b 2761
f18_sum_b 4410
f18_std_b 1659
f18_median_b 3670
f18_diff_max_min_b 1553
f20_nunique_b 4
f20_max_b 1688
f20_min_b 1747
f20_sum_b 2154
f20_std_b 1049
f20_median_b 2037
f20_diff_max_min_b 1022
f21_nunique_b 4
f21_max_b 4322
f21_min_b 3447
f21_sum_b 4893
f21_std_b 2693
f21_median_b 4760
f21_diff_max_min_b 2586
f_diff_time 1162
year_f 4
month_f 12
day_f 31
weekofyear_f 53
weekday_f 7
quarter_f 4
is_wknd_f 2
dtype: int64