R语言-小试牛刀-用data.table进行数据的预处理


# data.table包预处理数据
library(data.table)
# Load the application table and convert it to a data.table.
app_row <- read.csv(
  "E:/工作整理/06 analysis data/申请数据-201908.csv",
  stringsAsFactors = FALSE
)
app_row <- data.table(app_row)

# Load every third-party CSV found in `path`. Each file becomes a global
# data.table named "<part of the file name before '_201908'>_data".
# A file name that does not contain "_201908" is kept whole, extension
# included (e.g. "std_target_201907.csv" -> `std_target_201907.csv_data`).
path <- "E:/工作整理/04 政策制定/4.5 三方数据拒贷规则/三方数据-线下邮件发送"
# list.files() expects a regular expression, not a shell glob, so anchor on
# the ".csv" extension explicitly.
files <- list.files(path = path, pattern = "\\.csv$")
for (file in files) {
  # strsplit() returns every piece around the separator (e.g. "td_query" and
  # ".csv"); keep only the leading piece so assign() receives exactly one
  # name instead of warning about a length-2 vector.
  name <- strsplit(file, "_201908", fixed = TRUE)[[1]][1]
  assign(
    paste0(name, "_data"),
    data.table(read.csv(file.path(path, file), stringsAsFactors = FALSE))
  )
}
# Prefix every column of each third-party table with its source tag.
# setnames() renames by reference, so the global tables are updated in place
# even though they are reached through get().
invisible(local({
  source_tag <- c(
    td_qizha_V1_data = "td_",
    td_qizha_V2_data = "td_",
    td_zhixin_data   = "td_",
    td_query_data    = "td_",
    xy_apply_data    = "xy_",
    xy_default_data  = "xy_",
    xy_duotou_data   = "xy_",
    yl_query_data    = "yl_",  # UnionPay data
    ym_query_data    = "ym_"
  )
  for (tbl_name in names(source_tag)) {
    tbl <- get(tbl_name)
    setnames(tbl, names(tbl), paste0(source_tag[[tbl_name]], names(tbl)))
  }
}))
# Drop columns from the third-party tables. Assigning NULL with `:=` deletes
# the listed columns by reference; one call per table removes them together.
td_query_data[, c("td_query_date", "td_modify_date") := NULL]
td_zhixin_data[, c("td_query_date", "td_modify_date", "td_credit_id") := NULL]
xy_apply_data[, c("xy_modify_date", "xy_query_date", "xy_query_id") := NULL]
xy_default_data[, c("xy_modify_date", "xy_query_date", "xy_query_id") := NULL]
xy_duotou_data[
  ,
  c("xy_modify_date", "xy_query_date", "xy_query_id", "xy_code") := NULL
]
yl_query_data[, c("yl_modify_date", "yl_query_date") := NULL]
ym_query_data[, c("ym_modify_date", "ym_query_date") := NULL]
# Build `mergedata` by left-joining every source onto the application table.
# `all.x = TRUE` keeps every row of the left table; the key is `transport_id`
# on the left and the prefixed `<tag>_transport_id` on the right.
# NOTE(review): a right-hand table with several rows per transport_id would
# multiply rows in the result; confirm the keys are unique.
mergedata <- merge(
  app_row, std_target_201907.csv_data,
  by = "transport_id", all.x = TRUE
)
# Fixed: this call previously read `all,x=T`, which bound TRUE to the formal
# `x` and shifted the positional arguments instead of requesting a left join.
mergedata <- merge(
  mergedata, td_query_data,
  by.x = "transport_id", by.y = "td_transport_id", all.x = TRUE
)
# Only the fraud score and decision are taken from the V1 fraud table.
mergedata <- merge(
  mergedata,
  td_qizha_V1_data[
    , c("td_transport_id", "td_fraud_score", "td_fraud_decision"),
    with = FALSE
  ],
  by.x = "transport_id", by.y = "td_transport_id", all.x = TRUE
)
mergedata <- merge(
  mergedata, td_zhixin_data,
  by.x = "transport_id", by.y = "td_transport_id", all.x = TRUE
)
# The xy_* and ym tables contribute only the rows whose valid flag equals 1.
mergedata <- merge(
  mergedata, xy_apply_data[xy_valid_flg == 1],
  by.x = "transport_id", by.y = "xy_transport_id", all.x = TRUE
)
mergedata <- merge(
  mergedata, xy_default_data[xy_valid_flg == 1],
  by.x = "transport_id", by.y = "xy_transport_id", all.x = TRUE
)
mergedata <- merge(
  mergedata, xy_duotou_data[xy_valid_flg == 1],
  by.x = "transport_id", by.y = "xy_transport_id", all.x = TRUE
)
mergedata <- merge(
  mergedata, yl_query_data,
  by.x = "transport_id", by.y = "yl_transport_id", all.x = TRUE
)
mergedata <- merge(
  mergedata, ym_query_data[ym_valid_flg == 1],
  by.x = "transport_id", by.y = "ym_transport_id", all.x = TRUE
)



你可能感兴趣的:(R语言)