系统版本:CentOS 7.5
数据库:MySQL 5.7.23
ODS表为原始数据,数据从客户业务系统拉取,包含中文数据;
DW表为去掉中文字段后的数据,数据从ODS表获取;
DM为展示的数据,即系统最后需求的数据;
T为基础表。
银行表:t_bank
原始数据表:
ods_bankdata_gs,ods_bankdata_ny,ods_bankdata_zg,ods_bankdata_js……
ods_bankdata_gs_history,ods_bankdata_ny_history……..
DW数据表:
dw_bankdata_gs,dw_bankdata_ny,dw_bankdata_zg,dw_bankdata_js……
dw_bankdata_gs_history,dw_bankdata_ny_history……..
DM展示表:dm_bankfor
# Create the database used by this script and make it the default schema,
# so the tables and procedures that follow are created inside it.
# The character set is given explicitly because the ODS tables store Chinese
# text; a MySQL 5.7 server whose default is still latin1 cannot store it.
create database if not exists dbBank default character set utf8mb4;
use dbBank;
/*
需求:
拉取过来的数据(比如浦发,兴业,农行等),格式如:
兴业:
手机号,姓名,开通信用卡功能(10万条)
浦发:
手机号,姓名,开通信用卡功能(10万条)
......
将上面的数据合并。因为同一个身份证号有可能在多家银行开有信用卡,所以需要按身份证号去重:假如两家银行共20万条数据中只有18万个不重复的身份证号,则合并后只保留18万条数据
*/
#################################################
################## BASE ##################
#################################################
/*银行表:t_bank
序号,代码,银行名,银行简写
bankId
bankCode
bankName
bankAD
*/
# Bank master table: one row per bank.
# bankAD is the short code that also appears as the suffix of the per-bank
# table names (ods_bankdata_<code>, dw_bankdata_<code>) and of the
# batch_<code> / instime_<code> columns of dm_bankfor.
create table t_bank(
    bankId int primary key auto_increment comment '主键自增',
    bankCode varchar(50) comment '银行代码',
    bankName varchar(200) not null unique comment '银行名',
    bankAD varchar(20) not null unique comment '银行简写'
)comment='银行表';

# Seed data. The column list is explicit so the statement keeps working if
# columns are added or reordered; a single multi-row insert loads all rows
# atomically. bankCode is left as an empty string for every bank.
insert into t_bank (bankId, bankCode, bankName, bankAD) values
    (1,'','工商银行','gs'),
    (2,'','农业银行','ny'),
    (3,'','中国银行','zg'),
    (4,'','建设银行','js'),
    (5,'','交通银行','jt'),
    (6,'','进出口银行','jck'),
    (7,'','国家开发银行','kf'),
    (8,'','农业发展银行','nf'),
    (9,'','招商银行','zs'),
    (10,'','浦发银行','pf'),
    (11,'','中信银行','zx'),
    (12,'','光大银行','gd'),
    (13,'','华夏银行','hx'),
    (14,'','民生银行','ms'),
    (15,'','广发银行','gf'),
    (16,'','兴业银行','xy'),
    (17,'','平安银行','pa'),
    (18,'','浙商银行','zhes'),
    (19,'','恒丰银行','hf'),
    (20,'','渤海银行','bh'),
    (21,'','广西农村信用社','gxnxs'),
    (22,'','邮政储蓄银行','yz');
commit;
##################################################
################## ODS ####################
##################################################
/*原始数据表:根据银行类型分表:ods_bankdata_gs,ods_bankdata_ny,ods_bankdata_zg,ods_bankdata_js,
序号,姓名,手机号,身份证号,银行名,插入时间,备注
id
name
phone
idNumber
bankName
insTime
notes
*/
# ICBC (gs) ODS table: raw rows as pulled from the source system.
# "if exists" lets the script run on a fresh database, where a bare
# "drop table" would abort with an unknown-table error.
drop table if exists ods_bankdata_gs;
create table ods_bankdata_gs(
    id bigint primary key auto_increment comment '序号',
    name varchar(50) comment '姓名',
    phone bigint comment '手机号',
    idnumber varchar(20) comment '身份证号',
    bankname varchar(100) comment '银行名',
    instime datetime comment '插入时间',
    notes varchar(200) comment '备注'
)comment='工商银行原始表';

# Sample data. Explicit column list; one multi-row insert so the sample set
# is loaded atomically.
insert into ods_bankdata_gs (id, name, phone, idnumber, bankname, instime, notes) values
    (1,'王正初',18502461576,'450204198004293000','工商银行',sysdate(),''),
    (2,'李娟丽',18845873946,'653101198412151000','工商银行',sysdate(),''),
    (3,'赵语山',19957322330,'320621197702127000','工商银行',sysdate(),''),
    (4,'张善和',16322872701,'150722198112018000','工商银行',sysdate(),''),
    (5,'张南烟',12850907936,'52262819870913206x','工商银行',sysdate(),''),
    (6,'王梦桃',15816875148,'440403199007149000','工商银行',sysdate(),''),
    (7,'王清涵',15875793602,'430923197805092000','工商银行',sysdate(),''),
    (8,'李春娇',12610912365,'130500199411174000','工商银行',sysdate(),''),
    (9,'王冰凡',14968798213,'141026198706191000','工商银行',sysdate(),''),
    (10,'王铃语',19781652609,'533323198803306000','工商银行',sysdate(),''),
    (11,'李博裕',11897455113,'340826198707281000','工商银行',sysdate(),''),
    (12,'李昊然',16237208490,'530181198712230000','工商银行',sysdate(),''),
    (13,'李沛凝',18044968073,'150100198704294000','工商银行',sysdate(),''),
    (14,'张诗蕾',16534073687,'610327198502213000','工商银行',sysdate(),''),
    (15,'张怀玉',18873064922,'210181198503183000','工商银行',sysdate(),''),
    (16,'张晨希',14113640518,'150105199306017000','工商银行',sysdate(),''),
    (17,'赵安露',13071845241,'441825198011015000','工商银行',sysdate(),''),
    (18,'赵凝芙',18809594871,'433126199306095000','工商银行',sysdate(),''),
    (19,'罗初晴',19594219248,'500108199111039000','工商银行',sysdate(),''),
    (20,'罗飞昂',17786947697,'411724197606164000','工商银行',sysdate(),'');
commit;
# Agricultural Bank (ny) ODS table: raw rows as pulled from the source system.
# "if exists" lets the script run on a fresh database, where a bare
# "drop table" would abort with an unknown-table error.
drop table if exists ods_bankdata_ny;
create table ods_bankdata_ny(
    id bigint primary key auto_increment comment '序号',
    name varchar(50) comment '姓名',
    phone bigint comment '手机号',
    idnumber varchar(20) comment '身份证号',
    bankname varchar(100) comment '银行名',
    instime datetime comment '插入时间',
    notes varchar(200) comment '备注'
)comment='农业银行原始表';

# Sample data. Rows 1-5 reuse ID numbers that also appear in the ICBC sample,
# which exercises the "same person at several banks" merge in dm_bankfor.
insert into ods_bankdata_ny (id, name, phone, idnumber, bankname, instime, notes) values
    (1,'张善和',16322872701,'150722198112018000','农业银行',sysdate(),''),
    (2,'李昊然',16237208490,'530181198712230000','农业银行',sysdate(),''),
    (3,'赵安露',13071845241,'441825198011015000','农业银行',sysdate(),''),
    (4,'罗飞昂',17786947697,'411724197606164000','农业银行',sysdate(),''),
    (5,'赵凝芙',18809594871,'433126199306095000','农业银行',sysdate(),''),
    (6,'陈弘扬',18492438399,'231283197705204000','农业银行',sysdate(),''),
    (7,'陈晶晶',12552681742,'15010019870622882x','农业银行',sysdate(),''),
    (8,'陈奇思',18725476234,'441601199201178000','农业银行',sysdate(),''),
    (9,'陈凝远',13876150809,'621224198305040000','农业银行',sysdate(),''),
    (10,'李学',16162095706,'150102198008201000','农业银行',sysdate(),''),
    (11,'张戈',17573568191,'140524199110122000','农业银行',sysdate(),''),
    (12,'卜洪',12939298114,'370826198206133000','农业银行',sysdate(),''),
    (13,'罗明',18581723138,'411224197509134000','农业银行',sysdate(),''),
    (14,'欧阳伟大',18972010816,'610822198907180000','农业银行',sysdate(),''),
    (15,'钱怀柔',18910192175,'513426199302189000','农业银行',sysdate(),''),
    (16,'钱采波',13645247225,'451221199203159000','农业银行',sysdate(),''),
    (17,'钱秋芳',18550469783,'321324198203030000','农业银行',sysdate(),''),
    (18,'朱天青',13827526397,'450901198007240000','农业银行',sysdate(),''),
    (19,'朱思凡',18075584434,'450981199210025000','农业银行',sysdate(),''),
    (20,'朱天薇',17391130746,'451122198505270000','农业银行',sysdate(),'');
commit;
#----ods历史表,用于存储历史数据----#
# ICBC ODS history table: same columns as ods_bankdata_gs, used to keep rows
# from earlier loads. "if exists" keeps the first run from failing.
drop table if exists ods_bankdata_gs_history;
create table ods_bankdata_gs_history(
    id bigint primary key auto_increment comment '序号',
    name varchar(50) comment '姓名',
    phone bigint comment '手机号',
    idnumber varchar(20) comment '身份证号',
    bankname varchar(100) comment '银行名',
    instime datetime comment '插入时间',
    notes varchar(200) comment '备注'
)comment='工商银行原始表_历史表';
# Agricultural Bank ODS history table: same columns as ods_bankdata_ny, used
# to keep rows from earlier loads. "if exists" keeps the first run from failing.
drop table if exists ods_bankdata_ny_history;
create table ods_bankdata_ny_history(
    id bigint primary key auto_increment comment '序号',
    name varchar(50) comment '姓名',
    phone bigint comment '手机号',
    idnumber varchar(20) comment '身份证号',
    bankname varchar(100) comment '银行名',
    instime datetime comment '插入时间',
    notes varchar(200) comment '备注'
)comment='农业银行原始表历史表';
################################################
################ DW #####################
################################################
/*DW表:根据银行类型分表: dw_bankdata_gs,dw_bankdata_ny,dw_bankdata_zg,dw_bankdata_js,
序号,身份证号,手机号,银行简写,批次,插入时间
id
idnumber
phone
bankad
batch
instime
*/
# ICBC (gs) DW table: the ODS rows without the Chinese-text columns, plus the
# bank short code and a batch number.
# "if exists" lets the script run on a fresh database.
drop table if exists dw_bankdata_gs;
create table dw_bankdata_gs(
    id bigint primary key auto_increment comment '序号',
    idnumber varchar(20) comment '身份证号',
    phone bigint comment '手机号',
    bankad varchar(100) comment '银行简写',
    batch_gs int comment '工商批次',
    instime datetime comment '插入时间'
)comment='工商银行dw';
# Agricultural Bank (ny) DW table: the ODS rows without the Chinese-text
# columns, plus the bank short code and a batch number.
# "if exists" lets the script run on a fresh database.
drop table if exists dw_bankdata_ny;
create table dw_bankdata_ny(
    id bigint primary key auto_increment comment '序号',
    idnumber varchar(20) comment '身份证号',
    phone bigint comment '手机号',
    bankad varchar(100) comment '银行简写',
    batch_ny int comment '农业批次',
    instime datetime comment '插入时间'
)comment='农业银行dw';
#######--------历史表,用于存储历史数据
# ICBC DW history table: receives the rows of dw_bankdata_gs before that
# table is reloaded with a new batch.
# "if exists" lets the script run on a fresh database.
drop table if exists dw_bankdata_gs_history;
create table dw_bankdata_gs_history(
    id bigint primary key auto_increment comment '序号',
    idnumber varchar(20) comment '身份证号',
    phone bigint comment '手机号',
    bankad varchar(100) comment '银行简写',
    batch_gs int comment '工商批次',
    # datetime, matching dw_bankdata_gs.instime; a date column would silently
    # drop the time of day when rows are archived.
    instime datetime comment '插入时间'
)comment='工商银行dw历史表';
# Agricultural Bank DW history table: receives the rows of dw_bankdata_ny
# before that table is reloaded with a new batch.
# "if exists" lets the script run on a fresh database.
drop table if exists dw_bankdata_ny_history;
create table dw_bankdata_ny_history(
    id bigint primary key auto_increment comment '序号',
    idnumber varchar(20) comment '身份证号',
    phone bigint comment '手机号',
    bankad varchar(100) comment '银行简写',
    batch_ny int comment '农业批次',
    instime datetime comment '插入时间'
)comment='农业银行dw历史表';
##################################
######## DM ###############
##################################
/*
**************************
实际需求的表--数据表:dm_bankfor
序号,身份证号,手机号,银行简写,插入时间
id
idnumber
phone
bankad
instime
batch_gs
instime_gs
batch_ny
instime_ny
……..
**************************
*/
# DM presentation table: one row per ID number (idnumber is unique).
# bankad holds the comma-separated short codes of the banks the person was
# seen at; each bank has its own batch_<code> / instime_<code> column pair.
# "if exists" lets the script run on a fresh database.
drop table if exists dm_bankfor;
create table dm_bankfor(
    id bigint primary key auto_increment comment '序号',
    idnumber varchar(20) not null unique comment '身份证号',
    phone bigint comment '手机号',
    bankad varchar(100) comment '银行简写汇总',
    batch_gs int comment '工商批次',
    instime_gs datetime comment '工商插入时间',
    batch_ny int comment '农业批次',
    instime_ny datetime comment '农业插入时间',
    batch_zg int comment '中国银行批次',
    instime_zg datetime comment '中国银行插入时间',
    batch_js int comment '建设批次',
    instime_js datetime comment '建设插入时间',
    batch_jt int comment '交通批次',
    instime_jt datetime comment '交通插入时间',
    batch_jck int comment '进出口批次',
    instime_jck datetime comment '进出口插入时间',
    batch_kf int comment '国家开发批次',
    instime_kf datetime comment '国家开发插入时间',
    batch_nf int comment '农业发展批次',
    instime_nf datetime comment '农业发展插入时间',
    batch_zs int comment '招商批次',
    instime_zs datetime comment '招商插入时间',
    batch_pf int comment '浦发批次',
    instime_pf datetime comment '浦发插入时间',
    batch_zx int comment '中信批次',
    instime_zx datetime comment '中信插入时间',
    batch_gd int comment '光大批次',
    instime_gd datetime comment '光大插入时间',
    batch_hx int comment '华夏批次',
    instime_hx datetime comment '华夏插入时间',
    batch_ms int comment '民生批次',
    instime_ms datetime comment '民生插入时间',
    batch_gf int comment '广发批次',
    instime_gf datetime comment '广发插入时间',
    batch_xy int comment '兴业批次',
    instime_xy datetime comment '兴业插入时间',
    batch_pa int comment '平安批次',
    instime_pa datetime comment '平安插入时间',
    batch_zhes int comment '浙商批次',
    instime_zhes datetime comment '浙商插入时间',
    batch_hf int comment '恒丰批次',
    instime_hf datetime comment '恒丰插入时间',
    batch_bh int comment '渤海批次',
    instime_bh datetime comment '渤海插入时间',
    batch_gxnxs int comment '广西农村信用社批次',
    instime_gxnxs datetime comment '广西农村信用社插入时间',
    batch_yz int comment '邮政储蓄银行批次',
    instime_yz datetime comment '邮政储蓄银行插入时间'
)comment='信用卡开通数据表';
##################################################
################ 存储过程 #################
##################################################
/*
**************************
ods到dw,工商银行
**************************
*/
delimiter //
# "if exists" so the script also works when the procedure was never created.
drop procedure if exists pro_dw_gs //
# pro_dw_gs: load ICBC rows from ODS into DW as a new batch.
#   1. Archive the current contents of dw_bankdata_gs into
#      dw_bankdata_gs_history and empty dw_bankdata_gs.
#   2. Work out the next batch number: 1 when the history holds no batch yet,
#      otherwise the highest archived batch_gs plus one.
#   3. Copy idnumber / phone / instime from ods_bankdata_gs into
#      dw_bankdata_gs with bankad = 'gs' and the new batch number.
# All three steps run in one transaction; on any SQL error the work is rolled
# back and the error is re-raised to the caller. Note that "start transaction"
# implicitly commits a transaction the caller may already have open.
create procedure pro_dw_gs()
begin
    declare v_batch_gs int;

    declare exit handler for sqlexception
    begin
        rollback;
        resignal;
    end;

    start transaction;

    # The history table generates its own id. Copying dw ids with "select *"
    # can hit duplicate keys, because InnoDB in MySQL 5.7 re-initialises the
    # auto-increment counter of an emptied table after a server restart.
    insert into dw_bankdata_gs_history (idnumber, phone, bankad, batch_gs, instime)
    select idnumber, phone, bankad, batch_gs, instime
    from dw_bankdata_gs
    order by id;

    # Intentionally unfiltered: the whole DW table is replaced on every run.
    delete from dw_bankdata_gs;

    select coalesce(max(batch_gs), 0) + 1
    into v_batch_gs
    from dw_bankdata_gs_history;

    # Set-based copy instead of a row-by-row cursor. instime is copied as a
    # datetime; the former date-typed cursor variable dropped the time of day.
    insert into dw_bankdata_gs (idnumber, phone, bankad, batch_gs, instime)
    select idnumber, phone, 'gs', v_batch_gs, instime
    from ods_bankdata_gs
    order by id;

    commit;
end
//
delimiter ;
# Run the procedure.
call pro_dw_gs();
# Optional cleanup once the procedure is no longer needed:
#drop procedure pro_dw_gs;
/*
**************************
ods到dw,农业银行
**************************
*/
delimiter //
# Drop first so the script can be re-run; creating a procedure that already
# exists is an error.
drop procedure if exists pro_dw_ny //
# pro_dw_ny: load Agricultural Bank rows from ODS into DW as a new batch.
#   1. Archive the current contents of dw_bankdata_ny into
#      dw_bankdata_ny_history and empty dw_bankdata_ny.
#   2. Work out the next batch number: 1 when the history holds no batch yet,
#      otherwise the highest archived batch_ny plus one.
#   3. Copy idnumber / phone / instime from ods_bankdata_ny into
#      dw_bankdata_ny with bankad = 'ny' and the new batch number.
# All three steps run in one transaction; on any SQL error the work is rolled
# back and the error is re-raised to the caller. Note that "start transaction"
# implicitly commits a transaction the caller may already have open.
create procedure pro_dw_ny()
begin
    declare v_batch_ny int;

    declare exit handler for sqlexception
    begin
        rollback;
        resignal;
    end;

    start transaction;

    # The history table generates its own id. Copying dw ids with "select *"
    # can hit duplicate keys, because InnoDB in MySQL 5.7 re-initialises the
    # auto-increment counter of an emptied table after a server restart.
    insert into dw_bankdata_ny_history (idnumber, phone, bankad, batch_ny, instime)
    select idnumber, phone, bankad, batch_ny, instime
    from dw_bankdata_ny
    order by id;

    # Intentionally unfiltered: the whole DW table is replaced on every run.
    delete from dw_bankdata_ny;

    select coalesce(max(batch_ny), 0) + 1
    into v_batch_ny
    from dw_bankdata_ny_history;

    # Set-based copy instead of a row-by-row cursor. instime is copied as a
    # datetime; the former date-typed cursor variable dropped the time of day.
    insert into dw_bankdata_ny (idnumber, phone, bankad, batch_ny, instime)
    select idnumber, phone, 'ny', v_batch_ny, instime
    from ods_bankdata_ny
    order by id;

    commit;
end
//
delimiter ;
# Run the procedure.
call pro_dw_ny();
# Optional cleanup once the procedure is no longer needed:
#drop procedure pro_dw_ny;
/*
**************************
DM 汇总数据_工商银行
**************************
*/
delimiter //
# Drop first so the script can be re-run; creating a procedure that already
# exists is an error.
drop procedure if exists pro_dm_all_gs //
# pro_dm_all_gs: merge the ICBC DW rows into dm_bankfor, one row per idnumber.
#   - idnumber not yet in dm_bankfor: insert it with phone, bankad, batch_gs
#     and instime_gs taken from dw_bankdata_gs.
#   - idnumber already present:
#       * bankad empty (or NULL)        -> set to 'gs'
#       * bankad already lists 'gs'     -> left unchanged
#       * otherwise                     -> append ',' plus the DW short code
#       * batch_gs / instime_gs         -> filled only when still NULL, so the
#                                          first recorded batch is kept
# The unique key on dm_bankfor.idnumber drives the upsert, replacing the
# earlier count-then-insert cursor loop with a single statement.
create procedure pro_dm_all_gs()
begin
    insert into dm_bankfor (idnumber, phone, bankad, batch_gs, instime_gs)
    select idnumber, phone, bankad, batch_gs, instime
    from dw_bankdata_gs
    order by id
    on duplicate key update
        # find_in_set matches whole list entries, unlike like '%gs%', which
        # would also match a longer code that merely contains "gs".
        bankad = case
            when dm_bankfor.bankad is null or dm_bankfor.bankad = '' then 'gs'
            when find_in_set('gs', dm_bankfor.bankad) > 0 then dm_bankfor.bankad
            else concat(dm_bankfor.bankad, ',', values(bankad))
        end,
        batch_gs = coalesce(dm_bankfor.batch_gs, values(batch_gs)),
        # instime is carried as a datetime; the former date-typed cursor
        # variable dropped the time of day.
        instime_gs = coalesce(dm_bankfor.instime_gs, values(instime_gs));
end
//
delimiter ;
# Run the procedure.
call pro_dm_all_gs();
# Optional cleanup once the procedure is no longer needed:
#drop procedure pro_dm_all_gs;
/*
**************************
DM 汇总数据_农业银行
**************************
*/
delimiter //
# Terminated with the active delimiter so the drop is sent as its own statement.
drop procedure if exists pro_dm_all_ny //
# pro_dm_all_ny: merge the Agricultural Bank DW rows into dm_bankfor, one row
# per idnumber.
#   - idnumber not yet in dm_bankfor: insert it with phone, bankad, batch_ny
#     and instime_ny taken from dw_bankdata_ny.
#   - idnumber already present:
#       * bankad empty (or NULL)        -> set to 'ny'
#       * bankad already lists 'ny'     -> left unchanged
#       * otherwise                     -> append ',' plus the DW short code
#       * batch_ny / instime_ny         -> filled only when still NULL, so the
#                                          first recorded batch is kept
# The unique key on dm_bankfor.idnumber drives the upsert, replacing the
# earlier count-then-insert cursor loop with a single statement.
create procedure pro_dm_all_ny()
begin
    insert into dm_bankfor (idnumber, phone, bankad, batch_ny, instime_ny)
    select idnumber, phone, bankad, batch_ny, instime
    from dw_bankdata_ny
    order by id
    on duplicate key update
        # find_in_set matches whole list entries, unlike like '%ny%', which
        # would also match a longer code that merely contains "ny".
        bankad = case
            when dm_bankfor.bankad is null or dm_bankfor.bankad = '' then 'ny'
            when find_in_set('ny', dm_bankfor.bankad) > 0 then dm_bankfor.bankad
            else concat(dm_bankfor.bankad, ',', values(bankad))
        end,
        batch_ny = coalesce(dm_bankfor.batch_ny, values(batch_ny)),
        # instime is carried as a datetime; the former date-typed cursor
        # variable dropped the time of day.
        instime_ny = coalesce(dm_bankfor.instime_ny, values(instime_ny));
end
//
delimiter ;
# Run the procedure.
call pro_dm_all_ny();
# Optional cleanup once the procedure is no longer needed:
#drop procedure pro_dm_all_ny;
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pymysql #支持Python3.0
##读取excel使用(支持03)
import xlrd
from datetime import datetime
from xlrd import xldate_as_tuple
from builtins import int
# Module-level MySQL connection used by the import routine in this script.
# NOTE(review): host and root credentials are hard-coded here; consider moving
# them to configuration before this is used outside a test environment.
conn = pymysql.connect(
    host="192.168.8.31",
    port=3306,
    user="root",
    passwd="123",
    db="dbBank",
    charset="utf8",
)
# Cursor through which every statement of this script is executed.
cur = conn.cursor()
##将excel文件导入mysql中
def importExcelToMysql(path):
    """Reload ``ods_bankdata_gs`` from the first sheet of an Excel workbook.

    The current rows of ``ods_bankdata_gs`` are archived into
    ``ods_bankdata_gs_history``, the table is emptied, and every sheet row
    after the header row is inserted. The first four columns of the sheet are
    read as name, phone, idnumber and bankname, in that order; ``instime`` is
    set by the database.

    Uses the module-level ``conn`` / ``cur`` and closes both before returning,
    whether or not the import succeeded. On any error the transaction is
    rolled back and the exception is re-raised.

    :param path: file-system path of the workbook to import.

    NOTE(review): xlrd 2.0+ reads only .xls files; confirm the installed xlrd
    version if .xlsx workbooks must be supported.
    """
    inserted = 0
    try:
        # Open the workbook before touching the database, so an unreadable
        # file fails before any statement has been issued.
        workbook = xlrd.open_workbook(path)
        sheets = workbook.sheet_names()
        worksheet = workbook.sheet_by_name(sheets[0])

        # Archive without the id column: the history table generates its own
        # keys, so ids re-used by ods_bankdata_gs after it has been emptied
        # cannot collide with rows archived earlier.
        cur.execute(
            "insert into ods_bankdata_gs_history"
            "(name,phone,idnumber,bankname,instime,notes) "
            "select name,phone,idnumber,bankname,instime,notes from ods_bankdata_gs")
        cur.execute("delete from ods_bankdata_gs")

        # Row 0 is the header row and is skipped.
        for i in range(1, worksheet.nrows):
            cells = [worksheet.cell_value(i, j) for j in range(worksheet.ncols)]
            valuestr = [str(cells[0]), int(cells[1]), str(cells[2]), str(cells[3])]
            cur.execute(
                "insert into ods_bankdata_gs(name,phone,idnumber,bankname,instime) " +
                "values(%s,%s,%s,%s,sysdate())", valuestr)
            inserted += 1

        conn.commit()
    except Exception:
        # Leave the ODS and history tables as they were before the call.
        conn.rollback()
        raise
    finally:
        cur.close()
        conn.close()
    # Report the rows actually inserted; worksheet.nrows would also count the
    # header row.
    print("成功导入: %s 条数据" % inserted)
# Workbook to import.
read03path = r"D:\0data\python\juns\juns01.xlsx"

# Print the wall-clock time before and after the import so its duration can
# be read off the output.
starttime = datetime.now()
print(starttime)

importExcelToMysql(read03path)

endtime = datetime.now()
print(endtime)