业务库 wm 下需要同步的表:wm.admin_user_app、wm.department
1.在hive创建数据库;
CREATE DATABASE zxl_db;
CREATE DATABASE zxl_db_tmp;
2.在hive建kudu表,hive临时表;
# impala-shell
-- Staging table for the Sqoop import of admin_user_app.
-- Stored as tab-delimited text; the delimiter must match the
-- --fields-terminated-by value passed to sqoop, otherwise columns read as NULL.
-- update_time / create_time are strings because the import query formats them
-- with date_format().
CREATE TABLE IF NOT EXISTS zxl_db_tmp.admin_user_app (
    `id`          bigint,
    `user_id`     bigint,
    `app_id`      bigint,
    `o_id`        bigint,
    `c_id`        bigint,
    `status`      tinyint,
    `update_time` string,
    `create_time` string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
# impala-shell
-- Kudu-backed target table for admin_user_app.
-- A Kudu table must declare a PRIMARY KEY and be created with STORED AS KUDU.
CREATE TABLE IF NOT EXISTS zxl_db.admin_user_app (
    `id`          bigint,
    `user_id`     bigint,
    `app_id`      bigint,
    `o_id`        bigint,
    `c_id`        bigint,
    `status`      tinyint,
    `update_time` string,
    `create_time` string,
    PRIMARY KEY (`id`)
)
STORED AS KUDU;
-- Staging table for the Sqoop import of department.
-- Stored as tab-delimited text; the delimiter must match the
-- --fields-terminated-by value passed to sqoop, otherwise columns read as NULL.
-- IF NOT EXISTS keeps the script re-runnable, matching the admin_user_app DDL.
CREATE TABLE IF NOT EXISTS zxl_db_tmp.department (
    `dept_id`     bigint,
    `unit_code`   string,
    `parent_id`   bigint,
    `name`        string,
    `status`      tinyint,
    `sort`        bigint,
    `ext`         string,
    `update_time` string,
    `create_time` string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
-- Kudu-backed target table for department.
-- A Kudu table must declare a PRIMARY KEY and be created with STORED AS KUDU.
-- IF NOT EXISTS keeps the script re-runnable, matching the admin_user_app DDL.
CREATE TABLE IF NOT EXISTS zxl_db.department (
    `dept_id`     bigint,
    `unit_code`   string,
    `parent_id`   bigint,
    `name`        string,
    `status`      tinyint,
    `sort`        bigint,
    `ext`         string,
    `update_time` string,
    `create_time` string,
    PRIMARY KEY (`dept_id`)
)
STORED AS KUDU;
Note:
(1) 建kudu表时必须指明PRIMARY KEY
(2)建kudu表时必须指明为kudu存储类型
(3) 建hive表时最好显式指定分隔符 row format delimited fields terminated by '\t',并与sqoop抽数时的 --fields-terminated-by 保持一致(hive默认的字段分隔符是 \001,sqoop默认的字段分隔符是逗号)。本次测试从mysql抽数到hive时,hive表使用默认分隔符,而抽数时指定了 \t 分隔符,两者不一致,导致查询出的数据都为null。
3.从mysql抽数到hive临时表
# 抽数
# Import admin_user_app from MySQL into the Hive staging table zxl_db_tmp.admin_user_app.
# - The JDBC URL is quoted so the shell never treats '?' as a glob character.
# - -P prompts for the password on the console instead of exposing it in the
#   shell history and process list (--password-file is the non-interactive option).
# - tinyInt1isBit=false stops tinyint(1) columns from being imported as Boolean.
# - date_format(...) renders the time columns as 'YYYY-MM-DD HH:MM:SS' strings.
# - --fields-terminated-by must match the delimiter declared on the Hive table.
sudo -u hive sqoop import \
  --connect 'jdbc:mysql://10.234.7.73:3306/wm?tinyInt1isBit=false' \
  --username work \
  -P \
  --hive-database zxl_db_tmp \
  --hive-table admin_user_app \
  --query "select id,user_id,app_id,o_id,c_id,status,date_format(update_time, '%Y-%m-%d %H:%i:%s') update_time,date_format(create_time, '%Y-%m-%d %H:%i:%s') create_time from admin_user_app where 1=1 and \$CONDITIONS" \
  --hive-import \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --fields-terminated-by "\t" \
  --lines-terminated-by "\n" \
  --delete-target-dir \
  --target-dir /user/hive/import/admin_user_app \
  --hive-drop-import-delims \
  --hive-overwrite \
  -m 1
# Import department from MySQL into the Hive staging table zxl_db_tmp.department.
# - The JDBC URL is quoted so the shell never treats '?' as a glob character.
# - -P prompts for the password on the console instead of exposing it in the
#   shell history and process list (--password-file is the non-interactive option).
# - tinyInt1isBit=false stops tinyint(1) columns from being imported as Boolean.
# - date_format(...) renders the time columns as 'YYYY-MM-DD HH:MM:SS' strings.
# - --fields-terminated-by must match the delimiter declared on the Hive table.
sudo -u hive sqoop import \
  --connect 'jdbc:mysql://10.234.7.73:3306/wm?tinyInt1isBit=false' \
  --username work \
  -P \
  --hive-database zxl_db_tmp \
  --hive-table department \
  --query "select dept_id,unit_code,parent_id,name,status,sort,ext,date_format(update_time, '%Y-%m-%d %H:%i:%s') update_time,date_format(create_time, '%Y-%m-%d %H:%i:%s') create_time from department where 1=1 and \$CONDITIONS" \
  --hive-import \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --fields-terminated-by "\t" \
  --lines-terminated-by "\n" \
  --delete-target-dir \
  --target-dir /user/hive/import/department \
  --hive-drop-import-delims \
  --hive-overwrite \
  -m 1
# 修复分区(若有分区则需要修复) # beeline
-- Run in beeline. Only needed when the staging table is partitioned.
MSCK REPAIR TABLE zxl_db_tmp.admin_user_app;
Note:
(a) time,date,datetime ,timestamp(非string类型)导入到hive时时间格式会有问题,如:“2018-07-17 10:01:54.0”;需要在导入时进行处理;
(b) tinyInt1isBit=false 是为了解决sqoop从mysql导入数据到hive时tinyint(1)格式自动变成Boolean;
(c) mysql到hive字段类型会发生改变,本例中mysql的int映射到hive变成了bigint,若要指定映射类型,需要在hive手动创建表指定数据类型;
4.从hive临时表抽数到kudu
# impala-shell
-- Make the files loaded by Sqoop/Hive visible to Impala before reading the staging tables.
REFRESH zxl_db_tmp.admin_user_app;
REFRESH zxl_db_tmp.department;

-- Copy the staged rows into the Kudu tables. UPSERT inserts rows with new
-- primary keys and updates rows whose primary key already exists.
UPSERT INTO TABLE zxl_db.admin_user_app (
    id,
    user_id,
    app_id,
    o_id,
    c_id,
    status,
    update_time,
    create_time
)
SELECT
    id,
    user_id,
    app_id,
    o_id,
    c_id,
    status,
    update_time,
    create_time
FROM zxl_db_tmp.admin_user_app;

UPSERT INTO TABLE zxl_db.department (
    dept_id,
    unit_code,
    parent_id,
    name,
    status,
    sort,
    ext,
    update_time,
    create_time
)
SELECT
    dept_id,
    unit_code,
    parent_id,
    name,
    status,
    sort,
    ext,
    update_time,
    create_time
FROM zxl_db_tmp.department;
# 修复元数据 # impala-shell
-- Run in impala-shell: reload Impala's cached metadata for the Kudu tables.
INVALIDATE METADATA zxl_db.admin_user_app;
INVALIDATE METADATA zxl_db.department;
# 删除hive临时表 # beeline
-- Run in beeline: remove the staging table once its rows are in Kudu.
-- IF EXISTS keeps the step re-runnable.
DROP TABLE IF EXISTS zxl_db_tmp.admin_user_app;
1.创建管道
2.添加和配置 binlog采集组件
Initial offset:在数据库中执行 SHOW MASTER STATUS; 获取当前 binlog 文件名和位置;
Include Tables配置需要实时同步的表,多个使用逗号隔开;
3.添加和配置流选择器,可用于过滤数据库
4.数据处理
# StreamSets Jython evaluator script: turns each binlog change record into a
# flat record for the Kudu destination.
# `records`, `sdcFunctions`, `output` and `error` are provided by the evaluator stage.
for record in records:
    newRecord = sdcFunctions.createRecord(record.sourceId + ':newRecordId')
    try:
        if record.value['Type'] == 'DELETE':
            # Operation type 2 = DELETE; a delete event carries the row in 'OldData'.
            newRecord.attributes['sdc.operation.type'] = '2'
            newRecord.value = record.value['OldData']
        else:
            # Operation type 4 = UPSERT; other events carry the row in 'Data'.
            newRecord.attributes['sdc.operation.type'] = '4'
            newRecord.value = record.value['Data']
        # NOTE(review): this self-assignment is a no-op kept from the original;
        # it may have been meant as newRecord.value['Type'] -- confirm.
        record.value['Type'] = record.value['Type']
        # Carry the source table name along so the destination can route by /Table.
        newRecord.value['Table'] = record.value['Table']
        # Write record to processor output
        output.write(newRecord)
    except Exception as e:
        # Send record to error
        error.write(newRecord, str(e))
5.写入kudu
Table Name:impala::zxl_db.${record:value('/Table')} 表示将表数据写入zxl_db的表中
6.启动管道
Shylin