Note: all machine information comes from the machine_baseinfo (machine base info) table and the machine_admin_map (machine-to-customer mapping) table.
USE `music`;
-- 1. TO_YCAK_MAC_D: machine base info table
CREATE EXTERNAL TABLE `TO_YCAK_MAC_D` (
`MID` int,
`SRL_ID` string,
`HARD_ID` string,
`SONG_WHSE_VER` string,
`EXEC_VER` string,
`UI_VER` string,
`IS_ONLINE` string,
`STS` int,
`CUR_LOGIN_TM` string,
`PAY_SW` string,
`LANG` int,
`SONG_WHSE_TYPE` int,
`SCR_TYPE` int
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_MAC_D';
-- 2. TO_YCAK_MAC_LOC_D: machine location info table
CREATE EXTERNAL TABLE `TO_YCAK_MAC_LOC_D` (
`MID` int,
`PRVC_ID` int,
`CTY_ID` int,
`PRVC` string,
`CTY` string,
`MAP_CLSS` string,
`LON` string,
`LAT` string,
`ADDR` string,
`ADDR_FMT` string,
`REV_TM` string,
`SALE_TM` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_MAC_LOC_D';
-- 3. TO_YCBK_MAC_ADMIN_MAP_D: machine-to-customer mapping table
CREATE EXTERNAL TABLE `TO_YCBK_MAC_ADMIN_MAP_D` (
`MID` int,
`MAC_NM` string,
`PKG_NUM` int,
`PKG_NM` string,
`INV_RATE` double,
`AGE_RATE` double,
`COM_RATE` double,
`PAR_RATE` double,
`DEPOSIT` double,
`SCENE_PRVC_ID` string,
`SCENE_CTY_ID` string,
`SCENE_AREA_ID` string,
`SCENE_ADDR` string,
`PRDCT_TYPE` string,
`SERIAL_NUM` string,
`HAD_MPAY_FUNC` int,
`IS_ACTV` int,
`ACTV_TM` string,
`ORDER_TM` string,
`GROUND_NM` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_MAC_ADMIN_MAP_D';
-- 4. TO_YCBK_MAC_STORE_MAP_D: machine-to-store mapping table
CREATE EXTERNAL TABLE `TO_YCBK_MAC_STORE_MAP_D` (
`STORE_ID` int,
`MID` int,
`PRDCT_TYPE` int,
`ADMINID` int,
`CREAT_TM` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_MAC_STORE_MAP_D';
-- 5. TO_YCBK_STORE_D: store info table
CREATE EXTERNAL TABLE `TO_YCBK_STORE_D` (
`ID` int,
`STORE_NM` string,
`TAG_ID` string,
`TAG_NM` string,
`SUB_TAG_ID` string,
`SUB_TAG_NM` string,
`PRVC_ID` string,
`CTY_ID` string,
`AREA_ID` string,
`ADDR` string,
`GROUND_NM` string,
`BUS_TM` string,
`CLOS_TM` string,
`SUB_SCENE_CATGY_ID` string,
`SUB_SCENE_CATGY_NM` string,
`SUB_SCENE_ID` string,
`SUB_SCENE_NM` string,
`BRND_ID` string,
`BRND_NM` string,
`SUB_BRND_ID` string,
`SUB_BRND_NM` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_STORE_D';
-- 6. TO_YCBK_PRVC_D: province daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCBK_PRVC_D` (
`PRVC_ID` int,
`PRVC` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_PRVC_D';
-- 7. TO_YCBK_CITY_D: city daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCBK_CITY_D` (
`PRVC_ID` int,
`CTY_ID` int,
`CTY` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_CITY_D';
-- 8. TO_YCBK_AREA_D: district/county daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCBK_AREA_D` (
`CTY_ID` int,
`AREA_ID` int,
`AREA` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCBK_AREA_D';
-- 9. TW_MAC_BASEINFO_D: machine base info daily full-snapshot table
CREATE EXTERNAL TABLE `TW_MAC_BASEINFO_D` (
`MID` int,
`MAC_NM` string,
`SONG_WHSE_VER` string,
`EXEC_VER` string,
`UI_VER` string,
`HARD_ID` string,
`SALE_TM` string,
`REV_TM` string,
`OPER_NM` string,
`PRVC` string,
`CTY` string,
`AREA` string,
`ADDR` string,
`STORE_NM` string,
`SCENCE_CATGY` string,
`SUB_SCENCE_CATGY` string,
`SCENE` string,
`SUB_SCENE` string,
`BRND` string,
`SUB_BRND` string,
`PRDCT_NM` string,
`PRDCT_TYP` int,
`BUS_MODE` string,
`INV_RATE` double,
`AGE_RATE` double,
`COM_RATE` double,
`PAR_RATE` double,
`IS_ACTV` int,
`ACTV_TM` string,
`PAY_SW` int,
`PRTN_NM` string,
`CUR_LOGIN_TM` string
) PARTITIONED BY (data_dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TW_MAC_BASEINFO_D';
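For orientation, TW_MAC_BASEINFO_D is the wide table assembled from the ODS tables above, presumably joined on MID; a minimal HiveQL sketch of that shape (the join type and column subset are inferred from the DDL, not taken from the Spark job's source):
-- hypothetical sketch: join ODS tables on MID to form the wide machine record
SELECT a.MID, a.HARD_ID, b.PRVC, b.CTY, c.MAC_NM, c.INV_RATE
FROM music.to_ycak_mac_d a
LEFT JOIN music.to_ycak_mac_loc_d b ON a.MID = b.MID
LEFT JOIN music.to_ycbk_mac_admin_map_d c ON a.MID = c.MID;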
In MySQL, create the two source databases:
create database ycak default character set utf8;
create database ycbk default character set utf8;
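The MySQL source schemas are not shown in this section; a hypothetical sketch of machine_baseinfo, assuming its columns mirror the Hive ODS table TO_YCAK_MAC_D (the column types are guesses):
USE ycak;
-- hypothetical source table; column names mirror TO_YCAK_MAC_D above
CREATE TABLE machine_baseinfo (
  MID INT PRIMARY KEY,
  SRL_ID VARCHAR(64),
  HARD_ID VARCHAR(64),
  SONG_WHSE_VER VARCHAR(32),
  EXEC_VER VARCHAR(32),
  UI_VER VARCHAR(32),
  IS_ONLINE VARCHAR(8),
  STS INT,
  CUR_LOGIN_TM VARCHAR(32),
  PAY_SW VARCHAR(8),
  LANG INT,
  SONG_WHSE_TYPE INT,
  SCR_TYPE INT
);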
1_extract_mysqldata_to_ods.sh, with the following content:
#!/bin/bash
ssh hadoop@node03 > /tmp/logs/music_project/machine-info.log 2>&1 <<aabbcc
hostname
source /etc/profile
# ycak
## machine_baseinfo ==>> TO_YCAK_MAC_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table machine_baseinfo --target-dir /user/hive/warehouse/music.db/TO_YCAK_MAC_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## machine_local_info ==>> TO_YCAK_MAC_LOC_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table machine_local_info --target-dir /user/hive/warehouse/music.db/TO_YCAK_MAC_LOC_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
# ycbk
## machine_admin_map ==>> TO_YCBK_MAC_ADMIN_MAP_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table machine_admin_map --target-dir /user/hive/warehouse/music.db/TO_YCBK_MAC_ADMIN_MAP_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## machine_store_map ==>> TO_YCBK_MAC_STORE_MAP_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table machine_store_map --target-dir /user/hive/warehouse/music.db/TO_YCBK_MAC_STORE_MAP_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## machine_store_info ==>> TO_YCBK_STORE_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table machine_store_info --target-dir /user/hive/warehouse/music.db/TO_YCBK_STORE_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## province_info ==>> TO_YCBK_PRVC_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table province_info --target-dir /user/hive/warehouse/music.db/TO_YCBK_PRVC_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## city_info ==>> TO_YCBK_CITY_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table city_info --target-dir /user/hive/warehouse/music.db/TO_YCBK_CITY_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## area_info ==>> TO_YCBK_AREA_D
sqoop import --connect jdbc:mysql://node01:3306/ycbk?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table area_info --target-dir /user/hive/warehouse/music.db/TO_YCBK_AREA_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
aabbcc
echo "all done!"
2_generate_tw_mac_baseinfo.sh, with the following content:
#!/bin/bash
currentDate=`date -d today +"%Y%m%d"`
if [ x"$1" = x ]; then
  echo "==== Using the auto-generated current date ===="
else
  echo "==== Using the date passed in from Azkaban ===="
  currentDate=$1
fi
echo "Date: $currentDate"
ssh hadoop@node01 > /tmp/logs/music_project/machine-info.log 2>&1 <<aabbcc
hostname
cd /bigdata/install/spark-2.3.3-bin-hadoop2.7/bin
./spark-submit --master yarn --class com.yw.musichw.eds.machine.GenerateTwMacBaseinfoD \
/bigdata/data/music_project/musicwh-1.0.0-SNAPSHOT-jar-with-dependencies.jar $currentDate
exit
aabbcc
echo "all done!"
Create machine-info.flow with the following content:
nodes:
  - name: Job1_ExtractMySQLDataToODS
    type: command
    config:
      command: sh 1_extract_mysqldata_to_ods.sh
  - name: Job2_GenerateTwMacBaseinfo
    type: command
    config:
      command: sh 2_generate_tw_mac_baseinfo.sh ${mydate}
    dependsOn:
      - Job1_ExtractMySQLDataToODS
Compress machine-info.flow and flow20.project into a zip file, machine-info.zip.
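The flow20.project file marks this as an Azkaban Flow 2.0 project; it contains a single line:
azkaban-flow-version: 2.0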
vim drop_machine_tables.sql, with the following content:
drop table `music`.`to_ycak_mac_d`;
drop table `music`.`to_ycak_mac_loc_d`;
drop table `music`.`to_ycbk_area_d`;
drop table `music`.`to_ycbk_city_d`;
drop table `music`.`to_ycbk_mac_admin_map_d`;
drop table `music`.`to_ycbk_mac_store_map_d`;
drop table `music`.`to_ycbk_prvc_d`;
drop table `music`.`to_ycbk_store_d`;
drop table `music`.`tw_mac_baseinfo_d`;
Run hive -f drop_machine_tables.sql to drop the tables. Since these are all external tables, the real data still sits on HDFS, so the corresponding HDFS directories must be removed as well; one way is sketched below.
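A minimal cleanup sketch, assuming the default LOCATION paths from the DDL above:
# remove the leftover HDFS data for each external table
for t in TO_YCAK_MAC_D TO_YCAK_MAC_LOC_D TO_YCBK_MAC_ADMIN_MAP_D TO_YCBK_MAC_STORE_MAP_D TO_YCBK_STORE_D TO_YCBK_PRVC_D TO_YCBK_CITY_D TO_YCBK_AREA_D TW_MAC_BASEINFO_D; do
  hdfs dfs -rm -r "/user/hive/warehouse/music.db/$t"
done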
Then vim create_machine_tables.sql, whose content appears in the model design subsection above, and run hive -f create_machine_tables.sql to recreate the tables. Finally, upload machine-info.zip to Azkaban and execute the flow.
Next, the user-side tables are created the same way.
USE `music`;
-- 1. TO_YCAK_USR_D: WeChat user daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCAK_USR_D` (
`UID` int,
`REG_MID` int,
`GDR` string,
`BIRTHDAY` string,
`MSISDN` string,
`LOC_ID` int,
`LOG_MDE` int,
`REG_TM` string,
`USR_EXP` string,
`SCORE` int,
`LEVEL` int,
`WX_ID` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_USR_D';
-- 2. TO_YCAK_USR_ALI_D: Alipay user daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCAK_USR_ALI_D` (
`UID` int,
`REG_MID` int,
`GDR` string,
`BIRTHDAY` string,
`MSISDN` string,
`LOC_ID` int,
`LOG_MDE` int,
`REG_TM` string,
`USR_EXP` string,
`SCORE` int,
`LEVEL` int,
`USR_TYPE` string,
`IS_CERT` string,
`IS_STDNT` string,
`ALY_ID` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_USR_ALI_D';
-- 3. TO_YCAK_USR_QQ_D: QQ user daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCAK_USR_QQ_D` (
`UID` int,
`REG_MID` int,
`GDR` string,
`BIRTHDAY` string,
`MSISDN` string,
`LOC_ID` int,
`LOG_MDE` int,
`REG_TM` string,
`USR_EXP` string,
`SCORE` int,
`LEVEL` int,
`QQID` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_USR_QQ_D';
-- 4. TO_YCAK_USR_APP_D: app user info daily full-snapshot table
CREATE EXTERNAL TABLE `TO_YCAK_USR_APP_D` (
`UID` int,
`REG_MID` int,
`GDR` string,
`BIRTHDAY` string,
`MSISDN` string,
`LOC_ID` int,
`REG_TM` string,
`USR_EXP` string,
`LEVEL` int,
`APP_ID` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_USR_APP_D';
-- 5. TO_YCAK_USR_LOGIN_D: user login data daily incremental table
CREATE EXTERNAL TABLE `TO_YCAK_USR_LOGIN_D` (
`ID` int,
`UID` int,
`MID` int,
`LOGIN_TM` string,
`LOGOUT_TM` string,
`MODE_TYPE` int
) PARTITIONED BY (`data_dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TO_YCAK_USR_LOGIN_D';
-- 6. TW_USR_BASEINFO_D: active user base info daily incremental table
CREATE EXTERNAL TABLE `TW_USR_BASEINFO_D` (
`UID` int,
`REG_MID` int,
`REG_CHNL` string,
`REF_UID` string,
`GDR` string,
`BIRTHDAY` string,
`MSISDN` string,
`LOC_ID` int,
`LOG_MDE` string,
`REG_DT` string,
`REG_TM` string,
`USR_EXP` string,
`SCORE` int,
`LEVEL` int,
`USR_TYPE` string,
`IS_CERT` string,
`IS_STDNT` string
) PARTITIONED BY (`data_dt` string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION 'hdfs://node01/user/hive/warehouse/music.db/TW_USR_BASEINFO_D';
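The Spark job for this table presumably unions the channel-specific user tables (and, per the "active user" name, restricts to users appearing in TO_YCAK_USR_LOGIN_D for the given day); a hedged SQL sketch of the union shape (the REG_CHNL literals and the exact logic are inferred, not taken from the job's source):
-- hypothetical sketch: union the per-channel ODS user tables on their common columns
SELECT UID, REG_MID, 'WeChat' AS REG_CHNL, GDR, BIRTHDAY, MSISDN, LOC_ID
FROM music.to_ycak_usr_d
UNION ALL
SELECT UID, REG_MID, 'Alipay' AS REG_CHNL, GDR, BIRTHDAY, MSISDN, LOC_ID
FROM music.to_ycak_usr_ali_d;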
1_extract_mysqldata_to_ods.sh, with the following content:
#!/bin/bash
ssh hadoop@node03 > /tmp/logs/music_project/user-info.log 2>&1 <<aabbcc
hostname
source /etc/profile
# ycak
## user_wechat_baseinfo ==>> TO_YCAK_USR_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table user_wechat_baseinfo --target-dir /user/hive/warehouse/music.db/TO_YCAK_USR_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## user_alipay_baseinfo ==>> TO_YCAK_USR_ALI_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table user_alipay_baseinfo --target-dir /user/hive/warehouse/music.db/TO_YCAK_USR_ALI_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## user_qq_baseinfo ==>> TO_YCAK_USR_QQ_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table user_qq_baseinfo --target-dir /user/hive/warehouse/music.db/TO_YCAK_USR_QQ_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
## user_app_baseinfo ==>> TO_YCAK_USR_APP_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table user_app_baseinfo --target-dir /user/hive/warehouse/music.db/TO_YCAK_USR_APP_D/ --delete-target-dir --num-mappers 1 --fields-terminated-by '\t'
aabbcc
echo "all done!"
2_incr_extract_mysqldata_to_ods.sh, with the following content:
#!/bin/bash
currentDate=`date -d today +"%Y%m%d"`
if [ x"$1" = x ]; then
  echo "==== No import date was supplied; please provide one ===="
  exit
else
  echo "==== Using the supplied import date ===="
  currentDate=$1
fi
echo "Date: $currentDate"
# Query the current maximum ID in the Hive ODS table to_ycak_usr_login_d
result=`ssh hadoop@node03 "source /etc/profile;hive -e 'select max(id) from music.to_ycak_usr_login_d'" | grep _c0 -A 1`
maxId=`echo ${result} | awk '{print $2}'`
if [ x"${maxId}" = xNULL ]; then
  maxId=0
fi
echo "The current maximum ID in Hive ODS table TO_YCAK_USR_LOGIN_D is ${maxId}"
ssh hadoop@node03 > /tmp/logs/music_project/user-login-info.log 2>&1 <<aabbcc
hostname
source /etc/profile
## user_login_info ==>> TO_YCAK_USR_LOGIN_D
sqoop import --connect jdbc:mysql://node01:3306/ycak?dontTrackOpenResources=true\&defaultFetchSize=10000\&useCursorFetch=true\&useUnicode=yes\&characterEncoding=utf8 --username root --password 123456 --table user_login_info --target-dir /user/hive/warehouse/music.db/TO_YCAK_USR_LOGIN_D/data_dt=${currentDate} --num-mappers 1 --fields-terminated-by '\t' --incremental append --check-column id --last-value ${maxId}
# Add the Hive partition for this date
hive -e "alter table music.to_ycak_usr_login_d add partition(data_dt='${currentDate}')"
exit
aabbcc
echo "all done!"
3_generate_tw_usr_baseinfo_d.sh, with the following content:
#!/bin/bash
currentDate=`date -d today +"%Y%m%d"`
if [ x"$1" = x ]; then
  echo "==== Using the auto-generated current date ===="
else
  echo "==== Using the date passed in from Azkaban ===="
  currentDate=$1
fi
echo "Date: $currentDate"
ssh hadoop@node01 > /tmp/logs/music_project/user-info.log 2>&1 <<aabbcc
hostname
cd /bigdata/install/spark-2.3.3-bin-hadoop2.7/bin
./spark-submit --master yarn --class com.yw.musichw.eds.user.GenerateTwUsrBaseinfoD \
/bigdata/data/music_project/musicwh-1.0.0-SNAPSHOT-jar-with-dependencies.jar $currentDate
exit
aabbcc
echo "all done!"
Create user-info.flow with the following content:
nodes:
  - name: Job1_ExtractMySQLDataToODS
    type: command
    config:
      command: sh 1_extract_mysqldata_to_ods.sh
      command.1: sh 2_incr_extract_mysqldata_to_ods.sh ${mydate}
  - name: Job2_GenerateTwUsrBaseinfo
    type: command
    config:
      command: sh 3_generate_tw_usr_baseinfo_d.sh ${mydate}
    dependsOn:
      - Job1_ExtractMySQLDataToODS
Compress user-info.flow and flow20.project into a zip file, user-info.zip.
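When executing the flow, ${mydate} is supplied as an Azkaban flow parameter; alternatively, a default can be declared at the top of the flow file with flow-level config (Flow 2.0 syntax; the value is illustrative):
config:
  mydate: 20240101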
vim drop_user_tables.sql, with the following content:
drop table `music`.`to_ycak_usr_ali_d`;
drop table `music`.`to_ycak_usr_app_d`;
drop table `music`.`to_ycak_usr_d`;
drop table `music`.`to_ycak_usr_login_d`;
drop table `music`.`to_ycak_usr_qq_d`;
drop table `music`.`tw_usr_baseinfo_d`;
Run hive -f drop_user_tables.sql to drop the tables. As with the machine tables, these are all external tables, so the leftover HDFS directories must be removed as well (same cleanup approach as above). Then vim create_user_tables.sql, whose content appears in the model design subsection above, and run hive -f create_user_tables.sql to recreate the tables. Finally, upload user-info.zip to Azkaban and execute the flow.
In the TW_USR_BASEINFO_D generation job, null-like BIRTHDAY and REG_DT values are normalized to the placeholder '无信息' ("no information") with expressions such as:
case when BIRTHDAY is null then '无信息' when BIRTHDAY = 'null' then '无信息' else BIRTHDAY end
case when REG_DT is null then '无信息' when REG_DT = 'null' then '无信息' else REG_DT end
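Both expressions guard against SQL NULL as well as the literal string 'null' left over from the extraction. A hypothetical sketch of how one might sit inside the generation query (the surrounding SELECT is illustrative, not the job's actual SQL):
-- hypothetical usage of the BIRTHDAY-cleaning expression
SELECT UID,
       CASE WHEN BIRTHDAY IS NULL THEN '无信息'
            WHEN BIRTHDAY = 'null' THEN '无信息'
            ELSE BIRTHDAY END AS BIRTHDAY
FROM music.to_ycak_usr_d;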