本也没有什么特殊的,记录一下做的过程,备查。
impala、kudu、dataX
mysql-->kafka-->flink-->kudu-->impala-->mysql;中间通过java、python、sql等串联。
而我所能操作的是后半部分:数据进入kafka之后的处理,以及把impala的查询结果导出到用于报表展示的mysql。
[root@ai-etl-c2-13 activity]# pwd
/data/activity
[root@ai-etl-c2-13 activity]# tree
.
|-- json
| |-- \\
| |-- promotion520_real_static3.json
| |-- promotion520_real_static41.json
| |-- promotion520_real_static42.json
| |-- promotion520_real_static51.json
| |-- promotion520_real_static52.json
| `-- promotion520_real_static53.json
|-- log
| |-- promotion520_real_static3.log
| |-- promotion520_real_static41.log
| |-- promotion520_real_static42.log
| |-- promotion520_real_static51.log
| |-- promotion520_real_static52.log
| |-- promotion520_real_static53.log
| |-- promotion520_yesterday_static4.log
| `-- promotion520_yesterday_static5.log
|-- shell
| |-- promotion520_real_static3.sh
| |-- promotion520_real_static41.sh
| |-- promotion520_real_static42.sh
| |-- promotion520_real_static51.sh
| |-- promotion520_real_static52.sh
| |-- promotion520_real_static53.sh
| |-- promotion520_yesterday_static4.sh
| `-- promotion520_yesterday_static5.sh
`-- sql
    |-- 3-impala.sql
    |-- 4-1-impala.sql
    |-- 4-2-impala.sql
    |-- 4-yesterday-impala.sql
    |-- 5-1-impala.sql
    |-- 5-2-impala.sql
    |-- 5-3-impala.sql
    `-- 5-yesterday-impala.sql
2.1、当天正常跑与凌晨回跑昨天共用一个脚本
[root@ai-etl-c2-13 shell]# pwd
/data/activity/shell
[root@ai-etl-c2-13 shell]# cat promotion520_real_static3.sh
#!/usr/bin/env bash
# Refresh api.promotion520_real_static3 in Impala for one day, then export the
# table to MySQL with DataX.
#
# Runs that start within the first five minutes after midnight
# (HHMMSS <= 000500) process yesterday; every other run processes today.
source /etc/profile

cur_time="$(date +%H%M%S)"
today="$(date +%Y-%m-%d)"
yesterday="$(date "+%Y-%m-%d" -d "-1 days")"

# -le compares decimal integers, so a value such as 000459 is read as 459.
if [ "${cur_time}" -le 500 ]; then
    date="${yesterday}"
else
    date="${today}"
fi
echo "date:${date}"

# Export only after a successful ETL; otherwise DataX would push whatever the
# failed run left behind in the table directory.
/bin/impala-shell -f /data/activity/sql/3-impala.sql --var=date="${date}" || exit 1
/bin/python /data/datax/bin/datax.py /data/activity/json/promotion520_real_static3.json
[root@ai-etl-c2-13 shell]#
2.2、当天正常跑与第二天回跑使用不同的脚本
[root@ai-etl-c2-13 shell]# cat promotion520_real_static41.sh
#!/usr/bin/env bash
# Refresh the current-day partition of api.promotion520_real_static41 in
# Impala, then export that partition to MySQL with DataX.
source /etc/profile

# Day handed to DataX as the "date" job parameter. The SQL file receives no
# variable and derives the day from current_timestamp() on the Impala side.
# NOTE(review): a run that straddles midnight could make the two disagree.
date="$(date "+%Y-%m-%d")"

# Export only after a successful ETL; otherwise DataX would push a stale or
# missing partition.
/bin/impala-shell -f /data/activity/sql/4-1-impala.sql || exit 1
/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static41.json
[root@ai-etl-c2-13 shell]# cat promotion520_real_static42.sh
#!/usr/bin/env bash
# Refresh the current-day partition of api.promotion520_real_static42 in
# Impala, then export that partition to MySQL with DataX.
source /etc/profile

# Day handed to DataX as the "date" job parameter. The SQL file receives no
# variable and derives the day from current_timestamp() on the Impala side.
# NOTE(review): a run that straddles midnight could make the two disagree.
date="$(date "+%Y-%m-%d")"

# Export only after a successful ETL; otherwise DataX would push a stale or
# missing partition.
/bin/impala-shell -f /data/activity/sql/4-2-impala.sql || exit 1
/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static42.json
[root@ai-etl-c2-13 shell]#
2.3、昨日回跑脚本
[root@ai-etl-c2-13 shell]# cat promotion520_yesterday_static4.sh
#!/usr/bin/env bash
# Recompute one past day of the static41/static42 reports in Impala and
# re-export both partitions to MySQL with DataX.
#
# Usage: promotion520_yesterday_static4.sh [YYYY-MM-DD]
# Without an argument the day defaults to yesterday, so callers that pass no
# argument behave as before. Pass a date to backfill another day instead of
# editing this file.
source /etc/profile

date="${1:-$(date "+%Y-%m-%d" -d "-1 days")}"

# The value is substituted into SQL string literals and an HDFS path, so
# reject anything that does not look like YYYY-MM-DD.
case "${date}" in
    [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]) ;;
    *)
        echo "invalid date: ${date} (expected YYYY-MM-DD)" >&2
        exit 2
        ;;
esac
echo "date:${date}"

# Export only after a successful ETL.
/bin/impala-shell -f /data/activity/sql/4-yesterday-impala.sql --var=date="${date}" || exit 1

# Attempt both exports even if the first fails; report failure if either did.
status=0
/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static41.json || status=1
/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static42.json || status=1
exit "${status}"
[root@ai-etl-c2-13 shell]#
3.1、建表脚本
CREATE TABLE IF NOT EXISTS api.promotion520_real_static3 (
  -- Summary table for the rotary page, rewritten by insert overwrite in 3-impala.sql.
  -- IF NOT EXISTS keeps this script re-runnable without dropping the data directory.
  data_date         STRING,  -- day the metrics were computed for (yyyy-MM-dd)
  vists_uv          BIGINT,  -- distinct users with a visit event on the rotary page (name kept as-is, the DataX writer column list uses it)
  get_choujiang_cnt BIGINT,  -- rows of activity.activitylog with bustype 2 created on that day
  choujiang_uid_cnt BIGINT,  -- distinct users with an activityGame_chouJiang click on the rotary page
  choujiang_cnt     BIGINT   -- number of activityGame_chouJiang click events on the rotary page
)
STORED AS TEXTFILE
-- The DataX hdfsreader job reads the text files under this warehouse path.
-- NOTE(review): the namenode host is hard-coded here, confirm it matches the cluster.
LOCATION 'hdfs://ai-etl-c2-11:8020/user/hive/warehouse/api.db/promotion520_real_static3';
DROP TABLE IF EXISTS api.promotion520_real_static41;
CREATE TABLE api.promotion520_real_static41 (
  -- Daily summary of the wool page, one partition per day (dt).
  vists_uv            BIGINT,  -- distinct users with a visit event on the wool page
  partake_uv          BIGINT,  -- distinct userinfoId in activity.cutwooluserwoolrecord for the day
  share_cnt           BIGINT,  -- number of share events on the wool page
  share_entry_uv      BIGINT,  -- distinct visitors whose r field contains shareTemplate
  register_newuser_uv BIGINT,  -- distinct sc values of register events matching shareActivity520_haoyangmao
  register_uv         BIGINT   -- distinct users of those register events
)
PARTITIONED BY (dt STRING);
DROP TABLE IF EXISTS api.promotion520_real_static42;
CREATE TABLE api.promotion520_real_static42 (
  -- Per-task statistics from activity.cutwooluserwoolrecord, one partition per day (dt).
  type_name STRING,  -- task name, or the raw type code as text when the code has no mapping
  uv        BIGINT,  -- distinct userinfoId per task type
  pv        BIGINT   -- count of record ids per task type
)
PARTITIONED BY (dt STRING);
3.2、etl脚本
[root@ai-etl-c2-13 sql]# cat 3-impala.sql
insert overwrite api.promotion520_real_static3
-- Rewrites the whole table with one row of rotary-page metrics for the day
-- passed in through the impala-shell date variable.
with page_events as (
  -- Tracking-event metrics from activity.rotary for the requested day
  select
    '${var:date}' as data_date,
    count(distinct case
            when type = 'visit'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
            then user
          end) as vists_uv,
    count(distinct case
            when type = 'click'
             and subtype = 'activityGame_chouJiang'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
            then user
          end) as choujiang_uid_cnt,
    count(case
            when type = 'click'
             and subtype = 'activityGame_chouJiang'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
            then user
          end) as choujiang_cnt
  from activity.rotary
  where dt = '${var:date}'
),
draw_log as (
  -- Rows of activity.activitylog with bustype 2 created on the requested day.
  -- NOTE(review): from_unixtime may render in UTC depending on the Impala
  -- timezone settings, confirm the day boundary matches the dt partition.
  select
    '${var:date}' as data_date,
    count(id) as get_choujiang_cnt
  from activity.activitylog
  where bustype = '2'
    and from_unixtime(cast(created as int), 'yyyy-MM-dd') = '${var:date}'
)
-- Both inputs are single-row aggregates, data_date is the join key
select
  ev.data_date,
  ev.vists_uv,
  dl.get_choujiang_cnt,
  ev.choujiang_uid_cnt,
  ev.choujiang_cnt
from page_events as ev
left join draw_log as dl
  on ev.data_date = dl.data_date;
[root@ai-etl-c2-13 sql]# pwd
/data/activity/sql
[root@ai-etl-c2-13 sql]# cat 4-*
insert overwrite api.promotion520_real_static41 partition(dt=substr(cast(current_timestamp() as string),1,10))
-- Rewrites the current-day partition of the wool-page summary. The day is
-- the first 10 characters of current_timestamp() cast to string.
with page_events as (
  -- Tracking-event metrics from activity.cut_wool for the current day
  select
    substr(cast(current_timestamp() as string), 1, 10) as data_date,
    count(distinct case
            when type = 'visit'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
            then user
          end) as vists_uv,
    count(case
            when type = 'share'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
            then user
          end) as share_cnt,
    -- visits whose r field contains shareTemplate (original note: uuri)
    count(distinct case
            when type = 'visit'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
             and r like '%shareTemplate%'
            then user
          end) as share_entry_uv,
    -- counts distinct sc values, the original note gives the sc format as
    -- shareActivity520_haoyangmao_{fromUri}
    count(distinct case
            when type = 'register'
             and sc like '%shareActivity520_haoyangmao%'
            then sc
          end) as register_newuser_uv,
    count(distinct case
            when type = 'register'
             and sc like '%shareActivity520_haoyangmao%'
            then user
          end) as register_uv
  from activity.cut_wool
  where dt = substr(cast(current_timestamp() as string), 1, 10)
),
participants as (
  -- Distinct users with a record in activity.cutwooluserwoolrecord today.
  -- NOTE(review): from_unixtime may render in UTC depending on the Impala
  -- timezone settings, confirm it lines up with current_timestamp().
  select
    substr(cast(current_timestamp() as string), 1, 10) as data_date,
    count(distinct userinfoId) as partake_uv
  from activity.cutwooluserwoolrecord
  where from_unixtime(cast(createTime as int), 'yyyy-MM-dd') = substr(cast(current_timestamp() as string), 1, 10)
)
-- data_date is only the join key and is not selected, the day is carried by
-- the dt partition
select
  ev.vists_uv,
  pt.partake_uv,
  ev.share_cnt,
  ev.share_entry_uv,
  ev.register_newuser_uv,
  ev.register_uv
from page_events as ev
left join participants as pt
  on ev.data_date = pt.data_date;
insert overwrite api.promotion520_real_static42 partition(dt=substr(cast(current_timestamp() as string),1,10))
-- Rewrites the current-day partition with per-task uv/pv from
-- activity.cutwooluserwoolrecord, labelled with a readable task name.
with task_stats as (
  -- uv = distinct userinfoId, pv = count of ids, grouped by integer task type
  select
    cast(type as int) as type,
    count(distinct userinfoId) as uv,
    count(id) as pv
  from activity.cutwooluserwoolrecord
  where from_unixtime(cast(createTime as int), 'yyyy-MM-dd') = substr(cast(current_timestamp() as string), 1, 10)
  group by cast(type as int)
),
task_names as (
  -- Inline lookup of task type code to display name, column names come from
  -- the first branch of the union
  select 0 as type, '升级剪刀' as type_name union all
  select 1, '签到' union all
  select 2, '浏览预售会场拍品' union all
  select 3, '邀请好友参观预售会场' union all
  select 4, '浏览大宅云仓频道' union all
  select 5, '邀请好友观看鉴宝直播' union all
  select 6, '观看鉴宝直播' union all
  select 7, '浏览品质重器拍品' union all
  select 8, '邀请好友看看品质重器' union all
  select 9, '浏览正式会场拍品' union all
  select 10, '邀请好友浏览会场拍品' union all
  select 11, '加入活动直播间粉丝团' union all
  select 12, '任意活动拍品出价'
)
-- Unmapped type codes fall back to the code itself rendered as text
select
  coalesce(nm.type_name, cast(st.type as string)) as type_name,
  st.uv,
  st.pv
from task_stats as st
left join task_names as nm
  on st.type = nm.type;
3.3、历史脚本
insert overwrite api.promotion520_real_static41 partition(dt='${var:date}')
-- Backfill variant: rewrites the wool-page summary partition for the day
-- passed in through the impala-shell date variable.
with page_events as (
  -- Tracking-event metrics from activity.cut_wool for the requested day
  select
    '${var:date}' as data_date,
    count(distinct case
            when type = 'visit'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
            then user
          end) as vists_uv,
    count(case
            when type = 'share'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
            then user
          end) as share_cnt,
    -- visits whose r field contains shareTemplate (original note: uuri)
    count(distinct case
            when type = 'visit'
             and pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
             and r like '%shareTemplate%'
            then user
          end) as share_entry_uv,
    -- counts distinct sc values, the original note gives the sc format as
    -- shareActivity520_haoyangmao_{fromUri}
    count(distinct case
            when type = 'register'
             and sc like '%shareActivity520_haoyangmao%'
            then sc
          end) as register_newuser_uv,
    count(distinct case
            when type = 'register'
             and sc like '%shareActivity520_haoyangmao%'
            then user
          end) as register_uv
  from activity.cut_wool
  where dt = '${var:date}'
),
participants as (
  -- Distinct users with a record in activity.cutwooluserwoolrecord that day.
  -- NOTE(review): from_unixtime may render in UTC depending on the Impala
  -- timezone settings, confirm the day boundary matches the dt partition.
  select
    '${var:date}' as data_date,
    count(distinct userinfoId) as partake_uv
  from activity.cutwooluserwoolrecord
  where from_unixtime(cast(createTime as int), 'yyyy-MM-dd') = '${var:date}'
)
-- data_date is only the join key and is not selected, the day is carried by
-- the dt partition
select
  ev.vists_uv,
  pt.partake_uv,
  ev.share_cnt,
  ev.share_entry_uv,
  ev.register_newuser_uv,
  ev.register_uv
from page_events as ev
left join participants as pt
  on ev.data_date = pt.data_date;
insert overwrite api.promotion520_real_static42 partition(dt='${var:date}')
-- Backfill variant: rewrites the per-task uv/pv partition for the day passed
-- in through the impala-shell date variable.
with task_stats as (
  -- uv = distinct userinfoId, pv = count of ids, grouped by integer task type
  select
    cast(type as int) as type,
    count(distinct userinfoId) as uv,
    count(id) as pv
  from activity.cutwooluserwoolrecord
  where from_unixtime(cast(createTime as int), 'yyyy-MM-dd') = '${var:date}'
  group by cast(type as int)
),
task_names as (
  -- Inline lookup of task type code to display name, column names come from
  -- the first branch of the union
  select 0 as type, '升级剪刀' as type_name union all
  select 1, '签到' union all
  select 2, '浏览预售会场拍品' union all
  select 3, '邀请好友参观预售会场' union all
  select 4, '浏览大宅云仓频道' union all
  select 5, '邀请好友观看鉴宝直播' union all
  select 6, '观看鉴宝直播' union all
  select 7, '浏览品质重器拍品' union all
  select 8, '邀请好友看看品质重器' union all
  select 9, '浏览正式会场拍品' union all
  select 10, '邀请好友浏览会场拍品' union all
  select 11, '加入活动直播间粉丝团' union all
  select 12, '任意活动拍品出价'
)
-- Unmapped type codes fall back to the code itself rendered as text
select
  coalesce(nm.type_name, cast(st.type as string)) as type_name,
  st.uv,
  st.pv
from task_stats as st
left join task_names as nm
  on st.type = nm.type;
[root@ai-etl-c2-13 sql]#
[root@ai-etl-c2-13 json]# pwd
/data/activity/json
[root@ai-etl-c2-13 json]# cat promotion520_real_static3.json
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"defaultFS": "hdfs://hdfshost:8020",
"path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static3",
"column":[
{
"index": 0,
"type": "String"
},
{
"index": 1,
"type": "Long"
},
{
"index": 2,
"type": "Long"
},
{
"index": 3,
"type": "Long"
},
{
"index": 4,
"type": "Long"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "replace",
"username": "admin",
"password": "mysqlpass",
"column":["data_date",
"vists_uv",
"get_choujiang_cnt",
"choujiang_uid_cnt",
"choujiang_cnt"
],
"session": [
"set session sql_mode='ANSI'"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
"table": ["promotion520_real_static3"]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 1,
"byte": 5242880
}
}
}
}
[root@ai-etl-c2-13 json]# cat promotion520_real_static41.json
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"defaultFS": "hdfs://hdfshost:8020",
"path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static41/dt=$date",
"column":[
{
"index": 0,
"type": "Long"
},
{
"index": 1,
"type": "Long"
},
{
"index": 2,
"type": "Long"
},
{
"index": 3,
"type": "Long"
},
{
"index": 4,
"type": "Long"
},
{
"index": 5,
"type": "Long"
},
{
"value": "$date",
"type": "date",
"format": "yyyy-MM-dd"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "replace",
"username": "admin",
"password": "mysqlpass",
"column":["vists_uv",
"partake_uv",
"share_cnt",
"share_entry_uv",
"register_newuser_uv",
"register_uv",
"dt"
],
"session": [
"set session sql_mode='ANSI'"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
"table": ["promotion520_real_static41"]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 1,
"byte": 5242880
}
}
}
}
[root@ai-etl-c2-13 json]# cat promotion520_real_static42.json
{
"job": {
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"defaultFS": "hdfs://hdfshost:8020",
"path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static42/dt=$date",
"column":[
{
"index": 0,
"type": "String"
},
{
"index": 1,
"type": "Long"
},
{
"index": 2,
"type": "Long"
},
{
"value": "$date",
"type": "date",
"format": "yyyy-MM-dd"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "replace",
"username": "admin",
"password": "mysqlpass",
"column":["type_name",
"uv",
"pv",
"dt"
],
"session": [
"set session sql_mode='ANSI'"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
"table": ["promotion520_real_static42"]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 1,
"byte": 5242880
}
}
}
}
[root@ai-etl-c2-13 ~]# crontab -l
# Each entry wraps its script in flock -xn (exclusive, non-blocking) on a
# per-job lock file and appends stdout and stderr to a per-job log.
# NOTE(review): the command inside -c ends with "&", so it is backgrounded.
# Confirm the lock is still held for the whole run on this host.
#
# static3: every minute, every day.
*/1 * * * * flock -xn /tmp/my3.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static3.sh >>/data/activity/log/promotion520_real_static3.log 2>&1 &'
# static41..static53: every minute on day-of-month 20.
# NOTE(review): the month field is "*", so these fire on the 20th of every
# month. Restrict the month if the job is meant for a single promotion day.
*/1 * 20 * * flock -xn /tmp/my41.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static41.sh >>/data/activity/log/promotion520_real_static41.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my42.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static42.sh >>/data/activity/log/promotion520_real_static42.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my51.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static51.sh >>/data/activity/log/promotion520_real_static51.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my52.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static52.sh >>/data/activity/log/promotion520_real_static52.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my53.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static53.sh >>/data/activity/log/promotion520_real_static53.log 2>&1 &'
# Yesterday re-runs: minute 2 of hours 0,1,2,3,5,6 on day-of-month 21.
# NOTE(review): hour 4 is not in the list, and the month field is "*" here too.
2 0,1,2,3,5,6 21 * * flock -xn /tmp/my-yesterday4.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_yesterday_static4.sh >>/data/activity/log/promotion520_yesterday_static4.log 2>&1 &'
2 0,1,2,3,5,6 21 * * flock -xn /tmp/my-yesterday5.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_yesterday_static5.sh >>/data/activity/log/promotion520_yesterday_static5.log 2>&1 &'