520活动中我参与制作的实时报表部分的全景记录

本也没有什么特殊的,记录一下做的过程,备查。

环境:

impala、kudu、dataX


数据的大体流向:

mysql-->kafka-->flink-->kudu-->impala-->mysql;中间通过java、python、sql等串联。
而我所能操作的是后半部分:从数据进入kafka之后,直到把impala的查询结果导出到用于报表展示的mysql。
 

1、程序文件路径

[root@ai-etl-c2-13 activity]# pwd
/data/activity
[root@ai-etl-c2-13 activity]# tree
.
|-- json
|   |-- \\
|   |-- promotion520_real_static3.json
|   |-- promotion520_real_static41.json
|   |-- promotion520_real_static42.json
|   |-- promotion520_real_static51.json
|   |-- promotion520_real_static52.json
|   `-- promotion520_real_static53.json
|-- log
|   |-- promotion520_real_static3.log
|   |-- promotion520_real_static41.log
|   |-- promotion520_real_static42.log
|   |-- promotion520_real_static51.log
|   |-- promotion520_real_static52.log
|   |-- promotion520_real_static53.log
|   |-- promotion520_yesterday_static4.log
|   `-- promotion520_yesterday_static5.log
|-- shell
|   |-- promotion520_real_static3.sh
|   |-- promotion520_real_static41.sh
|   |-- promotion520_real_static42.sh
|   |-- promotion520_real_static51.sh
|   |-- promotion520_real_static52.sh
|   |-- promotion520_real_static53.sh
|   |-- promotion520_yesterday_static4.sh
|   |-- promotion520_yesterday_static5.sh
`-- sql
    |-- 3-impala.sql
    |-- 4-1-impala.sql
    |-- 4-2-impala.sql
    |-- 4-yesterday-impala.sql
    |-- 5-1-impala.sql
    |-- 5-2-impala.sql
    |-- 5-3-impala.sql
    `-- 5-yesterday-impala.sql

 

2、shell脚本

2.1、当天正常跑与凌晨回跑昨天共用一个脚本
[root@ai-etl-c2-13 shell]# pwd
/data/activity/shell

[root@ai-etl-c2-13 shell]# cat promotion520_real_static3.sh 
#!/usr/bin/env bash
#
# Rebuilds api.promotion520_real_static3 for one day with Impala
# (sql/3-impala.sql), then exports the table to MySQL with DataX.
#
# Target day:
#   - runs started at or before 00:05:00 reprocess yesterday
#     (the early-morning re-run of the previous day);
#   - all later runs process today.
#
# Exit status: non-zero when the Impala step fails (the export is skipped)
# or when the DataX export itself fails.

source /etc/profile

# Enabled only after /etc/profile has been sourced so that profile snippets
# referencing unset variables are not affected.
set -u

cur_time="$(date +%H%M%S)"
today="$(date +%Y-%m-%d)"
yesterday="$(date "+%Y-%m-%d" -d "-1 days")"

# cur_time is HHMMSS; `[ -le ]` compares it as a decimal integer,
# so 500 stands for 00:05:00.
if [ "${cur_time}" -le 500 ]; then
  date="${yesterday}"
else
  date="${today}"
fi

echo "date:${date}"

# Do not export when the query failed: the table would still hold the
# previous result and DataX would publish stale numbers.
if ! /bin/impala-shell -f /data/activity/sql/3-impala.sql --var=date="${date}"; then
  echo "impala-shell failed for date ${date}; skipping DataX export" >&2
  exit 1
fi

/bin/python /data/datax/bin/datax.py /data/activity/json/promotion520_real_static3.json
[root@ai-etl-c2-13 shell]# 

2.2、当天正常跑与第二天回跑使用不同的脚本(以下为当天脚本)

[root@ai-etl-c2-13 shell]# cat promotion520_real_static41.sh
#!/usr/bin/env bash
#
# Runs sql/4-1-impala.sql (which writes the partition for the current day
# of api.promotion520_real_static41), then exports that partition to MySQL
# with DataX. The day is passed to the DataX job as -Ddate=YYYY-MM-DD.
#
# Exit status: non-zero when the Impala step fails (the export is skipped)
# or when the DataX export itself fails.

source /etc/profile

# Enabled only after /etc/profile has been sourced so that profile snippets
# referencing unset variables are not affected.
set -u

date="$(date "+%Y-%m-%d")"

# Do not export when the query failed: the partition would be missing or
# still hold the previous result.
if ! /bin/impala-shell -f /data/activity/sql/4-1-impala.sql; then
  echo "impala-shell failed for 4-1-impala.sql; skipping DataX export" >&2
  exit 1
fi

/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static41.json

[root@ai-etl-c2-13 shell]# cat promotion520_real_static42.sh 
#!/usr/bin/env bash
#
# Runs sql/4-2-impala.sql (which writes the partition for the current day
# of api.promotion520_real_static42), then exports that partition to MySQL
# with DataX. The day is passed to the DataX job as -Ddate=YYYY-MM-DD.
#
# Exit status: non-zero when the Impala step fails (the export is skipped)
# or when the DataX export itself fails.

source /etc/profile

# Enabled only after /etc/profile has been sourced so that profile snippets
# referencing unset variables are not affected.
set -u

date="$(date "+%Y-%m-%d")"

# Do not export when the query failed: the partition would be missing or
# still hold the previous result.
if ! /bin/impala-shell -f /data/activity/sql/4-2-impala.sql; then
  echo "impala-shell failed for 4-2-impala.sql; skipping DataX export" >&2
  exit 1
fi

/bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" /data/activity/json/promotion520_real_static42.json

[root@ai-etl-c2-13 shell]# 

2.3、昨日回跑脚本

[root@ai-etl-c2-13 shell]# cat promotion520_yesterday_static4.sh 
#!/usr/bin/env bash
#
# Re-runs the report-4 tables for a past day: executes
# sql/4-yesterday-impala.sql for that day, then exports the matching
# partitions of promotion520_real_static41 and promotion520_real_static42
# to MySQL with DataX.
#
# Usage: promotion520_yesterday_static4.sh [YYYY-MM-DD]
#   Without an argument the day defaults to yesterday. Passing a day
#   replaces the former practice of editing a hard-coded date into the
#   script for manual back-fills.
#
# Exit status: 2 for a malformed date, 1 when the Impala step or any DataX
# export fails, 0 otherwise.

source /etc/profile

# Enabled only after /etc/profile has been sourced so that profile snippets
# referencing unset variables are not affected.
set -u

date="${1:-$(date "+%Y-%m-%d" -d "-1 days")}"

# The value is substituted into SQL text and an HDFS path, so accept
# nothing but a plain YYYY-MM-DD string.
case "${date}" in
  [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]) ;;
  *)
    echo "invalid date '${date}', expected YYYY-MM-DD" >&2
    exit 2
    ;;
esac

echo "date:${date}"

# Do not export when the query failed: the partitions would be missing or
# still hold the previous result.
if ! /bin/impala-shell -f /data/activity/sql/4-yesterday-impala.sql --var=date="${date}"; then
  echo "impala-shell failed for date ${date}; skipping DataX exports" >&2
  exit 1
fi

# Both exports are always attempted; a failure of either one is reported
# through the exit status.
status=0
for report in promotion520_real_static41 promotion520_real_static42; do
  if ! /bin/python /data/datax/bin/datax.py -p "-Ddate=${date}" "/data/activity/json/${report}.json"; then
    echo "DataX export failed for ${report} (date ${date})" >&2
    status=1
  fi
done

exit "${status}"


[root@ai-etl-c2-13 shell]# 

3、impala上sql逻辑脚本

3.1、建表脚本

-- Report 3: unpartitioned text table; the day is stored in data_date.
CREATE TABLE api.promotion520_real_static3 (
  data_date         STRING,
  vists_uv          BIGINT,
  get_choujiang_cnt BIGINT,
  choujiang_uid_cnt BIGINT,
  choujiang_cnt     BIGINT
)
STORED AS TEXTFILE
LOCATION 'hdfs://ai-etl-c2-11:8020/user/hive/warehouse/api.db/promotion520_real_static3';

-- Report 4-1: one partition per day (dt).
DROP TABLE IF EXISTS api.promotion520_real_static41;
CREATE TABLE api.promotion520_real_static41 (
  vists_uv            BIGINT,
  partake_uv          BIGINT,
  share_cnt           BIGINT,
  share_entry_uv      BIGINT,
  register_newuser_uv BIGINT,
  register_uv         BIGINT
)
PARTITIONED BY (dt STRING);

-- Report 4-2: one partition per day (dt), one row per type_name.
DROP TABLE IF EXISTS api.promotion520_real_static42;
CREATE TABLE api.promotion520_real_static42 (
  type_name STRING,
  uv        BIGINT,
  pv        BIGINT
)
PARTITIONED BY (dt STRING);

3.2、etl脚本

[root@ai-etl-c2-13 sql]# cat 3-impala.sql 
-- Rebuilds api.promotion520_real_static3 as a single summary row for the
-- day passed in as ${var:date}.
-- Output columns, in order: data_date, vists_uv, get_choujiang_cnt,
-- choujiang_uid_cnt, choujiang_cnt.
INSERT OVERWRITE api.promotion520_real_static3
WITH tab_burying_point AS (
  -- Metrics from the tracking events of the rotary page.
  SELECT
    '${var:date}' AS data_date,
    COUNT(DISTINCT CASE
      WHEN type = 'visit'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
      THEN user
    END) AS vists_uv,
    COUNT(DISTINCT CASE
      WHEN type = 'click'
       AND subtype = 'activityGame_chouJiang'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
      THEN user
    END) AS choujiang_uid_cnt,
    COUNT(CASE
      WHEN type = 'click'
       AND subtype = 'activityGame_chouJiang'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/rotary'
      THEN user
    END) AS choujiang_cnt
  FROM activity.rotary
  WHERE dt = '${var:date}'
),
tab_database AS (
  -- Row count from the activity log (bustype '2') created on that day.
  SELECT
    '${var:date}' AS data_date,
    COUNT(id)     AS get_choujiang_cnt
  FROM activity.activitylog
  WHERE bustype = '2'
    AND from_unixtime(cast(created AS INT), 'yyyy-MM-dd') = '${var:date}'
)
SELECT
  bp.data_date,
  bp.vists_uv,
  rd.get_choujiang_cnt,
  bp.choujiang_uid_cnt,
  bp.choujiang_cnt
FROM tab_burying_point bp
LEFT JOIN tab_database rd
  ON bp.data_date = rd.data_date;

[root@ai-etl-c2-13 sql]# pwd
/data/activity/sql
[root@ai-etl-c2-13 sql]# cat 4-*
-- Rebuilds the current-day partition of api.promotion520_real_static41.
-- The day is taken from current_timestamp() (first 10 characters of its
-- string form) everywhere it is needed.
-- Output columns, in order: vists_uv, partake_uv, share_cnt,
-- share_entry_uv, register_newuser_uv, register_uv.
INSERT OVERWRITE api.promotion520_real_static41
  PARTITION (dt = substr(cast(current_timestamp() AS STRING), 1, 10))
WITH tab_burying_point AS (
  -- Metrics from the tracking events of the wool page.
  SELECT
    substr(cast(current_timestamp() AS STRING), 1, 10) AS data_date,
    COUNT(DISTINCT CASE
      WHEN type = 'visit'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
      THEN user
    END) AS vists_uv,
    COUNT(CASE
      WHEN type = 'share'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
      THEN user
    END) AS share_cnt,
    -- uuri
    COUNT(DISTINCT CASE
      WHEN type = 'visit'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
       AND r LIKE '%shareTemplate%'
      THEN user
    END) AS share_entry_uv,
    -- sc looks like shareActivity520_haoyangmao_{fromUri}
    COUNT(DISTINCT CASE
      WHEN type = 'register'
       AND sc LIKE '%shareActivity520_haoyangmao%'
      THEN sc
    END) AS register_newuser_uv,
    COUNT(DISTINCT CASE
      WHEN type = 'register'
       AND sc LIKE '%shareActivity520_haoyangmao%'
      THEN user
    END) AS register_uv
  FROM activity.cut_wool
  WHERE dt = substr(cast(current_timestamp() AS STRING), 1, 10)
),
tab_database AS (
  -- Distinct users with a record created on the current day.
  SELECT
    substr(cast(current_timestamp() AS STRING), 1, 10) AS data_date,
    COUNT(DISTINCT userinfoId)                         AS partake_uv
  FROM activity.cutwooluserwoolrecord
  WHERE from_unixtime(cast(createTime AS INT), 'yyyy-MM-dd')
        = substr(cast(current_timestamp() AS STRING), 1, 10)
)
-- data_date is only the join key; it is not selected because the day is
-- carried by the partition column dt.
SELECT
  bp.vists_uv,
  rd.partake_uv,
  bp.share_cnt,
  bp.share_entry_uv,
  bp.register_newuser_uv,
  bp.register_uv
FROM tab_burying_point bp
LEFT JOIN tab_database rd
  ON bp.data_date = rd.data_date;
-- Rebuilds the current-day partition of api.promotion520_real_static42:
-- one row per record type with its distinct-user count (uv) and row
-- count (pv). Types missing from the inline lookup keep their numeric
-- code as type_name.
INSERT OVERWRITE api.promotion520_real_static42
  PARTITION (dt = substr(cast(current_timestamp() AS STRING), 1, 10))
SELECT
  IFNULL(lkp.type_name, cast(agg.type AS STRING)) AS type_name,
  agg.uv,
  agg.pv
FROM (
  SELECT
    cast(type AS INT)          AS type,
    COUNT(DISTINCT userinfoId) AS uv,
    COUNT(id)                  AS pv
  FROM activity.cutwooluserwoolrecord
  WHERE from_unixtime(cast(createTime AS INT), 'yyyy-MM-dd')
        = substr(cast(current_timestamp() AS STRING), 1, 10)
  GROUP BY cast(type AS INT)
) agg
LEFT JOIN (
  -- Inline lookup: type code -> display name.
  SELECT 0 AS type, '升级剪刀' AS type_name UNION ALL
  SELECT 1 AS type, '签到' AS type_name UNION ALL
  SELECT 2 AS type, '浏览预售会场拍品' AS type_name UNION ALL
  SELECT 3 AS type, '邀请好友参观预售会场' AS type_name UNION ALL
  SELECT 4 AS type, '浏览大宅云仓频道' AS type_name UNION ALL
  SELECT 5 AS type, '邀请好友观看鉴宝直播' AS type_name UNION ALL
  SELECT 6 AS type, '观看鉴宝直播' AS type_name UNION ALL
  SELECT 7 AS type, '浏览品质重器拍品' AS type_name UNION ALL
  SELECT 8 AS type, '邀请好友看看品质重器' AS type_name UNION ALL
  SELECT 9 AS type, '浏览正式会场拍品' AS type_name UNION ALL
  SELECT 10 AS type, '邀请好友浏览会场拍品' AS type_name UNION ALL
  SELECT 11 AS type, '加入活动直播间粉丝团' AS type_name UNION ALL
  SELECT 12 AS type, '任意活动拍品出价'
) lkp
  ON agg.type = lkp.type;

3.3、历史脚本

-- Rebuilds the partition dt='${var:date}' of api.promotion520_real_static41
-- for the day passed in as ${var:date}.
-- Output columns, in order: vists_uv, partake_uv, share_cnt,
-- share_entry_uv, register_newuser_uv, register_uv.
INSERT OVERWRITE api.promotion520_real_static41
  PARTITION (dt = '${var:date}')
WITH tab_burying_point AS (
  -- Metrics from the tracking events of the wool page.
  SELECT
    '${var:date}' AS data_date,
    COUNT(DISTINCT CASE
      WHEN type = 'visit'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
      THEN user
    END) AS vists_uv,
    COUNT(CASE
      WHEN type = 'share'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
      THEN user
    END) AS share_cnt,
    -- uuri
    COUNT(DISTINCT CASE
      WHEN type = 'visit'
       AND pagename = 'https://w.weipaitang.com/webApp/activity/2020520/wool'
       AND r LIKE '%shareTemplate%'
      THEN user
    END) AS share_entry_uv,
    -- sc looks like shareActivity520_haoyangmao_{fromUri}
    COUNT(DISTINCT CASE
      WHEN type = 'register'
       AND sc LIKE '%shareActivity520_haoyangmao%'
      THEN sc
    END) AS register_newuser_uv,
    COUNT(DISTINCT CASE
      WHEN type = 'register'
       AND sc LIKE '%shareActivity520_haoyangmao%'
      THEN user
    END) AS register_uv
  FROM activity.cut_wool
  WHERE dt = '${var:date}'
),
tab_database AS (
  -- Distinct users with a record created on that day.
  SELECT
    '${var:date}'              AS data_date,
    COUNT(DISTINCT userinfoId) AS partake_uv
  FROM activity.cutwooluserwoolrecord
  WHERE from_unixtime(cast(createTime AS INT), 'yyyy-MM-dd') = '${var:date}'
)
-- data_date is only the join key; it is not selected because the day is
-- carried by the partition column dt.
SELECT
  bp.vists_uv,
  rd.partake_uv,
  bp.share_cnt,
  bp.share_entry_uv,
  bp.register_newuser_uv,
  bp.register_uv
FROM tab_burying_point bp
LEFT JOIN tab_database rd
  ON bp.data_date = rd.data_date;

-- Rebuilds the partition dt='${var:date}' of api.promotion520_real_static42:
-- one row per record type with its distinct-user count (uv) and row
-- count (pv). Types missing from the inline lookup keep their numeric
-- code as type_name.
INSERT OVERWRITE api.promotion520_real_static42
  PARTITION (dt = '${var:date}')
SELECT
  IFNULL(lkp.type_name, cast(agg.type AS STRING)) AS type_name,
  agg.uv,
  agg.pv
FROM (
  SELECT
    cast(type AS INT)          AS type,
    COUNT(DISTINCT userinfoId) AS uv,
    COUNT(id)                  AS pv
  FROM activity.cutwooluserwoolrecord
  WHERE from_unixtime(cast(createTime AS INT), 'yyyy-MM-dd') = '${var:date}'
  GROUP BY cast(type AS INT)
) agg
LEFT JOIN (
  -- Inline lookup: type code -> display name.
  SELECT 0 AS type, '升级剪刀' AS type_name UNION ALL
  SELECT 1 AS type, '签到' AS type_name UNION ALL
  SELECT 2 AS type, '浏览预售会场拍品' AS type_name UNION ALL
  SELECT 3 AS type, '邀请好友参观预售会场' AS type_name UNION ALL
  SELECT 4 AS type, '浏览大宅云仓频道' AS type_name UNION ALL
  SELECT 5 AS type, '邀请好友观看鉴宝直播' AS type_name UNION ALL
  SELECT 6 AS type, '观看鉴宝直播' AS type_name UNION ALL
  SELECT 7 AS type, '浏览品质重器拍品' AS type_name UNION ALL
  SELECT 8 AS type, '邀请好友看看品质重器' AS type_name UNION ALL
  SELECT 9 AS type, '浏览正式会场拍品' AS type_name UNION ALL
  SELECT 10 AS type, '邀请好友浏览会场拍品' AS type_name UNION ALL
  SELECT 11 AS type, '加入活动直播间粉丝团' AS type_name UNION ALL
  SELECT 12 AS type, '任意活动拍品出价'
) lkp
  ON agg.type = lkp.type;
[root@ai-etl-c2-13 sql]# 

4、datax 传数脚本

[root@ai-etl-c2-13 json]# pwd
/data/activity/json

[root@ai-etl-c2-13 json]# cat promotion520_real_static3.json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "defaultFS": "hdfs://hdfshost:8020",
                        "path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static3",
                        "column":[
                                {
                                  "index": 0,
                                  "type": "String"
                                },
                                {
                                  "index": 1,
                                  "type": "Long"
                                },
                                {
                                  "index": 2,
                                  "type": "Long"
                                },
                                {
                                  "index": 3,
                                  "type": "Long"
                                },
                                {
                                  "index": 4,
                                  "type": "Long"
                                }
                                
                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\u0001"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "replace",
                        "username": "admin",
                        "password": "mysqlpass",
                        "column":["data_date",
                                  "vists_uv",
                                  "get_choujiang_cnt",
                                  "choujiang_uid_cnt",
                                  "choujiang_cnt"
                                 ],
                        "session": [
                            "set session sql_mode='ANSI'"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
                                "table": ["promotion520_real_static3"]
                            }
                        ]
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": 1,
                "byte": 5242880
            }
        }
    }
}
[root@ai-etl-c2-13 json]# cat promotion520_real_static41.json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "defaultFS": "hdfs://hdfshost:8020",
                        "path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static41/dt=$date",
                        "column":[
                                {
                                  "index": 0,
                                  "type": "Long"
                                },
                                {
                                  "index": 1,
                                  "type": "Long"
                                },
                                {
                                  "index": 2,
                                  "type": "Long"
                                },
                                {
                                  "index": 3,
                                  "type": "Long"
                                },
                                {
                                  "index": 4,
                                  "type": "Long"
                                },
                                {
                                  "index": 5,
                                  "type": "Long"
                                },
                                {
                                    "value": "$date",
                                    "type": "date",
                                    "format": "yyyy-MM-dd"
                                }

                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\u0001"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "replace",
                        "username": "admin",
                        "password": "mysqlpass",
                        "column":["vists_uv",
                                  "partake_uv",
                                  "share_cnt",
                                  "share_entry_uv",
                                  "register_newuser_uv",
                                  "register_uv",
                                  "dt"
                                 ],
                        "session": [
                            "set session sql_mode='ANSI'"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
                                "table": ["promotion520_real_static41"]
                            }
                        ]
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": 1,
                "byte": 5242880
            }
        }
    }
}
[root@ai-etl-c2-13 json]# cat promotion520_real_static42.json
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "defaultFS": "hdfs://hdfshost:8020",
                        "path": "hdfs://hdfshost:8020/user/hive/warehouse/api.db/promotion520_real_static42/dt=$date",
                        "column":[
                                {
                                  "index": 0,
                                  "type": "String"
                                },
                                {
                                  "index": 1,
                                  "type": "Long"
                                },
                                {
                                  "index": 2,
                                  "type": "Long"
                                },
                                {
                                    "value": "$date",
                                    "type": "date",
                                    "format": "yyyy-MM-dd"
                                }

                        ],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\u0001"
                    }
                },
                "writer": {
                    "name": "mysqlwriter",
                    "parameter": {
                        "writeMode": "replace",
                        "username": "admin",
                        "password": "mysqlpass",
                        "column":["type_name",
                                  "uv",
                                  "pv",
                                  "dt"
                                 ],
                        "session": [
                            "set session sql_mode='ANSI'"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://mysqlhost:9696/realtime?serverTimezone=Asia/Shanghai",
                                "table": ["promotion520_real_static42"]
                            }
                        ]
                    }
                }
            }
        ],
        "setting": {
            "speed": {
                "channel": 1,
                "byte": 5242880
            }
        }
    }
}

 

5、调度

[root@ai-etl-c2-13 ~]# crontab -l

# Every entry wraps its job in `flock -xn <lockfile>`: the lock is exclusive
# and non-blocking, so flock gives up immediately instead of waiting when the
# lock file is already locked. Output of each job is appended to its own log
# under /data/activity/log.
#
# Report 3: every minute, every day.
*/1 * * * * flock -xn /tmp/my3.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static3.sh >>/data/activity/log/promotion520_real_static3.log 2>&1 &'
# Reports 41/42/51/52/53: every minute, but only on day-of-month 20.
# NOTE(review): the month field is `*`, so these entries also fire on the
# 20th of every other month; confirm whether month 5 was intended or the
# entries are removed after the event.
*/1 * 20 * * flock -xn /tmp/my41.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static41.sh >>/data/activity/log/promotion520_real_static41.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my42.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static42.sh >>/data/activity/log/promotion520_real_static42.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my51.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static51.sh >>/data/activity/log/promotion520_real_static51.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my52.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static52.sh >>/data/activity/log/promotion520_real_static52.log 2>&1 &'
*/1 * 20 * * flock -xn /tmp/my53.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_real_static53.sh >>/data/activity/log/promotion520_real_static53.log 2>&1 &'

# Previous-day re-runs: minute 2 of hours 0, 1, 2, 3, 5 and 6 on
# day-of-month 21 (again in every month).
# NOTE(review): hour 4 is not in the list; confirm that the gap is intended.
2 0,1,2,3,5,6 21 * * flock -xn /tmp/my-yesterday4.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_yesterday_static4.sh >>/data/activity/log/promotion520_yesterday_static4.log 2>&1 &'
2 0,1,2,3,5,6 21 * * flock -xn /tmp/my-yesterday5.lock -c 'source /etc/profile && sh /data/activity/shell/promotion520_yesterday_static5.sh >>/data/activity/log/promotion520_yesterday_static5.log 2>&1 &'

 

你可能感兴趣的:(demandProc,初阶实时数仓相关)