需求:结合ODPS对日志进行统计分析,并将结果导入到MySQL中
解决方法:结合ODPS命令行工具odpscmd、mysqldump、mysql以及crontab完成该工作。
Shell:
#!/bin/bash
# Settings for the MySQL database that receives the report data.
PORT='3306'              # MySQL port (not referenced by the commands in this script)
USERNAME='biuser'        # MySQL user name
PASSWORD='!#123date'     # MySQL password
DBNAME='bitest'          # MySQL database name

# Date the job runs on, formatted YYYY-MM-DD (not referenced by the commands in this script).
RUNDATE=$(date +%Y-%m-%d)
# Year and month (YYYYMM) of yesterday; used as the ODPS partition value.
period=$(date +"%Y%m" -d "-1days")

# Fixed search path so odpscmd and the JDK binaries resolve without a login environment.
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/root/shenl/odpscmd_public/bin:/root/jdk1.7.0_75/bin
#### 1. Sync reference tables from the other database
# Dump tb_carry and tb_city from testdb on 192.168.128.33. The dump is written to an
# absolute path so the import below reads the same file regardless of the working directory.
mysqldump -umdshop -p'xdata123' -h'192.168.128.33' testdb tb_carry tb_city --set-gtid-purged=OFF > /root/testdb.sql
# Replay the dump into the reporting database. The space before "1>" is required:
# without it the shell glues the 1 onto the file name and reads /root/testdb.sql1.
mysql -u"$USERNAME" -p"$PASSWORD" -D"$DBNAME" < /root/testdb.sql 1> /root/syscODPS.log
### 2. Remove the previous category / time-span / channel / goods export files
# MySQL's SELECT ... INTO OUTFILE will not overwrite an existing file, so stale exports go first.
rm -rf /tmp/timespan.txt /tmp/timearea.txt /tmp/seekarea.txt /tmp/tb_good.txt
### 3. Build the category / time-span / channel / goods data in MySQL
mysql -u"$USERNAME" -p"$PASSWORD" -D"$DBNAME" < /root/shenl/timespan.sql > /root/shenl/synctimespan.log
### 4. Drop the category / time-span / channel / goods tables in ODPS
# IF EXISTS keeps the drop from failing on the first run or after a partially failed run.
for tbl in tb_bi_meta_timespan tb_bi_meta_timearea tb_bi_meta_seekarea tb_good; do
    odpscmd -e "DROP TABLE IF EXISTS ${tbl};"
done
### 5. Recreate those tables in ODPS for the later processing steps
odpscmd -e "CREATE TABLE tb_bi_meta_timespan(catalogid int,catalogname string,span int, channel int,action int);"
odpscmd -e "CREATE TABLE tb_bi_meta_timearea(catalogid int,catalogname string,province string,province_id int,channel int,action int);"
odpscmd -e "CREATE TABLE tb_bi_meta_seekarea(province string,province_id int,channel int);"
odpscmd -e "create table tb_good(gid string,g_title string,g_catalog_id string,g_status string);"
### 6. Upload the exported files into the matching ODPS tables
odpscmd -e "tunnel upload /tmp/timespan.txt tb_bi_meta_timespan;"
odpscmd -e "tunnel upload /tmp/timearea.txt tb_bi_meta_timearea;"
odpscmd -e "tunnel upload /tmp/seekarea.txt tb_bi_meta_seekarea;"
odpscmd -e "tunnel upload /tmp/tb_good.txt tb_good;"
### 7. Insert yesterday's log rows into the matching monthly partition
# The partition value is passed as a single-quoted SQL literal. The original wrapped $period
# in double quotes, which closed and reopened the shell string, so no quotes reached ODPS.
# NOTE(review): assumes periodsplit is a string partition column - confirm against the table DDL.
odpscmd -e "INSERT INTO TABLE tb_bi_goodbrowse PARTITION (periodsplit='${period}')
SELECT channel,province,city,itemid,'' as goodclassify,datetrunc(requesttime,'DD') AS period,geid,userid,requesttime,action,getdate() as inserttime FROM tb_bi_marketlog A WHERE isdate(requesttime,'yyyy-mm-dd hh:mi:ss') AND A.period=to_char(dateadd(getdate(), -1, 'dd'),'yyyymm') AND datetrunc(A.requesttime,'DD') = datetrunc(dateadd(getdate(), -1,'dd'),'dd');"
### 8. Run the ODPS statistics script that produces the report tables
odpscmd -f "/root/shenl/odpsstat0320.sql"
### 9. Remove the previously downloaded report files
rm -rf /var/log/mysql/timespanout.txt /var/log/mysql/timeareaout.txt /var/log/mysql/seekareaout.txt
### 10. Download the ODPS report tables into /var/log/mysql
# Each file is handed to mysql:mysql after download, presumably so the MySQL server
# can read it for LOAD DATA INFILE.
odpscmd -e "tunnel download max_compute.tb_bi_report_timespans /var/log/mysql/timespanout.txt;"
chown mysql:mysql /var/log/mysql/timespanout.txt
# Fixed: "download" and the table name were run together with no space between them.
odpscmd -e "tunnel download max_compute.tb_bi_report_timeareas /var/log/mysql/timeareaout.txt;"
chown mysql:mysql /var/log/mysql/timeareaout.txt
# -ni '' sets the NULL indicator to an empty string for this download.
# Fixed: the table name was glued onto the '' argument with no space.
odpscmd -e "tunnel download -ni '' max_compute.tb_bi_report_seekareas /var/log/mysql/seekareaout.txt;"
chown mysql:mysql /var/log/mysql/seekareaout.txt
### 11. Load the statistics computed in ODPS into MySQL
mysql -u"$USERNAME" -p"$PASSWORD" -D"$DBNAME" < /root/shenl/load2mysql.sql > /root/shenl/load2mysql.log
1) 其中/root/shenl/timespan.sql的脚本内容:
-- Run the exports against the bi schema.
-- NOTE(review): the calling shell script starts the client with -Dbitest - confirm which schema is intended.
USE bi;

-- Export every combination of: catalog rows with c_parent_id = 0, every tb_incr value (span),
-- tb_incr values with 0 < i < 4 (channel) and tb_incr values with 0 < i < 5 (action).
SELECT
    top_cat.id     AS catalogid,
    top_cat.c_name AS catalogname,
    incr.i         AS span,
    chan.i         AS channel,
    act.i          AS action
FROM (
    SELECT id, c_name
    FROM shenl_catalog
    WHERE c_parent_id = 0
) AS top_cat
CROSS JOIN tb_incr AS incr
CROSS JOIN (
    SELECT i
    FROM tb_incr
    WHERE i > 0 AND i < 4
) AS chan
CROSS JOIN (
    SELECT i
    FROM tb_incr
    WHERE i > 0 AND i < 5
) AS act
INTO OUTFILE '/tmp/timespan.txt'
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
-- Export every combination of: catalog rows with c_parent_id = 0, distinct provinces from
-- tb_bi_area, tb_incr values with 0 < i < 4 (channel) and tb_incr values with 0 < i < 5 (action).
SELECT
    top_cat.id       AS catalogid,
    top_cat.c_name   AS catalogname,
    area.province,
    area.province_id,
    chan.i           AS channel,
    act.i            AS action
FROM (
    SELECT id, c_name
    FROM shenl_catalog
    WHERE c_parent_id = 0
) AS top_cat
CROSS JOIN (
    SELECT DISTINCT province, province_id
    FROM tb_bi_area
) AS area
CROSS JOIN (
    SELECT i
    FROM tb_incr
    WHERE i > 0 AND i < 4
) AS chan
CROSS JOIN (
    SELECT i
    FROM tb_incr
    WHERE i > 0 AND i < 5
) AS act
INTO OUTFILE '/tmp/timearea.txt'
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
-- Export every combination of distinct provinces from tb_bi_area and
-- tb_incr values with 0 < i < 4 (channel).
SELECT
    area.province,
    area.province_id,
    chan.i AS channel
FROM (
    SELECT DISTINCT province, province_id
    FROM tb_bi_area
) AS area
CROSS JOIN (
    SELECT i
    FROM tb_incr
    WHERE i > 0 AND i < 4
) AS chan
INTO OUTFILE '/tmp/seekarea.txt'
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
-- Export the goods list. Line feeds, carriage returns and commas are stripped from g_title
-- so that every record stays on one line of the comma-delimited file.
-- The file is written to /tmp/tb_good.txt because that is the path the calling shell script
-- removes before the export and uploads to ODPS afterwards (it was /tmp/shenl_goods.txt,
-- which the script never touches).
SELECT
    gid,
    REPLACE(REPLACE(REPLACE(g_title, CHAR(10), ''), CHAR(13), ''), ',', '') AS g_title,
    g_catalog_id,
    g_status
FROM shenl_goods
INTO OUTFILE '/tmp/tb_good.txt'
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
2) /root/shenl/odpsstat0320.sql 内统计脚本内容:
-- Rebuild tb_bi_report_timespans for yesterday: one row per tb_bi_meta_timespan combination
-- (catalog, span, channel, action), with 0 where no browse rows matched.
-- The goods table is read as tb_good, the name under which the calling shell script creates
-- and uploads it in ODPS (the query previously referenced shenl_goods).
-- NOTE(review): span is matched against the hour of browsetime - confirm tb_incr supplies 0-23.
INSERT OVERWRITE TABLE tb_bi_report_timespans
SELECT
    datetrunc(dateadd(getdate(), -1, 'dd'), 'dd') AS period,
    A.catalogname,
    A.span,
    CASE A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel,
    A.action,
    CASE WHEN D.catalogid IS NULL THEN 0 ELSE D.browsetimes END AS stattimes
FROM tb_bi_meta_timespan A
LEFT OUTER JOIN (
    -- Distinct browse timestamps per catalog, hour, channel and action for yesterday
    SELECT
        C.g_catalog_id AS catalogid,
        A.channel,
        datepart(A.browsetime, 'hh') AS spanid,
        COUNT(DISTINCT A.browsetime) AS browsetimes,
        A.action
    FROM tb_bi_goodbrowse A
    JOIN tb_good C
        ON A.good = C.gid
    WHERE A.period = datetrunc(dateadd(getdate(), -1, 'dd'), 'dd')
        AND A.periodsplit = to_char(dateadd(getdate(), -1, 'dd'), 'yyyymm')
    GROUP BY C.g_catalog_id, datepart(A.browsetime, 'hh'), A.channel, A.action
) D
    ON A.catalogid = D.catalogid
    AND A.span = D.spanid
    AND A.channel = D.channel
    AND A.action = D.action;
-- Rebuild tb_bi_report_timeareas for yesterday: one row per tb_bi_meta_timearea combination
-- (catalog, province, channel, action), with 0 where no browse rows matched.
-- The goods table is read as tb_good, the name under which the calling shell script creates
-- and uploads it in ODPS (the query previously referenced shenl_goods).
INSERT OVERWRITE TABLE tb_bi_report_timeareas
SELECT
    datetrunc(dateadd(getdate(), -1, 'dd'), 'dd') AS period,
    A.catalogname,
    A.province,
    CASE A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel,
    A.action,
    CASE WHEN D.catalogid IS NULL THEN 0 ELSE D.browsetimes END AS stattimes
FROM tb_bi_meta_timearea A
LEFT OUTER JOIN (
    -- Browse row count per province, catalog, channel and action for yesterday
    SELECT
        B.province_id,
        A.period,
        A.channel,
        COUNT(1) AS browsetimes,
        C.g_catalog_id AS catalogid,
        A.action
    FROM tb_bi_goodbrowse A
    JOIN (
        SELECT DISTINCT province, province_id
        FROM tb_bi_area
    ) B
        ON A.province = B.province_id
    JOIN tb_good C
        ON A.good = C.gid
    WHERE A.period = datetrunc(dateadd(getdate(), -1, 'dd'), 'dd')
        AND A.periodsplit = to_char(dateadd(getdate(), -1, 'dd'), 'yyyymm')
    GROUP BY B.province_id, C.g_catalog_id, A.period, A.channel, A.action
) D
    ON A.catalogid = D.catalogid
    AND A.province_id = D.province_id
    AND A.channel = D.channel
    AND A.action = D.action;
-- Rebuild tb_bi_report_seekareas for yesterday: one row per tb_bi_meta_seekarea combination
-- (province, channel) and matched item, with 0 where no log rows matched. item is NULL on
-- unmatched rows. Only tb_bi_marketlog rows with action = 2 and a parseable requesttime count.
-- The statement now ends with a terminator, which was missing from the last statement of the script.
INSERT OVERWRITE TABLE tb_bi_report_seekareas
SELECT
    datetrunc(dateadd(getdate(), -1, 'dd'), 'dd') AS period,
    A.province,
    D.item AS item,
    CASE A.channel WHEN 1 THEN "tel" WHEN 2 THEN "smell" WHEN 3 THEN "bigscreen" END AS channel,
    CASE WHEN D.province_id IS NULL THEN 0 ELSE D.browsetimes END AS stattimes
FROM tb_bi_meta_seekarea A
LEFT OUTER JOIN (
    -- Distinct request timestamps per province, channel and item for yesterday
    SELECT
        B.province_id,
        datetrunc(dateadd(getdate(), -1, 'dd'), 'dd') AS period,
        A.channel,
        A.item,
        COUNT(DISTINCT A.requesttime) AS browsetimes
    FROM tb_bi_marketlog A
    JOIN (
        SELECT DISTINCT province, province_id
        FROM tb_bi_area
    ) B
        ON A.province = B.province_id
    WHERE A.action = 2
        AND isdate(A.requesttime, 'yyyy-mm-dd hh:mi:ss')
        AND A.period = to_char(dateadd(getdate(), -1, 'dd'), 'yyyymm')
        AND datetrunc(A.requesttime, 'DD') = datetrunc(dateadd(getdate(), -1, 'dd'), 'dd')
    GROUP BY B.province_id, period, A.channel, A.item
) D
    ON A.province_id = D.province_id
    AND A.channel = D.channel;
3) /root/shenl/load2mysql.sql脚本中的内容是:
-- Load the three report files downloaded from ODPS into the MySQL report tables.
-- Fixed: the keyword TABLE and the table name were run together (tabletb_bi_report_...),
-- which is a syntax error.
LOAD DATA INFILE '/var/log/mysql/timespanout.txt'
    INTO TABLE tb_bi_report_timespans
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

LOAD DATA INFILE '/var/log/mysql/timeareaout.txt'
    INTO TABLE tb_bi_report_timeareas
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

LOAD DATA INFILE '/var/log/mysql/seekareaout.txt'
    INTO TABLE tb_bi_report_seekareas
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';
代码解读:详见注释部分