hive-1

 
  
##Report generation for the backend nginx logs of the mpi-web application
use bi;
##Commented-out template directive that would define a date suffix variable (nothing below references it)
##set ($suffix = "${env.YYYY}${env.MM}${env.DD}");
##Staging table holding today's aggregates, one row per server_ip and normalized url
##It is dropped and recreated on every run so a rerun for the same date starts from an empty table
##avg_response_today and response_line95_today receive response_time * 1000 from the load query
##Text delimiter clauses are omitted on purpose: the table is stored as ORC, which does not use them
DROP TABLE IF EXISTS dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};
CREATE TABLE IF NOT EXISTS dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}
(
    server_ip             STRING,
    url                   STRING,
    times                 INT,
    avg_response_today    INT,
    avg_body_size         INT,
    response_line95_today INT,
    count_50x             INT,
    count_40x             INT,
    count_30x             INT,
    count_20x             INT
)
STORED AS ORC;
##Staging table holding yesterday's aggregates, one row per server_ip and normalized url
##It is dropped and recreated on every run so a rerun for the same date starts from an empty table
##Columns named *_today hold yesterday's values here - the names mirror the today table
##and are kept because the final report query reads them as y.avg_response_today and y.response_line95_today
##Text delimiter clauses are omitted on purpose: the table is stored as ORC, which does not use them
DROP TABLE IF EXISTS dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};
CREATE TABLE IF NOT EXISTS dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}
(
    server_ip             STRING,
    url                   STRING,
    times                 INT,
    avg_response_today    INT,
    avg_body_size         INT,
    response_line95_today INT,
    count_50x             INT,
    count_40x             INT,
    count_30x             INT,
    count_20x             INT
)
STORED AS ORC;
##Load today's aggregates for mpi-web: one row per normalized url and server_ip
##The url key is host + path with digits, pipe, plus and asterisk characters removed,
##so paths that differ only by numeric ids collapse into a single entry
##Requests for /index.jsp and /heartbeat.html and the hosts dp and www.dianping.com are excluded
##Response times are multiplied by 1000 before being rounded and stored
##percentile() only accepts integral input, hence the explicit cast of the rounded value to bigint
##No ORDER BY: row order in a staging table is never used and a global sort would force a single reducer
INSERT INTO TABLE dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD}
SELECT
    server_ip,
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')) AS url_s,
    count(url) AS times,
    round(avg(response_time) * 1000) AS avg_response_today,
    round(avg(body_bytes_sent)) AS avg_body_size,
    round(percentile(cast(round(response_time * 1000) AS BIGINT), 0.95)) AS response_line95_today,
    sum(if(status LIKE '50%', 1, 0)) AS count_50x,
    sum(if(status LIKE '40%', 1, 0)) AS count_40x,
    sum(if(status LIKE '30%', 1, 0)) AS count_30x,
    sum(if(status LIKE '20%', 1, 0)) AS count_20x
FROM
    dpdw_nginx_log_all
WHERE
    application = 'mpi-web'
    AND hp_stat_date = '${env.YYYYMMDD}'
    AND parse_url(url, 'PATH') <> '/index.jsp'
    AND parse_url(url, 'PATH') <> '/heartbeat.html'
    AND parse_url(url, 'HOST') <> 'dp'
    AND parse_url(url, 'HOST') <> 'www.dianping.com'
GROUP BY
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')),
    server_ip,
    application
;
##Load yesterday's aggregates for mpi-web: one row per normalized url and server_ip
##The url key is host + path with digits, pipe, plus and asterisk characters removed,
##so paths that differ only by numeric ids collapse into a single entry
##Requests for /index.jsp and /heartbeat.html and the hosts dp and www.dianping.com are excluded
##Response times are multiplied by 1000 before being rounded and stored
##percentile() only accepts integral input, hence the explicit cast of the rounded value to bigint
##No ORDER BY: row order in a staging table is never used and a global sort would force a single reducer
INSERT INTO TABLE dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD}
SELECT
    server_ip AS server_ip,
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')) AS url,
    count(url) AS times,
    round(avg(response_time) * 1000) AS avg_response_today,
    round(avg(body_bytes_sent)) AS avg_body_size,
    round(percentile(cast(round(response_time * 1000) AS BIGINT), 0.95)) AS response_line95_today,
    sum(if(status LIKE '50%', 1, 0)) AS count_50x,
    sum(if(status LIKE '40%', 1, 0)) AS count_40x,
    sum(if(status LIKE '30%', 1, 0)) AS count_30x,
    sum(if(status LIKE '20%', 1, 0)) AS count_20x
FROM
    dpdw_nginx_log_all
WHERE
    application = 'mpi-web'
    AND hp_stat_date = '${env.YYYYMMDD_P1D}'
    AND parse_url(url, 'PATH') <> '/index.jsp'
    AND parse_url(url, 'PATH') <> '/heartbeat.html'
    AND parse_url(url, 'HOST') <> 'dp'
    AND parse_url(url, 'HOST') <> 'www.dianping.com'
GROUP BY
    concat(parse_url(url, 'HOST'), regexp_replace(parse_url(url, 'PATH'), '[0-9|\+|\*]', '')),
    server_ip,
    application
;
##Write the daily report partition: today's metrics side by side with yesterday's values in the *_cmp columns
##Every row of the today staging table is kept - comparison columns fall back to 0 when yesterday has no match
##Rows are matched on url and server_ip and sorted by today's request count, highest first
INSERT OVERWRITE TABLE dprpt_mpi_nginx_log_daily PARTITION (hp_cal_dt = '${env.YYYYMMDD}')
SELECT
    t.server_ip                          AS server_ip,
    t.url                                AS url,
    t.times                              AS times,
    coalesce(y.times, 0)                 AS times_cmp,
    t.avg_response_today                 AS avg_response_today,
    coalesce(y.avg_response_today, 0)    AS avg_response_cmp,
    t.response_line95_today              AS response_line95_today,
    coalesce(y.response_line95_today, 0) AS response_line95_today_cmp,
    t.avg_body_size                      AS avg_body_size,
    t.count_20x                          AS count_20x,
    coalesce(y.count_20x, 0)             AS count_20x_cmp,
    t.count_30x                          AS count_30x,
    coalesce(y.count_30x, 0)             AS count_30x_cmp,
    t.count_40x                          AS count_40x,
    coalesce(y.count_40x, 0)             AS count_40x_cmp,
    t.count_50x                          AS count_50x,
    coalesce(y.count_50x, 0)             AS count_50x_cmp
FROM dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD} t
LEFT JOIN dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD} y
    ON t.url = y.url
    AND t.server_ip = y.server_ip
ORDER BY times DESC;
##Clean up: remove both dated staging tables once the report insert above has run
DROP TABLE IF EXISTS dpstg_mpi_nginx_log_today_${env.YYYY}${env.MM}${env.DD};
DROP TABLE IF EXISTS dpstg_mpi_nginx_log_yesterday_${env.YYYY}${env.MM}${env.DD};

你可能感兴趣的:(hive-1)