之前做了部分nginx qos分析和cdn日志qos分析计算程序开发的工作,涉及到对实时的和离线的nginx 日志的qos计算。不管是研发也好,运维也好,都需要关心自己负责的domain的qos情况,以此来判断业务的运行情况,容量情况,是否需要扩容,以及验证应用的变更是否影响用户访问等等。
"""select ip_province,round(SUM(IF(response<=10 AND STATUS='200',1,0))*100/COUNT(1),4) as less10ms_ratio, round(SUM(IF(response<=100 AND STATUS='200',1,0))*100/COUNT(1),4) as less100ms_ratio, round(SUM(IF(response<=1000 AND STATUS='200',1,0))*100/COUNT(1),4) as less1000ms_ratio, round(avg(response)/1000,4) as avg_rt,round(sum(size)*8/(1000*(sum(response)/1000)),2) as svg_speed FROM %s where dt = 'xxx' and domain = 'xxxx' and status='200' and hour in ('09','10','11') group by ip_province order by avg_rt desc """
SELECT regexp_extract(request,'(.*?) (.*?) (.*?)',2),http_referer,COUNT(1) AS COUNT_ALL FROM viplog.dw_nginx_log WHERE dt='xxx' and host='xxx' and status='404' GROUP BY regexp_extract(request,'(.*?) (.*?) (.*?)',2), http_referer ORDER BY COUNT_ALL DESC limit 20
"""select a.status as code,a.count_all,round((a.count_all/b.total)*100,4) as ratio from (select status,count(1) as count_all from viplog.dw_nginx_log where dt='xxx' and host='xxx' group by status ) a join (select count(1) as total from viplog.dw_nginx_log where host='%s' and dt='xxx') b order by ratio desc """
"""select vendor,domain,count_all,round((2xx_3xx_count/count_all)*100,4) as availability,avg_response_time,round((more_than1s_count/count_all)*100,4) as more_than1s_per,round((404_count/count_all)*100,4) as 404_ratio from cdn_qos where date='%s' and count_all > 10000 order by count_all desc """
"""SELECT * FROM ( SELECT concat(host,regexp_extract(request,'.+? +(.+?)(?:\\\?| )+.*',1)), COUNT(1) AS COUNT_ALL,round(avg(request_time),2) AS avg_rt FROM viplog.dw_nginx_log WHERE dt='%s' and host='%s' and (status rlike '^2.*' or status rlike '^3.*') and substr(time_local,14,5) >= '09:00' and substr(time_local,14,5)<='21:00' GROUP BY concat(host,regexp_extract(request,'.+? +(.+?)(?:\\\?| )+.*',1)) )a where a.COUNT_ALL > 10 and a.avg_rt > 0.01 ORDER BY a.avg_rt DESC limit 10 """
""" select a.service,a.api,a.cnt,b.avaible,avg_rt,more1s_ratio,404_ratio from ( select regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2) as service, regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) as api, count(1) as cnt from viplog.dw_nginx_log where dt='%s' and host='xxxx' and regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2) != '' and regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) != '' group by regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2), regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) ) a join (select service,api,cnt,round(2xx_3xx_count*100/cnt,4) as avaible,avg_rt, round(more_than1s_count*100/cnt,4) as more1s_ratio,round(404_count*100/cnt,4) as 404_ratio FROM (select regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2) as service, regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) as api,count(1) as cnt, (SUM(IF(SUBSTR(STATUS,0,1)='2',1,0)) + SUM(IF(SUBSTR(STATUS,0,1)='3',1,0))) as 2xx_3xx _count, ROUND(AVG(request_time),4) AS avg_rt,SUM(IF(request_time>1,1,0)) as more_than1s_count,SUM(IF(STATUS='404',1,0)) AS 404_count FROM viplog.dw_nginx_log where dt='%s' and host='xxxx' and regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2) != '' and regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) != '' group by regexp_extract(request,'.+? +(.+?)service=(.+?)&(.+) .+?',2), regexp_extract(request,'.+? +(.+?)api_key=(.+?)&(.+) .+?',2) order by cnt desc)a)b on a.service=b.service and a.api=b.api and a.cnt >10000 order by a.cnt desc """
select host,concat(substr(lt,1,18),'01') as st,parse_url(concat('http://',host,regexp_extract(request,'([^ ]*) ([^ ]*)',2)),'PATH') as url, sum(cast( case when status like '2%' or status like '3%' then body_bytes_sent else '0' end as int)) as 2xx3xx_body_size, sum(cast( case when status like '4%' then body_bytes_sent else '0' end as int)) as 4xx_body_size, sum(cast( case when status like '5%' then body_bytes_sent else '0' end as int)) as 5xx_body_size, sum(cast(body_bytes_sent as int)) as all_body_size , sum(cast( case when status like '2%' or status like '3%' then request_time else '0' end as int)) as 2xx3xx_response_time, sum(cast( case when status like '4%' then request_time else '0' end as int)) as 4xx_response_time, sum(cast( case when status like '5%' then request_time else '0' end as int)) as 5xx_response_time, sum(cast(request_time as int)) as all_response_time , sum(cast( case when status like '2%' or status like '3%' then 1 else 0 end as int)) as 2xx3xx_count, sum(cast( case when status like '4%' then 1 else 0 end as int)) as 4xx_count, sum(cast( case when status like '5%' then 1 else 0 end as int)) as 5xx_count, sum(1) as all_count from ( select regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 3) as lt,regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 4) as request, regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 11) as host,regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 6) as status,trim(regexp_replace(regexp_replace(regexp_replace(regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 5),'\\"',''),'\\.',''),'^0*','')) as request_time,regexp_extract(line,'([^ ]*) - ([^ ]*) \\\[(.*)\\\] "([^"]*)" ("[^"]*" )?(-|[0-9]*) (-|[0-9]*) "([^"]*)" "([^"]*)" (.*) ([^ ]*com) ([^ ]*)($| ([^ ]*))', 7) as body_bytes_sent from ods_nginx_log_5min_impala)tmp where host !='' and length(host)<=40 and (request !='-' or status !='400') and lower(request) not like '%\.jpg%' and lower(request) not like '%\.ico%' and lower(request) not like '%\.gif%' and lower(request) not like '%\.swf%' and lower(request) not like '%\.txt%' and lower(request) not like '%\.html%' and lower(request) not like '%\.js%' and lower(request) not like '%\.css%' and lower(request) not like '%\.png%' group by host,st,parse_url(concat('http://',host,regexp_extract(request,'([^ ]*) ([^ ]*)',2)),'PATH');