背景需求:
点击日志中记录了浏览器UA信息,现在需要对UA进行分析,大致统计用在各个浏览器中的分布,给前端同学参考兼容性测试。
采用的方法:
第一步,从Hadoop集群中获取各个UA出现的频次,通过hive语句轻松实现。
第二步,使用awk脚本进行分析。
awk脚本内容:
#操作系统标示
#加密标示
#浏览器使用的主流渲染引擎有:Gecko、WebKit、KHTML、Presto、Trident、Tasman等,格式为:渲染引擎/版本信息
BEGIN{
sum=0;
split("2.0 2.1 2.2 2.3 3.0 3.1 3.2 4.0 4.1 4.2 4.4 5.0 5.1 5.2 5.5 6.0 7.0",versions);
split("6.0 8.0 9.0 10.0 11.0 12.0",ievs);
}
{sum = sum + $1}
{
if($0~/Windows NT/){
platform["Windows NT"]+=$1;
print $0> "click_wndows.data"
} else if($0~/Android/){
platform["Android"]+=$1;
print $0 > "click_android.data"
} else if($0~/Mac OS/){
platform["Mac OS"]+=$1;
print $0 > "click_mac.data"
} else if($2~/NULL/ || length($2)<1){
platform["kong"] += $1;
brower["kong"] += $1;
}else {
platform["other"] += $1;
print $0 > "click_other.data";
}
#渲染引擎
if($0~/WebKit/){
en["WebKit"] += $1;
} else if($0~/Gecko/ && $0!~/like Gecko/){
en["Gecko"] += $1;
} else if($0~/Trident/){
en["Trident"] += $1;
}else {
en["other"] += $1;
}
}
($0~/Windows NT/){
platform["Windows NT"]+=$1;
#print $0> "click_wndows.data"
}
($0~/Android/){
platform["Android"]+=$1;
#print $0 > "click_android.data"
for(v in versions){
reg="Android "versions[v]""
reg1="Android/"versions[v]
#print reg;
#print versions[v];
if(match($0,reg)>0 || match($0,reg)>0){
Android[versions[v]] += $1;
#print $0;
}
}
}
$0~/Mac OS/{
platform["Mac OS"]+=$1;
#print $0 > "click_mac.data";
if($0~/iPad/){
ios["iPad"] += $1
}else if($0~/iPhone/){
ios["iPhone"] += $1
}if($0~/Macintosh/){
ios["Macintosh"] += $1
}else{
ios["other"] += $1
}
}
$2~/NULL/ || length($2)<1{
platform["kong"] += $1;
brower["kong"] += $1;
}
#brower
$0~/MSIE/{
brower["ie"] += $1;
print $0 > "click_ie.data"
for(key in ievs){
reg="MSIE "ievs[key];
if(match($0,reg)>0){
ie[ievs[key]] += $1;
#print $0;
}else if(match($0,"Trident")>0 && match($0,"rv:11.0")>0){
ie["11.0"] += $1;
}
}
}
$0~/Firefox/{
brower["Firefox"] += $1;
}
$0~/UCBrowser/{
brower["UCBrowser"] += $1;
}
$0~/Safari/ && $0~/Mac OS/{
brower["Safari"] += $1;
}
$0~/Chrome/ && $0!~/QQBrowser/ && $0!~/UCBrowser/ && $0!~/baidu/{
brower["Chrome"] += $1;
}
$0~/MicroMessenger/{
brower["MicroMessenger"] += $1;
}
$0~/MQQBrowser/ && $0!~/MicroMessenger/{
brower["MQQBrowser"] += $1;
}
$0~/QQBrowser/ && $0!~/MicroMessenger/ && $0!~/MQQBrowser/{
brower["QQBrowser"] += $1;
}
$0~/baidu/{
brower["baidu"] += $1;
}
$0~/360browser/ || $0~/360SE/{
brower["360browser"] += $1;
}
$0~/Sogou/{
brower["Sogou"] += $1;
}
$0~/Opera/{
brower["Opera"] += $1;
}
$0~/Maxthon/{
brower["Maxthon"] += $1;
}
#mac
$0~/Mac OS/&&$0~/iPad/{
mac["ipad"] += $1;
}
END{
print "total,"sum
for(key in platform)
print key,",",platform[key]
print "brower , "
for(key in brower)
print key,",",brower[key]
print "mac platform, "
for(key in mac)
print key,",",mac[key]
print "android version, "
for(key in Android)
print key,",",Android[key]
print "ie-version, "
for(key in ie)
print key,",",ie[key]
print "en, "
for(key in en)
print key,",",en[key]
}
total 49763829
other 221645
Mac OS 9804817
Android 38271405
kong 19773110
Windows NT 31237758
----------------------
MicroMessenger 3694528
Opera 3781
360browser 215183
kong 19773110
Sogou 23226
baidu 278811
QQBrowser 743751
MQQBrowser 2599292
Safari 781210
Chrome 20922007
UCBrowser 434995
ie 4220817
Maxthon 97840
Firefox 239333
----------------------
ipad 575642
------android version------
5.5 90
3.2 616
6.0 2970281
4.0 102448
4.1 326754
7.0 1799
4.2 858749
2.0 99
2.1 5894
4.4 5427793
2.2 1201
5.0 1909275
2.3 127367
5.1 6337421
5.2 572
3.0 898
3.1 424
---------ie-version--------
6.0 591766
9.0 769165
11.0 94504
10.0 389497
8.0 1941706
---------en----------------
other 10676820
WebKit 34402802
Trident 4447105
Gecko 237102
---由于各个UA规范遵守的不好,各个浏览器为了兼容性,总是通过UA伪装得像别家,故统计脚本不严谨之处还请见谅