python 统计pvuv 二

test.py

#!/usr/bin/python
import os
import sys
import string
import json
# The input file 14.log must be generated beforehand, e.g.:
#   awk -F ',' 'substr($0,21,2)=='14'{print $0}' * > 14.log

def run(logfile='14.log', dts=None):
    """Print page views (PV) and unique visitors (UV) per set key.

    The log file is read line by line as comma-separated text. A line is
    counted only when all of the following hold:

    * its first 13 characters (the 'YYYY-MM-DD HH' prefix) are in ``dts``;
    * it splits on ',' into exactly 7 fields;
    * field 2 (the log id) and field 5 (the set key) are both non-empty.

    For every set key, one line ``<key> pv:<n> uv:<m>`` is printed, where
    ``n`` is the number of counted lines and ``m`` the number of distinct
    log ids seen for that key.

    Args:
        logfile: Path of the log file to read. Defaults to '14.log'.
        dts: Iterable of 'YYYY-MM-DD HH' prefixes to accept. Defaults to
            hours 00 through 08 of 2015-04-29.

    Raises:
        SystemExit: With status 1 when ``logfile`` does not exist.
    """
    if not os.path.exists(logfile):
        print('error:' + logfile + ' not existed.')
        print('hint:awk -F \',\' \'substr($0,21,2)==\'14\'{print $0}\' WB_LOG_LOAD* > 14.log')
        # Non-zero status so calling shells can detect the failure.
        sys.exit(1)

    if dts is None:
        dts = ['2015-04-29 %02d' % hour for hour in range(9)]
    # A set gives O(1) membership tests inside the per-line loop.
    dts = set(dts)

    res = {}
    # 'with' guarantees the file is closed even if parsing raises.
    with open(logfile, 'r') as f:
        for line in f:
            if line[0:13] not in dts:
                continue

            arr = line.split(',')
            if len(arr) != 7:
                continue

            log_id, key = arr[2], arr[5]
            if not (log_id and key):
                continue

            item = res.get(key)
            if item is None:
                item = {'pv': 0, 'uvSet': set()}
                res[key] = item
            item['pv'] += 1
            item['uvSet'].add(log_id)

    for key in res:
        print(key + ' pv:' + str(res[key]['pv']) + ' uv:' + str(len(res[key]['uvSet'])))



main.py

#!/usr/bin/python
import os
import sys
import test

if __name__ == '__main__':
    # The optional first command-line argument is parsed as an integer
    # (default 20). It is currently unused: the run(num) call is disabled
    # and test.run() takes no arguments.
    num = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    test.run()




你可能感兴趣的:(python 统计pvuv 二)