Python 统计 PV/UV(一)

数据格式(每条记录依次为时间、udid、setKey 三行,记录之间以空行分隔):

2015-04-10 15:32:54
udid:user1
 setKey:flag1
 
2015-04-10 15:33:00
udid:user2
 setKey:flag2


...


#!/usr/bin/python
import os
import sys

def _read_records(path):
    """Parse one log file into a list of records.

    A record is a run of consecutive non-blank lines; whitespace-only lines
    separate records.  Each line is stripped, and a tab inside a line also
    delimits fields, so every record is returned as a flat list of fields.
    """
    records = []
    fields = []
    with open(path, 'r') as handle:
        for raw in handle:
            text = raw.strip()
            if text:
                fields.extend(text.split('\t'))
            elif fields:
                records.append(fields)
                fields = []
    # Flush the trailing record: a file need not end with a blank line, and
    # without this the last record of such a file would be lost.
    if fields:
        records.append(fields)
    return records


def _aggregate(records):
    """Fold records into ``{day: {'pv': count, 'uvSet': set_of_second_fields}}``.

    The day key is the first 10 characters of the record's first field
    (``YYYY-MM-DD`` for timestamps shaped like ``2015-04-10 15:32:54``).
    Records with fewer than three fields are ignored.
    """
    stats = {}
    for fields in records:
        if len(fields) < 3:
            continue
        day = fields[0].strip()[0:10]
        if day not in stats:
            stats[day] = {'pv': 0, 'uvSet': set()}
        entry = stats[day]
        entry['pv'] += 1
        # The whole second field (e.g. 'udid:user1') identifies the visitor.
        entry['uvSet'].add(fields[1])
    return stats


def run(num=1, filePrefix='icap/flowtips', fileEx='.txt'):
    """Print per-day PV and UV counts gathered from a numbered series of files.

    Files named ``<filePrefix><i><fileEx>`` for ``i`` in ``0..num`` (inclusive)
    are read; indices whose file does not exist are skipped.  For every day
    found, one line ``day<TAB>pv<TAB>uv`` is printed, where ``pv`` is the
    number of records on that day and ``uv`` the number of distinct second
    fields among them.

    Args:
        num: Highest file index to look for.
        filePrefix: Path prefix placed before the file index.
        fileEx: Suffix (extension) placed after the file index.
    """
    # input
    records = []
    for index in range(num + 1):
        filePath = ''.join([filePrefix, str(index), fileEx])
        if not os.path.exists(filePath):
            continue
        records.extend(_read_records(filePath))

    # analysis
    data = _aggregate(records)

    # output
    for key in data:
        val = data[key]
        print('%s\t%d\t%d' % (key, val['pv'], len(val['uvSet'])))


if __name__ == '__main__':
    # Highest file index to scan: the first command-line argument when one is
    # given, otherwise 20.
    num = int(sys.argv[1]) if len(sys.argv) > 1 else 20
    run(num)

用 dict() 以日期(时间戳的前 10 个字符)为键存储统计结果;每个日期下用 set() 对 udid 去重,其元素个数即为 UV,记录条数即为 PV。

你可能感兴趣的:(python 统计pvuv 一)