python多线程代码实例

因业务需求,写了一个python多线程处理数据的程序,记录下来,方便以后参考:

#!/usr/bin/python

'''
coding=utf-8

bossid.fy.mapping.itil.dic
2611:171:video_p2p_pieces
2125:82:live_pcclients
1365:142:appnews_1365
1446:143:appnews_1446 
2181:144:appnews_2181 

note: before running, check the data directory (datadir in LoadBossIdData) and where the bossid mapping is fetched (nameapi.getValueByKey in the main block)
'''

import urllib2
import json
import os
import os.path
import threading
import datetime
import time
import re
#import nameapi
import sys
# Make the directory that holds the nameapi module importable (added only once).
if "/usr/local/zk_l5_name/zk_ex_named/names" not in sys.path:
	sys.path.append("/usr/local/zk_l5_name/zk_ex_named/names")
if 'nameapi' not in sys.modules:
	nameapi = __import__('nameapi')
else:
	# An import statement cannot be run through eval() (it accepts expressions
	# only and raises SyntaxError), so take the already-loaded module object
	# from sys.modules and refresh it with the Python 2 builtin reload().
	nameapi = reload(sys.modules['nameapi'])

class Th(threading.Thread):
	"""
	Worker thread that hands one configuration entry to a shared processer.
	"""

	def __init__(self,strBossIdConf,processer):
		"""
		strBossIdConf: configuration string passed on to LoadBossIdData
		processer: object providing LoadBossIdData(conf)
		"""
		super(Th, self).__init__()
		self._processer = processer
		self._strBossIdConf = strBossIdConf

	def run(self):
		"""
		Thread body: load the data described by the stored configuration entry.
		"""
		load = self._processer.LoadBossIdData
		load(self._strBossIdConf)

class BossDataProcesser:
	"""
	Trim the fields of boss data files to the column lengths of their fy table
	and load the result with /usr/local/bin/fyloader.

	The class defines no instance attributes; every method works only on its
	arguments, the file system and the two HTTP endpoints named below.
	"""

	def _FetchJson(self,strurl):
		"""
		GET strurl and return the decoded JSON body; the response is always closed.
		"""
		response = urllib2.urlopen(urllib2.Request(strurl))
		try:
			return json.loads(response.read())
		finally:
			response.close()

	def _ParseColInfo(self,objJson,typeKey,lenKey,strVarchar):
		"""
		Build the per-column length list from a decoded column description.

		objJson: decoded JSON; the columns are read from its "data" entry
		typeKey / lenKey: keys holding the column type and length in each column
		strVarchar: type name (case-sensitive) that marks a length-limited column

		Returns one entry per column: the column's length value as found in the
		JSON when its type equals strVarchar, otherwise -1. Returns an empty list
		when objJson is not a dict or has no "data" entry.
		"""
		lstColInfo = list()
		if not isinstance(objJson, dict):
			return lstColInfo
		for col in objJson.get("data") or []:
			if col[typeKey] == strVarchar:
				lstColInfo.append(col[lenKey])
			else:
				lstColInfo.append(-1)
		return lstColInfo

	def GetBossIdColInfo(self,strBossId):
		"""
		Get columns info of bossid from the beehive HTTP interface.

		Returns a list with one entry per column: the 'len' value for 'varchar'
		columns, -1 for every other type.
		"""
		strurl='http://beehive.boss.webdev.com/bossid/api/getAllIds?bossid='+str(strBossId)
		return self._ParseColInfo(self._FetchJson(strurl), 'type', 'len', 'varchar')

	def GetFyIdColInfo(self,strFyId):
		"""
		Get columns info of a fy table id from the fy HTTP interface.

		Returns a list with one entry per column: the 'f_Length' value for
		'VARCHAR' columns, -1 for every other type.
		"""
		strurl='http://fy.webdev.com/php/interface/getTableInfo.php?id='+str(strFyId)
		return self._ParseColInfo(self._FetchJson(strurl), 'f_Type', 'f_Length', 'VARCHAR')

	def GetBossidColInfoFromFile(self,strBossId,filename="/tmp/fycolinfo.info"):
		"""
		Get columns info of bossid from a local cache file.

		Each usable line of the file has the form  bossid|YYYYMMDD|json . The
		first line whose bossid equals strBossId (compared as integers) and
		whose day is today supplies the JSON, which is parsed like the result
		of GetFyIdColInfo.

		strBossId: bossid to look up; must be convertible with int(), otherwise
			ValueError is raised
		filename: cache file to read (defaults to the path the script always used)

		Returns the column length list, or an empty list when the file cannot be
		opened or holds no matching line, so that the caller can fall back to
		another source.
		"""
		today = datetime.datetime.today().strftime("%Y%m%d")
		wanted = int(strBossId)
		strcol = ''

		try:
			fileobj = open(filename)
		except IOError:
			# no readable cache file is treated like "no entry found"
			return list()

		with fileobj:
			for strLine in fileobj:
				# split at the first two separators only: the JSON part may
				# itself contain '|'
				lstitem = strLine.split("|", 2)
				if len(lstitem) < 3:
					continue
				bossid, day, strjson = lstitem
				try:
					matched = int(bossid) == wanted
				except ValueError:
					# line does not start with a numeric bossid
					continue
				if matched and day == today:
					strcol = strjson.strip()
					break

		if not strcol:
			return list()
		return self._ParseColInfo(json.loads(strcol), 'f_Type', 'f_Length', 'VARCHAR')

	def ProcessFile(self,filename,bossid,fyid):
		"""
		process data file

		Looks up the column lengths (cache file first, fy HTTP interface when
		the cache has nothing), then writes every line of filename, passed
		through ProcessLine, to filename + ".ok".
		"""
		lstColInfo = self.GetBossidColInfoFromFile(bossid)
		if not lstColInfo:
			print("enter")
			lstColInfo = self.GetFyIdColInfo(fyid)

		# nested with-blocks close both files even when the second open fails
		with open(filename) as fileobj:
			with open(filename + ".ok",'w+') as filewriteobj:
				for strLine in fileobj:
					filewriteobj.write(self.ProcessLine(strLine,lstColInfo))

	def ProcessLine(self,strLine,lstColInfo):
		"""
		process one line of file

		strLine: one raw line, fields separated by ',' ("\\," is an escaped
			comma and does not separate fields)
		lstColInfo: per-column lengths as returned by the Get*ColInfo methods

		Returns strLine unchanged when lstColInfo is empty. Otherwise returns the
		rebuilt line terminated by os.linesep: fields longer than a positive
		column length are cut to that length, fields of zero-length columns are
		emptied, surplus fields are dropped and missing ones are filled with '0'.

		NOTE(review): the split pattern, lstItemsLen and the loop header were
		reconstructed from a source line that lost text - confirm against the
		original script.
		"""
		lstColLen = len(lstColInfo)
		if lstColLen == 0:
			return strLine

		# split on commas that are not preceded by a backslash
		lstItems = re.split(r'(?<!\\),', strLine)
		lstItemsLen = len(lstItems)

		for index in range(0, lstColLen):
			collen = int(lstColInfo[index])
			if index >= lstItemsLen:
				# the line has no field for this column; nothing to trim or
				# clear (avoids an IndexError for zero-length columns)
				continue
			if collen > 0 and len(lstItems[index]) > collen:
				lstItems[index] = lstItems[index][0:collen]
			elif collen == 0:
				lstItems[index] = ""

		if lstItemsLen > lstColLen:
			lstOut = lstItems[0:lstColLen]
		else:
			# The last item (the one that carries the line's trailing newline,
			# when there is one) is dropped and the line is padded with '0' up
			# to the column count.
			# NOTE(review): this also discards the last field's value when the
			# field count equals the column count - kept as in the original.
			lstOut = lstItems[0:lstItemsLen-1]
			lstOut.extend(['0'] * (lstColLen - lstItemsLen + 1))

		return ",".join(lstOut) + os.linesep

	def GetProcessFiles(self,dir,lstFiles):
		"""
		Collect the data files of the previous minute.

		dir: directory to scan; when it is not a directory nothing is added
		lstFiles: list the matching paths are appended to

		A file matches when its name contains the timestamp (YYYYmmddHHMM) of
		one minute ago and does not contain ".ok". Returns lstFiles.
		"""
		oneMinuteAgo = datetime.datetime.now()-datetime.timedelta(minutes=1)
		fileminute = oneMinuteAgo.strftime("%Y%m%d%H%M")
		if os.path.isdir(dir):
			for s in os.listdir(dir):
				if fileminute in s and ".ok" not in s:
					lstFiles.append(os.path.join(dir,s))
		return lstFiles

	def LoadBossIdData(self,conf):
		"""
		Load data to fy

		conf: ':'-separated entry; the first three fields are bossid, fy id and
			fy table name (IndexError when fewer than three fields are present)

		For every file of the previous minute in today's data directory of the
		bossid, the trimmed copy (<file>.ok) is written, fyloader is run on it
		and the copy is removed again. Does nothing when the directory does not
		exist.

		NOTE(review): files are selected by the previous minute but looked up in
		today's directory, so right after midnight the previous minute belongs to
		yesterday's directory - confirm whether that is intended.
		"""
		lstitem = conf.split(":")
		bossid = lstitem[0]
		fyid = lstitem[1]
		fytable = lstitem[2]

		today = datetime.datetime.today().strftime("%Y%m%d")
		datadir="/data1/webitil/logreceiver/data/" + bossid +"/" + today
		if not os.path.exists(datadir):
			return

		for datafile in self.GetProcessFiles(datadir,[]):
			okfile = datafile + '.ok'
			try:
				self.ProcessFile(datafile,bossid,fyid)
				# The Python string holds four backslashes; inside the double
				# quotes the shell started by os.system reduces them to two.
				strcmd = '/usr/local/bin/fyloader  --escaped "\\\\\\\\" --delimiter "," --abort_on_threshold 0.2 -l 200000 ' + fyid + ' ' + fytable+'_' + today + ' ' + okfile
				os.system(strcmd)
			finally:
				# remove the temporary copy even when processing failed
				if os.path.exists(okfile):
					os.remove(okfile)


if __name__ == '__main__':
	# Script entry point: start one worker thread per enabled mapping entry,
	# wait for all of them and print the elapsed time.

	# time.time() measures elapsed wall-clock time; time.clock() is processor
	# time on Unix and would leave out the time spent waiting on the worker
	# threads and on the external loader.
	start = time.time()

	processer = BossDataProcesser()
	lstTh = list()

	# NOTE(review): ret is presumably a status code of the lookup; it is not
	# checked here, as before - confirm against the nameapi documentation.
	ret,value = nameapi.getValueByKey('bossid.fy.mapping.itil.dic')
	for conf in value.split():
		lstitem = conf.split(":")
		if len(lstitem) < 5:
			# the on/off switch is the fifth field; an entry without it cannot
			# be enabled, so report and skip it instead of raising IndexError
			sys.stderr.write("skip conf without switch field: %s\n" % conf)
			continue
		if lstitem[4] == "1":
			t = Th(conf,processer)
			t.start()
			lstTh.append(t)

	# wait until every worker has finished before reporting the duration
	for t in lstTh:
		t.join()

	end = time.time()

	print("cost:%f s" % (end - start))


你可能感兴趣的:(python)