下面是几段用队列(缓冲区)配合多线程协作的较好的示例代码:
#!/usr/bin/env python
"""Fetch a list of URLs concurrently with a pool of daemon worker threads.

A shared queue.Queue is filled with host URLs; each worker repeatedly takes
a URL, prints the first 1024 bytes of the page, and marks the task done.
"""
import queue
import threading
import time
import urllib.request

# Hosts to fetch -- stands in for e.g. records queried from a database.
hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com",
         "http://ibm.com", "http://apple.com"]

# Shared work queue, filled by main() and drained by the workers.
# (Named url_queue so it does not shadow the stdlib `queue` module.)
url_queue = queue.Queue()


class ThreadUrl(threading.Thread):
    """Threaded URL grabber: consumes hosts from *queue* until the process exits."""

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:  # daemon thread: loops until interpreter shutdown
            # Grab a host from the queue (blocks until one is available).
            host = self.queue.get()
            try:
                with urllib.request.urlopen(host, timeout=10) as page:
                    # Print the first 1024 bytes of the page.
                    print(page.read(1024))
            except OSError as exc:
                # URLError is a subclass of OSError; report and keep going.
                print("failed to fetch %s: %s" % (host, exc))
            finally:
                # Always signal completion -- even on error -- otherwise
                # queue.join() in main() would hang forever.
                self.queue.task_done()


def main():
    """Spawn the worker pool, enqueue every host, and wait for completion."""
    # Spawn a pool of threads and pass them the queue instance.
    for _ in range(5):
        worker = ThreadUrl(url_queue)
        worker.daemon = True  # do not block interpreter exit
        worker.start()
    # Populate the queue with data.
    for host in hosts:
        url_queue.put(host)
    # Block until everything has been processed (task_done called per item).
    url_queue.join()


if __name__ == "__main__":
    start = time.time()
    main()
    print("Elapsed Time: %s" % (time.time() - start))
在 Python 中使用线程时,这个模式是一种很常见的并且推荐使用的方式。具体工作步骤描述如下:
- 创建一个 Queue.Queue() 的实例,然后使用数据对它进行填充。
- 将经过填充数据的实例传递给线程类,后者是通过继承 threading.Thread 的方式创建的。
- 生成守护线程池。
- 每次从队列中取出一个项目,并使用该线程中的数据和 run 方法以执行相应的工作。
- 在完成这项工作之后,使用 queue.task_done() 函数向任务已经完成的队列发送一个信号。
- 对队列执行 join 操作,实际上意味着等到队列为空,再退出主程序。
===============================================
#!/usr/bin/python
# coding: utf8
"""Check numbered database directories ("mypath00".."mypath99") concurrently.

Worker threads pull a directory index from a shared queue, glob the *.txt
files under DB_PATH + zero-padded index, and print the directory path for
every file whose last (or second-to-last) line starts with "End".
"""
import glob
import queue
import threading
import time

# Shared work queue; main() fills it with directory indices 0..99.
# (Named work_queue so it does not shadow the stdlib `queue` module.)
work_queue = queue.Queue(maxsize=10)
DB_PATH = "mypath"


# Class mycheck inherits from threading.Thread.
class mycheck(threading.Thread):
    """Worker thread: consumes directory indices until the process exits."""

    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process queue items forever (daemon thread)."""
        while True:
            # Zero-pad the index so e.g. 3 becomes "03" -> "mypath03".
            path_ext = str(self.queue.get()).zfill(2)
            fullpath = DB_PATH + path_ext
            try:
                for dbfile in glob.glob("%s/*.txt" % fullpath):
                    with open(dbfile) as fp:
                        dbcontent = fp.readlines()
                    # Skip files where NEITHER of the last two lines is an
                    # "End" marker; print the directory path for the rest.
                    if (not dbcontent[-1].startswith("End")
                            and not dbcontent[-2].startswith("End")):
                        continue
                    print(fullpath)
            finally:
                # Always signal completion -- even if a file read raised --
                # otherwise work_queue.join() in main() would hang forever.
                self.queue.task_done()


def main():
    """Start four daemon workers, enqueue 100 indices, wait for completion."""
    for _ in range(4):
        worker = mycheck(work_queue)
        worker.daemon = True
        worker.start()
    for i in range(100):
        # Enqueue one directory index per task.
        work_queue.put(i)
    # join() blocks until every enqueued item has been processed.
    work_queue.join()


if __name__ == "__main__":
    start = time.time()
    main()
    print("Time:%s" % (time.time() - start))
============================================
#!/usr/bin/env python
"""Drain a pre-filled queue with a fixed pool of worker threads.

The queue is filled BEFORE the NUM_WORKERS threads start; each worker pulls
jobs until the queue is empty and then exits on its own, so no daemon flag
or join() is needed.
"""
import queue
import random
import threading
import time

q = queue.Queue(0)  # unbounded job queue
NUM_WORKERS = 3


class MyThread(threading.Thread):
    """A worker thread: drains the shared job queue, then stops."""

    def __init__(self, input, worktype):
        # NOTE(review): parameter `input` shadows the builtin; kept only for
        # backward compatibility with existing callers.
        self._jobq = input
        self._work_type = worktype
        threading.Thread.__init__(self)

    def run(self):
        """Get a job and process it; stop when there are no more jobs.

        get_nowait()/Empty replaces the original `qsize() > 0` check, which
        raced with the other workers: the queue could be drained between the
        size check and the blocking get(), hanging this thread forever.
        """
        while True:
            try:
                job = self._jobq.get_nowait()
            except queue.Empty:
                break
            self._process_job(job, self._work_type)

    def _process_job(self, job, worktype):
        """Do useful work here; *worktype* lets a thread do different work.

        1: do list, 2: do item, ...
        """
        doJob(job)


def doJob(job):
    """Work function 1: simulate work with a short random sleep."""
    time.sleep(random.random() * 3)
    print("doing ", job)


if __name__ == '__main__':
    print("begin...")
    # Fill the queue BEFORE starting the workers: a worker exits as soon as
    # it finds the queue empty, so the jobs must already be there.
    for i in range(NUM_WORKERS * 2):
        q.put(i)
    # Print the total job queue size.
    print("job q'size", q.qsize())
    # Start the worker threads.
    for x in range(NUM_WORKERS):
        MyThread(q, x).start()