python多线程执行任务,读取文件,批量发请求

#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import threading
import time
import Queue
import json,urllib2

# 添加线程  创建5个线程名
# Labels for the five worker threads.
threadList = ["Thread-%d" % n for n in range(1, 6)]
# Shared work queue, bounded at 300 pending URLs.
workQueue = Queue.Queue(maxsize=300)
# Handles of the started workers, kept so we can join() them at the end.
threads = []
# Wall-clock start time for the final timing report.
start = time.time()
class myThread(threading.Thread):
    """Worker thread: repeatedly pulls URLs off the shared queue via crawler().

    The thread exits when the queue stays empty for crawler's q.get()
    timeout, i.e. when all work has been consumed.
    """

    def __init__(self, name, q):
        threading.Thread.__init__(self)
        self.name = name  # thread label used in log output
        self.q = q        # shared Queue.Queue of URLs to request

    def run(self):
        print("Starting " + self.name)
        while True:
            try:
                crawler(self.name, self.q)
            except Queue.Empty:
                # Queue drained: no URL arrived within the get() timeout.
                # The original bare `except:` also swallowed real errors
                # (e.g. NameError) and silently killed the worker.
                break
        print("Exiting " + self.name)

def crawler(threadName, q):
    """Take one URL off the queue and POST an empty JSON body to it.

    Raises Queue.Empty (from q.get) when no URL arrives within 2 seconds;
    the worker thread uses that as its exit signal.
    """
    payload = json.dumps({})  # empty JSON request body
    header_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
                   "Content-Type": "application/json"}
    # Blocks up to 2s; deliberately outside the try so Queue.Empty
    # propagates to the worker's run() loop as the shutdown signal.
    url = q.get(timeout=2)
    try:
        req = urllib2.Request(url=url, data=payload, headers=header_dict)
        res = urllib2.urlopen(req).read()
        # Log the URL that succeeded.  (The old code printed the
        # module-level `pid`, which only ever held the last id read
        # from the file, so every success line showed the same id.)
        print("index success:" + url + "      " + res)
    except Exception as e:
        print(q.qsize(), threadName, "Error: ", e)


# Start the consumer threads *after* crawler() is defined.
# (The original started them before crawler existed, so every worker
# died immediately with a NameError swallowed by run()'s bare except,
# and the queue was never consumed.)
for tName in threadList:
    thread = myThread(tName, workQueue)
    thread.start()
    threads.append(thread)

# Producer: read ids from the file and enqueue one request URL per id.
# The workers are already consuming, so put() cannot deadlock even if
# the file holds more than the queue's 300-slot capacity.
count = 0
filename = 'id.txt'
with open(filename, 'r') as f:  # closes the file (original leaked it)
    for line in f:  # stream line by line; don't shadow the builtin `id`
        pid = line.replace("\"", "").replace("\n", "")
        count = count + 1
        indexUrl = "https://www.baidu.com"
        workQueue.put(indexUrl + '/aaa?id=' + pid)

# Wait for every worker to drain the queue and exit.
for t in threads:
    t.join()

end = time.time()
print('Queue多线程批量执行时间为:', end - start)

代码非常简单:

1、配置一下需要启动多少个线程。

2、然后修改读取数据,放入队列那里的文件名和代码,改成自己要执行的代码段。

3、修改从队列取出数据并执行的代码,多个线程会自己从队列中取数据。

你可能感兴趣的:(python)