import csv
import threading
from queue import Empty, Queue

import requests
# Producer: downloads job-listing pages and feeds parsed records to a queue.
class Producer(threading.Thread):
    """Worker thread that fetches listing pages from ``page_queue`` and
    pushes one dict per job posting onto ``data_queue``.

    Several Producer instances may share the same pair of queues.
    """

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/5 like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }

    def __init__(self, page_queue, data_queue):
        """
        :param page_queue: queue of listing-page URLs still to fetch
        :param data_queue: queue that receives parsed job dicts
        """
        super(Producer, self).__init__()
        self.page_queue = page_queue
        self.data_queue = data_queue

    def run(self):
        # Drain the URL queue.  get_nowait()/Empty avoids the original
        # check-then-act race (``if empty(): break`` followed by a blocking
        # ``get()``), where two workers could both pass the empty() check
        # and one would then block forever on an empty queue.
        while True:
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch one listing page and enqueue a dict per job posting."""
        # timeout prevents a hung request from stalling the worker forever.
        response = requests.get(url, headers=self.headers, timeout=10)
        for item in response.json()['Data']['Posts']:
            self.data_queue.put({
                'work_name': item['RecruitPostName'],
                'work_address': item['LocationName'],
                'work_url': item['PostURL'],
            })
# Consumer: drains parsed job records from the queue into job.csv.
class Consumer(threading.Thread):
    """Worker thread that writes job dicts from ``data_queue`` to job.csv."""

    # Shared output handle/writer: class-level so every consumer thread
    # appends to the same CSV; the header is written once, at import time.
    f = open('job.csv', 'a', encoding='utf-8', newline='')
    writer = csv.DictWriter(f, fieldnames=['work_name', 'work_address', 'work_url'])
    writer.writeheader()
    # csv writers are not thread-safe; serialize writerow() across threads.
    _write_lock = threading.Lock()

    def __init__(self, data_queue):
        """
        :param data_queue: queue of job dicts produced by Producer threads
        """
        super(Consumer, self).__init__()
        self.data_queue = data_queue

    def run(self):
        # get_nowait()/Empty closes the race in the original
        # ``if empty(): break`` + blocking ``get()`` pattern, where two
        # consumers could both see a non-empty queue and one would then
        # block forever on the last item.  Consumers start only after all
        # producers have joined, so an empty queue really means done.
        while True:
            try:
                record = self.data_queue.get_nowait()
            except Empty:
                break
            with self._write_lock:
                self.writer.writerow(record)
if __name__ == '__main__':
    # Queue of listing-page URLs for the producers to fetch.
    page_queue = Queue()
    # Queue of parsed job records awaiting CSV output.
    data_queue = Queue()
    for page in range(1, 10):
        url = f'https://careers.tencent.com/tencentcareer/api/post/Query?categoryId=&parentCategoryId=40001&&pageIndex={page}&pageSize=10&language=zh-cn&area=cn'
        page_queue.put(url)

    # Producers download and parse the pages concurrently.
    producers = [Producer(page_queue, data_queue) for _ in range(3)]
    for producer in producers:
        producer.start()
    for producer in producers:
        producer.join()

    # Consumers start only after every producer has finished, so an empty
    # data queue means there is genuinely no more work.
    consumers = [Consumer(data_queue) for _ in range(3)]
    for consumer in consumers:
        consumer.start()
    for consumer in consumers:
        consumer.join()

    # Flush and close the shared CSV handle once all rows are written;
    # the original leaked it and relied on interpreter exit to flush.
    Consumer.f.close()