# Consumer/producer homework 6

import csv
import requests
import threading
from queue import Queue


# 生产者
class Producer(threading.Thread):
    """Worker thread that fetches job-listing pages and queues parsed records.

    Each instance repeatedly takes a URL from ``page_queue``, requests it and
    puts one dict per posting onto ``data_queue``. The thread finishes as soon
    as ``page_queue`` has no URL left to hand out.
    """

    # Browser-like User-Agent header sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/5 like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }

    # Seconds to wait for the server. Without a timeout a stalled connection
    # would block this thread (and whoever join()s it) indefinitely.
    timeout = 10

    def __init__(self, page_queue, data_queue):
        """Store the shared queues.

        Args:
            page_queue: Queue of page URLs that still have to be fetched.
            data_queue: Queue that receives one dict per parsed posting.
        """
        super().__init__()
        self.page_queue = page_queue
        self.data_queue = data_queue

    def run(self):
        """Take URLs from ``page_queue`` until it is empty, parsing each one."""
        # Local import: the file's import block only brings in ``Queue``.
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is racy when
            # several producers share the queue: another thread may take the
            # last URL in between, leaving this one blocked forever.
            # get_nowait() makes "take one or stop" a single atomic step.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self, url):
        """Fetch ``url`` and enqueue the postings found in its JSON payload.

        Args:
            url: Address of one result page of the job-search API.

        Raises:
            requests.RequestException: If the request fails or times out.
            KeyError: If the payload lacks the expected ``Data``/``Posts``
                structure or a posting lacks one of the fields read below.
        """
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        # NOTE(review): ``or ()`` guards against a null ``Posts`` value (e.g. a
        # page past the last result) - confirm against the real API response.
        posts = response.json()['Data']['Posts'] or ()

        for item in posts:
            self.data_queue.put({
                'work_name': item['RecruitPostName'],
                'work_address': item['LocationName'],
                'work_url': item['PostURL'],
            })


# 消费者
class Consumer(threading.Thread):
    """Worker thread that writes queued job records to ``job.csv``.

    The output file and its CSV writer are class attributes created when the
    class body is executed, so every Consumer thread appends to the same file
    through the same writer.
    """

    f = open('job.csv', 'a', encoding='utf-8', newline='')
    writer = csv.DictWriter(f, fieldnames=['work_name', 'work_address', 'work_url'])
    # The file is opened for appending, so its initial position is the current
    # end of the file; position 0 therefore means the file is still empty.
    # Writing the header only then keeps repeated runs from inserting extra
    # header rows in the middle of the data.
    if f.tell() == 0:
        writer.writeheader()

    # Serializes access to the shared writer/file across Consumer threads.
    _write_lock = threading.Lock()

    def __init__(self, data_queue):
        """Store the shared queue.

        Args:
            data_queue: Queue of dicts keyed by the CSV field names.
        """
        super().__init__()
        self.data_queue = data_queue

    def run(self):
        """Write records from ``data_queue`` until it is empty, then flush."""
        # Local import: the file's import block only brings in ``Queue``.
        from queue import Empty

        try:
            while True:
                # empty() followed by a blocking get() can hang when another
                # consumer takes the last record in between; get_nowait()
                # makes "take one or stop" a single atomic step.
                try:
                    data = self.data_queue.get_nowait()
                except Empty:
                    break
                with self._write_lock:
                    self.writer.writerow(data)
        finally:
            # Push buffered rows to disk; the file itself stays open because
            # other Consumer threads share it.
            with self._write_lock:
                self.f.flush()


if __name__ == '__main__':
    # Queue of page URLs waiting to be fetched.
    page_queue = Queue()
    # Queue of parsed job records waiting to be written.
    data_queue = Queue()

    # Enqueue result pages 1 through 9 of the job-search API.
    for i in range(1, 10):
        url = f'https://careers.tencent.com/tencentcareer/api/post/Query?categoryId=&parentCategoryId=40001&&pageIndex={i}&pageSize=10&language=zh-cn&area=cn'
        page_queue.put(url)

    try:
        # Producers: three threads fetch the pages and fill data_queue.
        p_list = []
        for _ in range(3):
            p = Producer(page_queue, data_queue)
            p.start()
            p_list.append(p)

        # Wait for every page to be processed before any consumer starts, so
        # the consumers see the complete set of records.
        for p_i in p_list:
            p_i.join()

        # Consumers: three threads drain data_queue into the CSV file.
        c_list = []
        for _ in range(3):
            c = Consumer(data_queue)
            c.start()
            c_list.append(c)

        for c_i in c_list:
            c_i.join()
    finally:
        # The output file is opened as a Consumer class attribute and was
        # never closed; close it here so buffered rows reach the disk even if
        # a step above fails.
        Consumer.f.close()

# You may also be interested in: (python, programming languages)