import threading
from queue import Empty, Queue

import pymongo
import requests
class Productor(threading.Thread):
    """Worker thread that downloads listing pages and queues the parsed posts.

    Each worker repeatedly takes a URL from ``page_queue``, fetches it, and
    puts one dict per post onto ``data_queue``. The thread ends as soon as
    ``page_queue`` has no more URLs.

    The request headers are read from the module-level name ``headers``,
    which must be defined before the thread is started.
    """

    # Seconds to wait for the HTTP server before giving up on a page, so a
    # stalled connection cannot hang the worker forever.
    request_timeout = 10

    def __init__(self, page_queue, data_queue):
        """Store the queue of page URLs to read and the queue of results to fill."""
        threading.Thread.__init__(self)
        self.page_queue = page_queue
        self.data_queue = data_queue

    def run(self):
        """Drain ``page_queue``, fetching every URL taken from it."""
        while True:
            # A non-blocking get replaces the "empty() then get()" pair: with
            # several workers, another thread can take the last URL between
            # the two calls and leave this one blocked in get() forever.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            try:
                self.get_content(url)
            except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
                # One bad page (network error, non-JSON body, unexpected
                # layout) should not stop this worker from handling the
                # remaining URLs.
                print('failed to fetch {}: {!r}'.format(url, exc))

    def get_content(self, url):
        """Fetch ``url`` and put one summary dict per post onto ``data_queue``.

        Raises:
            requests.RequestException: on network errors, timeouts, or an
                HTTP error status.
            ValueError: if the response body is not valid JSON.
            KeyError: if the JSON lacks the ``Data``/``Posts`` keys or a post
                lacks one of the copied fields.
        """
        response = requests.get(
            url=url, headers=headers, timeout=self.request_timeout
        )
        response.raise_for_status()
        # NOTE(review): "or []" tolerates a null Posts value, presumably sent
        # for pages past the last result -- confirm against the API.
        posts = response.json()['Data']['Posts'] or []
        for post in posts:
            self.data_queue.put({
                'RecruitPostName': post['RecruitPostName'],
                # The source field BGName is stored under the key 'syq'.
                'syq': post['BGName'],
                'LocationName': post['LocationName'],
                'LastUpdateTime': post['LastUpdateTime'],
            })
class Consumer(threading.Thread):
    """Worker thread that takes parsed posts off ``data_queue`` and stores them.

    The thread relies on three module-level names that must exist before it
    is started: ``col`` (the collection written to by ``save``) and
    ``switch`` (set to 1 by the main script once every producer has
    finished).

    The shared client is deliberately not closed here: several consumers use
    the same client, so closing it when one of them is garbage-collected
    would break the others. Closing it is left to the code that created it.
    """

    def __init__(self, data_queue, page_queue):
        """Store the queue of records to save and the queue of pending pages."""
        threading.Thread.__init__(self)
        self.data_queue = data_queue
        self.page_queue = page_queue

    def run(self):
        """Save queued records until the producers are done and the queue is empty."""
        while True:
            if self.data_queue.empty() and self.page_queue.empty() and switch == 1:
                break
            try:
                data = self.data_queue.get(timeout=10)
            except Empty:
                # Nothing arrived within the timeout. That only means "done"
                # once the producers have finished; before that they may
                # simply be slow, so keep waiting.
                if switch == 1:
                    break
                continue
            # Kept outside the try so that a storage error is reported with a
            # traceback instead of being mistaken for an empty queue.
            print(data)
            self.save(data)

    def save(self, data):
        """Insert one record into the ``col`` collection."""
        # insert_one replaces Collection.insert, which is deprecated and no
        # longer exists in PyMongo 4.
        col.insert_one(data)
if __name__ == '__main__':
    # The names below are module-level on purpose: the worker classes read
    # `switch`, `col` and `headers` as globals.

    # Flag polled by the consumers; it becomes 1 once all producers are done.
    switch = 0

    client = pymongo.MongoClient(host='127.0.0.1', port=27017)
    db = client['tencent']
    col = db['zhaopin']

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
    }
    base_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?pageIndex={}&pageSize=10'

    data_queue = Queue(1000)
    page_queue = Queue(100)
    # Queue pages 1..50 before any worker starts.
    for page_index in range(1, 51):
        page_queue.put(base_url.format(page_index))

    producers = [Productor(page_queue, data_queue) for _ in range(3)]
    for producer in producers:
        producer.start()

    # Keep references to the consumers so they can be joined below.
    consumers = [Consumer(data_queue, page_queue) for _ in range(3)]
    for consumer in consumers:
        consumer.start()

    for producer in producers:
        producer.join()
    # No more records will be produced; let the consumers drain and exit.
    switch = 1

    for consumer in consumers:
        consumer.join()
    # Close the shared client only after every consumer has stopped using it.
    client.close()