import requests
import csv
from bs4 import BeautifulSoup
def get_position(pages=100, output_path='51job.csv', timeout=10):
    """Scrape 51job search-result pages and append the listings to a CSV file.

    For every page index in ``range(pages)`` the search URL is fetched, the
    response is decoded as GBK, and each ``#resultList > .el`` element is
    written as one CSV row ``[title, company_name, work_space, salary]``,
    taken from the stripped text of its ``.t1`` .. ``.t4`` children.

    Args:
        pages: Number of result pages to request (page indices
            ``0 .. pages - 1`` are substituted into the URL).
        output_path: Path of the CSV file that rows are appended to.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.exceptions.RequestException: If a request fails or exceeds
            ``timeout``.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        ),
    }
    # The query string previously contained "cotype=99°reefrom=99": the
    # "&deg" of "&degreefrom" had been decoded as an HTML entity, which
    # corrupted both parameters. It is spelled out correctly here.
    # NOTE(review): page indices start at 0 as before; confirm whether the
    # site's pagination is actually 1-based.
    url_template = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        '%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590,2,{}.html'
        '?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
        '&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1'
        '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
        '&specialarea=00&from=&welfare='
    )
    # CSS selectors for title, company name, work place and salary, in the
    # order they are written to the CSV row.
    column_selectors = ('.t1', '.t2', '.t3', '.t4')

    # Open the output once for the whole run instead of once per row.
    # NOTE(review): no encoding is passed, so the platform default is used
    # (unchanged from the original); consider an explicit encoding.
    with open(output_path, 'a', newline='') as f:
        writer = csv.writer(f)
        for page in range(pages):
            # A timeout keeps one stalled connection from hanging the run.
            response = requests.get(
                url_template.format(page), headers=headers, timeout=timeout
            )
            response.encoding = 'gbk'
            soup = BeautifulSoup(response.text, 'lxml')
            for item in soup.select('#resultList > .el'):
                cells = [item.select_one(sel) for sel in column_selectors]
                # Skip rows that lack any expected column rather than
                # aborting the whole scrape with an IndexError.
                if any(cell is None for cell in cells):
                    continue
                writer.writerow([cell.text.strip() for cell in cells])
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    get_position()