# 51job简单爬虫 (a simple 51job crawler)

import requests
import csv
from bs4 import BeautifulSoup 

def get_position(pages=100, output_path='51job.csv'):
    """Scrape 51job search results and append them to a CSV file.

    The search URL is hard-wired to the keyword "data analysis" (the
    double-URL-encoded path segment decodes to the Chinese term for it).
    For every ``#resultList > .el`` row on each page, the stripped text of
    its ``.t1``, ``.t2``, ``.t3`` and ``.t4`` cells (title, company name,
    work place, salary) is written as one CSV row.

    Args:
        pages: Number of result pages to request. Page numbers run from
            0 to ``pages - 1``.
        output_path: CSV file to append to; created if it does not exist.

    Raises:
        requests.RequestException: If a page cannot be fetched, including
            when the request exceeds the timeout.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        )
    }
    # The query string previously contained "cotype=99°reefrom=99": the
    # "&deg" of "&degreefrom" had been decoded as the HTML entity for "°",
    # which dropped the degreefrom parameter and corrupted cotype.
    url_template = (
        'https://search.51job.com/list/000000,000000,0000,00,9,99,'
        '%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590,2,{}.html'
        '?lang=c&stype=&postchannel=0000&workyear=99&cotype=99'
        '&degreefrom=99&jobterm=99&companysize=99&providesalary=99'
        '&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType='
        '&dibiaoid=0&address=&line=&specialarea=00&from=&welfare='
    )
    cell_selectors = ('.t1', '.t2', '.t3', '.t4')

    # Open the output once instead of once per row. An explicit encoding
    # keeps Chinese text writable regardless of the platform's locale.
    with open(output_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for page in range(pages):
            # NOTE(review): numbering starts at 0 as in the original loop;
            # confirm whether the site expects 1-based page numbers.
            response = requests.get(
                url_template.format(page), headers=headers, timeout=10
            )
            # The page is decoded as GBK rather than the guessed encoding.
            response.encoding = 'gbk'
            soup = BeautifulSoup(response.text, 'lxml')

            for row in soup.select('#resultList > .el'):
                cells = [row.select_one(selector) for selector in cell_selectors]
                # Skip rows that lack one of the expected cells rather
                # than crashing with an IndexError mid-scrape.
                if any(cell is None for cell in cells):
                    continue
                writer.writerow([cell.text.strip() for cell in cells])

# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    get_position()

# 你可能感兴趣的:(51job简单爬虫)