爬取51job数据

1. 先导入 requests、json 和 lxml 这几个包(我用的是 PyCharm,如果你没有安装这些包,它会给出提示,直接点击提示中的导入/安装即可;PyCharm 的安装教程可以在网上搜索)
2.代码如下

import requests
import json
from lxml import etree

# Root address of the 51job search site (not referenced by the code below).
BASE_DOMAIN = 'https://search.51job.com'

# Browser-like request headers used when fetching result pages.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.149 Safari/537.36'
    ),
}

# Accumulates one dict per scraped job listing across all parsed pages.
Recruitments = list()

def parse_page(url):
    """Fetch one 51job search-result page and record its job listings.

    The page is downloaded, decoded as GBK and queried with XPath for the
    position, company, workplace, salary and release-time columns. One dict
    per listing is appended to the module-level ``Recruitments`` list, and
    the whole accumulated list is then written to ``51job.json``
    (overwriting the previous contents of that file).

    Args:
        url: Address of a search-result page, for example
            ``https://search.51job.com/list/120200,000000,0000,00,9,99,python,2,1.html``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # HEADERS must be passed by keyword: the second positional parameter of
    # requests.get is ``params``, which would put the User-Agent into the
    # query string instead of sending it as a request header.
    resp = requests.get(url, headers=HEADERS, timeout=10)
    # Replace undecodable bytes so one stray byte does not abort the crawl.
    text = resp.content.decode('gbk', errors='replace')
    tree = etree.HTML(text)

    # The slicing below relies on the titled links alternating
    # position, company, position, company, ...
    titles = tree.xpath("//div[@class='el']//span/a/@title")
    positions = titles[::2]
    companies = titles[1::2]
    workplaces = tree.xpath("//div[@class='el']//span[@class='t3']/text()")
    payrolls = tree.xpath("//div[@class='el']//span[@class='t4']/text()")
    release_times = tree.xpath("//div[@class='el']//span[@class='t5']/text()")

    # Distinct per-row names avoid rebinding the column lists being zipped.
    rows = zip(positions, companies, workplaces, payrolls, release_times)
    for position, company, workplace, payroll, release_time in rows:
        Recruitments.append({
            '职位': position,
            '公司': company,
            '工作地点': workplace,
            '薪资': payroll,
            '发布时间': release_time,
        })

    # Rewrite the file after every page so partial results survive a crash.
    with open('51job.json', 'w', encoding='utf-8') as fp:
        json.dump(Recruitments, fp, ensure_ascii=False)

def spider():
    """Crawl result pages 1 through 50 and print a progress line per page."""
    # URL template for the first 50 pages of Python job listings in
    # Jinan, Beijing, Shanghai, Guangdong and Shenzhen.
    url_template = 'https://search.51job.com/list/120200%252C010000%252C020000%252C030200%252C040000,000000,0000,00,9,99,python,2,{}.html'
    first_page, last_page = 1, 50
    for page_no in range(first_page, last_page + 1):
        parse_page(url_template.format(page_no))
        print('第%s页爬取完成' % page_no)

def main():
    """Script entry point: run the crawler."""
    spider()


# Start the crawl only when the file is executed as a script.
if __name__ == "__main__":
    main()

运行结果
爬取51job数据_第1张图片
触动精灵连接不上设备,这个网址上有解决办法:https://www.smzy.com/smzy/tech29119.html 。但是其中提到要“检查设备上的触动精灵服务和广播开关是否为开启状态”,我不知道设备上的触动精灵服务和广播开关在哪里。
在这里插入图片描述

你可能感兴趣的:(爬取51job数据)