Python爬取智联招聘信息

代码如下
import requests
import json
from lxml import etree



# Browser-like request headers: Zhaopin's API rejects requests that do not
# carry a realistic User-Agent, so we impersonate desktop Chrome.
headers = {

    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,'

                  ' like Gecko) Chrome/72.0.3626.119 Safari/537.36'

}

def url_index(pages=12, page_size=90):
    """Build the list of Zhaopin search-API URLs, one per result page.

    Bug fix: the original used ``start=num`` for num in 0..11, which re-fetched
    the same first page twelve times with a 1-row offset.  ``start`` is a row
    offset and must advance by the page size (0, 90, 180, ...).

    Args:
        pages: number of result pages to fetch (default 12, as before).
        page_size: rows per page (default 90, as before).

    Returns:
        list[str]: fully-formed search URLs.
    """
    return [
        'https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize={}'.format(
            num * page_size, page_size)
        for num in range(pages)
    ]

def salary_index(salary):
    """Parse a salary string like ``'10K-15K'`` into ``[low, high]`` in yuan.

    Bug fix: the original did a textual ``'K' -> '000'`` replacement followed
    by a ``'.000' -> '00'`` patch, which only handled ``X.0K`` correctly —
    ``'1.5K'`` became ``'1.5000'`` instead of ``'1500'``.  We now convert each
    endpoint numerically.

    Args:
        salary: raw salary text (e.g. ``'10K-15K'``, ``'15K'``, ``'面议'``).

    Returns:
        list[str]: ``[low, high]`` as plain-digit strings; ``['', '']`` when
        the text is not a K-denominated salary (unchanged fallback).
    """
    if 'K' not in salary:
        return ['', '']

    def _to_yuan(part):
        # '1.5K' -> '1500'; tolerate stray whitespace / lowercase 'k'.
        try:
            return str(int(float(part.strip().rstrip('Kk')) * 1000))
        except ValueError:
            return ''

    if '-' in salary:
        low, high = salary.split('-', 1)
    else:
        low = high = salary
    return [_to_yuan(low), _to_yuan(high)]
def go_index(url):
    """Fetch one search-result page and flatten each job posting into a dict.

    Bug fix: ``requests.post(url, headers)`` passed the headers dict as the
    second *positional* argument, which is ``data`` (the POST body) — so the
    User-Agent header was never sent.  It must be the ``headers=`` keyword.

    Args:
        url: one search-API URL produced by ``url_index``.

    Returns:
        list[dict]: one record per job with city, company, salary bounds and
        the detail-page URL (consumed later by ``del_json``).
    """
    response = requests.post(url, headers=headers)
    loads = response.json()  # decode the JSON payload into a dict

    json_list = []
    for item in loads['data']['results']:
        # salary arrives as text like '10K-15K'; split into numeric bounds
        salarys = salary_index(item.get('salary', ''))

        data = {
            'city_name': item.get('city', dict()).get('display', ''),          # city
            'com_name': item.get('company', dict()).get('name', ''),           # company name
            'com_size': item.get('company', dict()).get('size', dict()).get('name', ''),  # head count
            'com_type': item.get('company', dict()).get('type', dict()).get('name', ''),  # ownership type
            'job_name': item.get('jobName', ''),                               # job title
            'job_tag': item.get('jobTag', dict()).get('searchTag', ''),        # benefits tags
            'timeState': item.get('timeState', ''),                            # posting freshness
            'low_salary': salarys[0],                                          # salary lower bound
            'higt_salary': salarys[1],                                         # salary upper bound
            'positionURL': item.get('positionURL', '')                         # detail page URL
        }
        json_list.append(data)

    return json_list
def del_json(json_list):
    """Visit each job's detail page and add the planned hiring count ('size').

    Bug fixes:
    - ``requests.get(url, headers)`` passed the headers dict positionally,
      which is the ``params`` argument — the headers were sent as a query
      string instead.  Fixed to ``headers=``.
    - ``html_index[3]`` raised IndexError whenever the detail page had fewer
      than four ``<li>`` entries (removed/expired postings); now guarded so
      one bad page no longer aborts the whole run.

    Args:
        json_list: records from ``go_index`` (mutated in place: 'positionURL'
            is removed and 'size' is added, as before).

    Returns:
        list[dict]: the enriched records.
    """
    end_list = []
    for item in json_list:
        url = item['positionURL']
        item['size'] = ''  # default when the detail page cannot be parsed
        try:
            response = requests.get(url, headers=headers)
            html = etree.HTML(response.content.decode())
            html_index = html.xpath('//ul[@class="summary-plane__info"]/li')
            if len(html_index) > 3:
                # 4th summary item holds the planned head count
                item['size'] = html_index[3].xpath('./text()')
        except Exception as e:
            # best-effort enrichment: keep the record, report the failure
            print(e)
        del item['positionURL']
        print(item)
        end_list.append(item)
    return end_list
if __name__ == '__main__':
    # Main flow: build the page URLs, scrape every listing page, enrich each
    # record from its detail page, then append the results to zl2.txt as JSON.
    all_jobs = []
    for page_url in url_index():
        # fetch and flatten one result page
        all_jobs += go_index(page_url)
    enriched = del_json(all_jobs)

    try:
        with open('zl2.txt', 'a+', encoding='utf-8') as f:
            json.dump(enriched, f, ensure_ascii=False)
    except Exception as e:
        print(e)

你可能感兴趣的:(大数据,Python,爬虫)