Python爬取拉勾网求职信息,并写入Excel

爬取拉勾网求职信息(Python职位)

import datetime
import time
import requests
import xlwt


def List(City,Pname,Cname,Csize,Salary,WorkYear,Education,PositionAdvantage,LastLogin,Hitags):
    """Append the ten values of one job posting to the module-level ``list``.

    The values are added flat (not as a nested row), in parameter order:
    City, Pname, Cname, Csize, Salary, WorkYear, Education,
    PositionAdvantage, LastLogin, Hitags. Returns None.
    """
    posting_values = (
        City,
        Pname,
        Cname,
        Csize,
        Salary,
        WorkYear,
        Education,
        PositionAdvantage,
        LastLogin,
        Hitags,
    )
    # A single extend() adds the same ten items, in the same order, as ten
    # separate append() calls would.
    list.extend(posting_values)

# Flat accumulator filled by List(): ten consecutive values per job posting.
# NOTE(review): this name shadows the built-in `list`; it is kept because
# List() and the later chunking step refer to it by this exact name.
list = []
baseurl = 'https://www.lagou.com/jobs/list_python/p-city_0?px=default'
url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false'

# Seconds to wait on each HTTP request before giving up, so a stalled
# connection cannot hang the script indefinitely.
REQUEST_TIMEOUT = 10

# The headers are the same for every page, so they are built once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0',
    'Referer': 'https://www.lagou.com/jobs/list_python/p-city_0?px=default',
}

# JSON keys read from each posting, in the order List() takes its arguments.
fields = (
    'city',
    'positionName',
    'companyFullName',
    'companySize',
    'salary',
    'workYear',
    'education',
    'positionAdvantage',
    'lastLogin',
    'hitags',
)

for page in range(1, 11):       # result pages 1 through 10
    data = {
        'first': 'true',
        'pn': page,
        'kd': 'python',
    }

    # A new session per page: the GET on the listing page collects cookies,
    # and the session then sends them with the POST to the JSON endpoint.
    # The `with` block closes the session's connections afterwards.
    with requests.session() as session:
        session.get(baseurl, headers=headers, timeout=REQUEST_TIMEOUT)
        payload = session.post(
            url, headers=headers, data=data, timeout=REQUEST_TIMEOUT
        ).json()  # parsed JSON body (a dict)

    # Walk content -> positionResult -> result, tolerating a missing or null
    # level so a payload without results does not raise AttributeError.
    content = payload.get('content') or {}
    position_result = content.get('positionResult') or {}
    postings = position_result.get('result') or []
    if not postings:
        print(f'page {page}: response contained no job results, skipping')

    for item in postings:
        # NOTE(review): 'hitags' may not be a plain string (possibly a list
        # or null) - confirm how it should appear in the spreadsheet.
        row = tuple(item.get(key) for key in fields)
        print('--'.join(str(value) for value in row))
        List(*row)

    time.sleep(2)       # pause between pages to avoid sending requests too often
print(len(list))

# Split the flat value list into consecutive groups of ten values each.
# Only complete groups are kept: a trailing remainder shorter than ten
# values is left out.
new_list = [
    list[start:start + 10]
    for start in range(0, len(list) - 9, 10)
]
print(len(new_list))

# Write the collected rows to an Excel workbook.
now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')  # current time, used in the file name

book = xlwt.Workbook(encoding='utf-8', style_compression=0)  # create the workbook
sheet = book.add_sheet('Python招聘')  # create the worksheet
# Column headers, one per value stored for each posting.
col = ("所在城市","职位名称","公司名称","公司人数","薪资","工作经验","教育条件","职位诱惑","发布时间","其他")

# Row 0 holds the column headers.
for col_idx, title in enumerate(col):
    sheet.write(0, col_idx, title)

# Each record goes on its own row below the header row.
for row_idx, record in enumerate(new_list, start=1):
    for col_idx in range(len(col)):
        sheet.write(row_idx, col_idx, record[col_idx])

# Save the workbook; the timestamp taken above is part of the file name.
book.save(f'拉钩网_Python{now}.xls')

你可能感兴趣的:(Python爬取拉勾网求职信息,并写入Excel)