Scraping 51job with Python and Saving the Results to CSV
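
The script below requests a 51job search-results page, parses the job listings with lxml XPath, and appends each posting (title, company, location, salary, posting date) as one row of 51job.csv.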

import requests
from lxml import etree
import csv


# Fetch a page and return its parsed lxml tree (51job serves gbk-encoded pages;
# the shared `headers` dict is defined at module level further below)
def spider(url_):
    response = requests.get(url_, headers=headers)
    response.encoding = 'gbk'
    return etree.HTML(response.text)


# Parse a search-results page and extract every job posting on it
def download(list_url):
    selector = spider(list_url)
    all_list = selector.xpath('//div[@class="dw_table"]/div[@class="el"]')  # every job row on the page
    for div in all_list:
        a = div.xpath("p/span/a")[0]
        name = a.xpath("text()")[0].strip()  # job title
        company = div.xpath('span[@class="t2"]/a/text()')[0]  # company name
        place = div.xpath('span[@class="t3"]/text()')[0]  # location
        money = div.xpath('span[@class="t4"]/text()')  # salary
        money = money[0] if money else "面议"  # "negotiable" when no salary is listed
        time = div.xpath('span[@class="t5"]/text()')  # posting date
        time = time[0] if time else "没有时间"  # placeholder when no date is listed
        csv_writer([name, company, place, money, time])


# Append one row to the CSV file
def csv_writer(item):
    with open('51job.csv', 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        try:
            writer.writerow(item)
        except Exception as e:
            print(e)


# Shared request headers for search.51job.com
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    'Host': 'search.51job.com'
}

url_list = 'https://search.51job.com/list/070400,000000,0000,00,9,99,python,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
download(url_list)
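
The call above crawls only the first page of results and writes no header row. As a minimal sketch that reuses the functions defined above (and assumes the trailing number in the URL path before `.html` is the page index, as the pattern of `url_list` suggests), one could add a header and loop over a few pages instead:

# Sketch: write a header row once, then crawl the first few result pages.
# Assumption: the ",{page}.html" segment selects the page number, mirroring url_list's pattern.
base_url = ('https://search.51job.com/list/070400,000000,0000,00,9,99,python,2,{page}.html'
            '?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99'
            '&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=')

csv_writer(['职位', '公司', '地点', '工资', '发布时间'])  # header: title, company, place, salary, date
for page in range(1, 4):  # pages 1-3; adjust as needed
    download(base_url.format(page=page))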

What finally gets saved to the CSV:
[Screenshot: the generated 51job.csv]
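
To spot-check the output without opening a spreadsheet, the file can be read back with the same csv module (a quick verification sketch, assuming 51job.csv was produced by the script above):

import csv

# Print the first few saved rows to confirm the file was written as expected
with open('51job.csv', encoding='utf-8', newline='') as f:
    for i, row in enumerate(csv.reader(f)):
        print(row)
        if i >= 4:  # show only the first five rows
            break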
