Python使用正则爬取51job

为了更快捷，可以使用多线程；下面的示例代码按页依次爬取。

import requests
import re


def get_request(page):
    """Fetch one page of 51job search results and return its HTML text.

    Args:
        page: Result page number; it is converted with ``str`` and spliced
            into the URL path just before ``.html``.

    Returns:
        The response body as text, decoded with the encoding that
        ``requests`` detects from the content (``apparent_encoding``).

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with an HTTP error status.
    """
    url = (
        "https://search.51job.com/list/190200,000000,0000,00,9,99,"
        "%25E5%25B0%258F%25E7%25A8%258B%25E5%25BA%258F,2,"
        + str(page)
        + ".html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99"
        # The published snippet had "°reefrom" here: "&deg" had been decoded
        # as an HTML entity, which corrupted the query string.
        "&degreefrom=99&jobterm=99&companysize=99&providesalary=99"
        "&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType="
        "&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
    )
    headers = {
        # NOTE(review): placeholder text from the article. Substitute a real
        # browser User-Agent string before running; a non-Latin-1 header
        # value will presumably be rejected when the request is encoded.
        "User-Agent": "头部信息"
    }
    # A timeout keeps a stalled connection from hanging the whole crawl.
    res = requests.get(url, headers=headers, timeout=10)
    # Surface 4xx/5xx answers instead of handing an error page to the parser.
    res.raise_for_status()
    # Let requests guess the charset from the body rather than the headers.
    res.encoding = res.apparent_encoding
    return res.text

# One match per job row; the six groups are, in order:
# link, position, company, address, wage, publish date.
# NOTE(review): the pattern in the published article lost its HTML tags during
# extraction (only the capture groups survived, and only five of the six the
# code reads). This is a reconstruction against the legacy 51job result-list
# markup -- verify it against a live response before relying on it.
_JOB_PATTERN = re.compile(
    r'<div class="el">.*?'
    r'<a\s[^>]*?href="(.*?)"[^>]*>(.*?)</a>.*?'
    r'<span class="t2">.*?">(.*?)</a>.*?'
    r'<span class="t3">(.*?)</span>.*?'
    r'<span class="t4">(.*?)</span>.*?'
    r'<span class="t5">(.*?)</span>',
    re.S,  # rows span several lines, so "." must also match newlines
)


def get_content(respon):
    """Print every job posting found in one search-result page.

    Args:
        respon: HTML text of a result page.

    Returns:
        None. One line is printed per matched posting; nothing is printed
        when the pattern matches nothing.
    """
    for link, position, company, address, wage, updataTime in _JOB_PATTERN.findall(respon):
        # The job title is surrounded by layout whitespace inside its <a> tag.
        position = position.strip()
        print("职位链接:", link, "职位名:", position, "公司名:", company,
              "工作地址:", address, "工资:", wage, "发布时间:", updataTime)


def main(page):
    """Fetch and print one result page, reporting success or failure.

    Args:
        page: Result page number passed through to ``get_request``.
    """
    try:
        respon = get_request(page)
        get_content(respon)
    except Exception as e:
        # Best-effort crawl: report the failing page and carry on with the
        # next one, but show the cause instead of discarding it.
        print("*" * 50, "\n\n")
        print("第", page, "页爬取失败", "\n\n")
        print(repr(e))
        print("*" * 50)
    else:
        print("*" * 50, "\n\n")
        print("第", page, "页爬取成功", "\n\n")
        print("*" * 50)


if __name__ == '__main__':
    for page in range(1, 8):
        main(page)

 

你可能感兴趣的:(Python使用正则爬取51job)