from bs4 import BeautifulSoup
import requests
import ip_proxy
from urllib import parse
# Request headers sent with every scraping request; the User-Agent string
# identifies the client as desktop Chrome 68 on Windows.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}
def get_boss_info(my_ip, detailed_url):
    """Fetch one detail page through the current proxy and print its fields.

    Prints, in order: the text of the first ``<h1>``, the text of the first
    ``span.badge``, the first ``div.info-primary > p`` and the first
    ``div.text`` element.

    Args:
        my_ip: Object exposing ``ip_proxy_str``; the value is prefixed with
            ``http://`` and used as the proxy for both schemes
            (presumably ``host:port`` -- confirm against the proxy API).
        detailed_url: Absolute URL of the detail page to download.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
        ValueError: If any of the expected elements is missing from the page.
    """
    proxy_url = 'http://' + my_ip.ip_proxy_str
    proxy = {'http': proxy_url, 'https': proxy_url}
    response = requests.get(detailed_url, headers=headers, proxies=proxy, timeout=5)
    # Surface HTTP errors as exceptions so the caller's retry loop can react
    # instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Locate every element before printing anything: a failed attempt then
    # produces no partial output that a retry would print a second time.
    title_ele = soup.find('h1')
    salary_ele = soup.find('span', class_="badge")
    summary_eles = soup.select('div.info-primary > p')
    description_eles = soup.select('div.text')
    if title_ele is None or salary_ele is None or not summary_eles or not description_eles:
        raise ValueError('unexpected page layout: ' + detailed_url)

    title = title_ele.text
    salary = salary_ele.text.replace('\n', '').strip()
    gezhong_info = summary_eles[0].text.replace('\n', '').strip()
    gangwei_info = description_eles[0].text

    print(title)
    print(salary)
    print(gezhong_info)
    print(gangwei_info)
def get_detail_url(my_ip, url):
    """Download a listing page and process every detail link found on it.

    Each link matched by the job-list selector is resolved against ``url``
    and passed to ``get_boss_info``. A detail page is attempted up to three
    times; after every failed attempt the exception is printed and a new
    proxy is requested via ``my_ip.update_ip_proxy_str()``. A link that still
    fails after the third attempt is skipped.

    Args:
        my_ip: Object exposing ``ip_proxy_str`` and ``update_ip_proxy_str()``.
        url: URL of the listing page.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched or answers with a 4xx/5xx status.
    """
    proxy_url = 'http://' + my_ip.ip_proxy_str
    proxy = {'http': proxy_url, 'https': proxy_url}
    response = requests.get(url, headers=headers, proxies=proxy, timeout=5)
    # Without this check an error response yields an empty link list and no
    # exception, so the caller would never retry the page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    a_ele_list = soup.select('div.job-list > ul > li div.info-primary > h3 > a')
    for a_ele in a_ele_list:
        # hrefs may be relative; resolve them against the listing URL.
        href = parse.urljoin(url, a_ele['href'])
        print('详情页的href: ' + href)
        for _ in range(3):
            try:
                get_boss_info(my_ip, href)
                break
            except Exception as e:
                # Best-effort scraping: report the failure, rotate the proxy
                # and try the same detail page again.
                print(e)
                my_ip.update_ip_proxy_str()
def get_all_info(my_ip):
    """Walk listing pages 1 to 3 and hand each one to ``get_detail_url``.

    Every page gets at most four attempts. When an attempt raises, the
    exception is printed and ``my_ip.update_ip_proxy_str()`` is called before
    the next attempt.
    """
    base_url = 'https://www.zhipin.com/c101010100/h_101010100/?query=python&page=%s&ka=page-%s'
    for page in (1, 2, 3):
        url = base_url % (page, page)
        attempts_left = 4
        while attempts_left > 0:
            attempts_left -= 1
            try:
                get_detail_url(my_ip, url)
            except Exception as e:
                print(e)
                my_ip.update_ip_proxy_str()
            else:
                # Page processed without an exception; move on to the next.
                break
if __name__ == '__main__':
    # Script entry point: obtain a proxy holder, then scrape all pages with it.
    my_ip = ip_proxy.ip_getter()
    get_all_info(my_ip)
import requests
class ip_getter:
    """Holds the proxy address string currently in use (``ip_proxy_str``)."""

    def __init__(self):
        # The initial address is fetched without printing anything.
        self.ip_proxy_str = get_ip_string()

    def update_ip_proxy_str(self):
        """Fetch a new proxy address, store it, and print it."""
        fresh = get_ip_string()
        self.ip_proxy_str = fresh
        print('get one ip : ' + fresh)
def get_ip_string():
    """Request one proxy address from the kdlapi endpoint and return it.

    Returns:
        The response body with surrounding whitespace removed. Callers
        prepend ``http://`` to it, so it is presumably ``host:port`` --
        confirm against the API's response format.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response.
    """
    # NOTE(review): the order id is hard-coded in the URL; consider loading
    # it from configuration instead of keeping it in source.
    url = 'http://dps.kdlapi.com/api/getdps/?orderid=963491899590153&num=1&pt=1&ut=1&dedup=1&sep=1'
    # requests has no default timeout; without one this call can block forever.
    response = requests.get(url, timeout=5)
    # Do not hand an error body back to callers as if it were an address.
    response.raise_for_status()
    # A trailing newline would corrupt the proxy URL built from this value.
    return response.text.strip()