✨作者主页:IT毕设梦工厂✨
个人简介:曾从事计算机专业培训教学,擅长Java、Python、微信小程序、Golang、安卓Android等。接项目定制开发、代码讲解、答辩教学、文档编写、降重等。
☑文末获取源码☑
精彩专栏推荐⬇⬇⬇
Java项目
Python项目
安卓项目
微信小程序项目
class GetJob:
    """Fetch one page of 51job search results and append it to a CSV file.

    Typical use is ``GetJob(keyword).run()``, which calls ``get_json``,
    ``parse_json`` and ``save_as_csv`` in that order.

    Attributes:
        url: Search endpoint that is queried.
        params: Query parameters sent with the request.
        headers: HTTP headers sent with the request.
        cookies: Cookies sent with the request.
        json_data: Decoded JSON body of the last response (``{}`` until
            ``get_json`` has run).
        all_detail: Flattened job records accumulated by ``parse_json``.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block indefinitely.
    REQUEST_TIMEOUT = 10

    # File that ``save_as_csv`` appends to when no path is given.
    DEFAULT_CSV_PATH = '../职位数据分析/data.csv'

    # CSV column order; also the keys of every record in ``all_detail``.
    CSV_FIELDS = [
        "job_name", "job_area", "provide_salary",
        "work_year", "company", "degree",
        "company_type", "term", "industry_type",
    ]

    def __init__(self, keyword='Python'):
        """Prepare the request for *keyword*.

        Args:
            keyword: Search term. A falsy value (``''``, ``None``) falls back
                to ``'Python'``.
        """
        self.url = 'https://we.51job.com/api/job/search-pc'
        self.params = {
            'api_key': '51job',
            # Evaluated once, when the instance is created.
            'timestamp': int(time.time()),
            'keyword': keyword or 'Python',
            'searchType': '2',
            'function': '',
            'industry': '',
            'jobArea': '190200',
            'jobArea2': '',
            'landmark': '',
            'metro': '',
            'salary': '',
            'workYear': '',
            'degree': '',
            'companyType': '',
            'companySize': '',
            'jobType': '',
            'issueDate': '',
            'sortType': '0',
            'pageNum': '1',
            'requestId': '',
            'pageSize': '20',
            'source': '1',
            'accountId': '',
            # NOTE(review): this value is already percent-encoded and
            # requests encodes query parameters again (% -> %25) - confirm
            # the server accepts the double-encoded form.
            'pageCode': 'sou%7Csou%7Csoulb'
        }
        # NOTE(review): 'Sign' and the cookies below look like values captured
        # from one browser session; presumably they expire - confirm before
        # relying on them.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.43',
            'Sign': '303cd48e5183132d5fe88463fb63f618bb30648042702b21e0b79cd854493b24',
            'Referer': 'https://we.51job.com/pc/search',
        }
        self.cookies = {
            'guid': '58cd45b87123e1dc7d4fb9715e79aea7',
            'nsearch': 'jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D',
            'privacy': '1689774694',
            'Hm_lvt_1370a11171bd6f2d9b1fe98951541941': '1689774697',
            'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%2258cd45b87123e1dc7d4fb9715e79aea7%22%2C%22first_id%22%3A%22188a301c974fa9-020df9f5bef6cca-7b515473-3686400-188a301c97510eb%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTg4YTMwMWM5NzRmYTktMDIwZGY5ZjViZWY2Y2NhLTdiNTE1NDczLTM2ODY0MDAtMTg4YTMwMWM5NzUxMGViIiwiJGlkZW50aXR5X2xvZ2luX2lkIjoiNThjZDQ1Yjg3MTIzZTFkYzdkNGZiOTcxNWU3OWFlYTcifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%2258cd45b87123e1dc7d4fb9715e79aea7%22%7D%2C%22%24device_id%22%3A%22188a301c974fa9-020df9f5bef6cca-7b515473-3686400-188a301c97510eb%22%7D',
            'partner': 'www_baidu_com',
            'seo_refer_info_2023': '%7B%22referUrl%22%3A%22https%3A%5C%2F%5C%2Fwww.baidu.com%5C%2Flink%3Furl%3DwezHEiaxWGb_ffRXyLCXDr8X5pchQ55jOVkADY35QnK%26wd%3D%26eqid%3Df2e0a323000b80310000000664b7ea70%22%2C%22referHost%22%3A%22www.baidu.com%22%2C%22landUrl%22%3A%22%5C%2F%22%2C%22landHost%22%3A%22www.51job.com%22%2C%22partner%22%3Anull%7D',
            'slife': 'lastvisit%3D190200%26%7C%26',
            'Hm_lpvt_1370a11171bd6f2d9b1fe98951541941': '1689775383',
            'search': 'jobarea%7E%60190200%7C%21recentSearch0%7E%60190200%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FAPython%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA7309%2C7120%2C7307%2C7301%2C7302%A1%FB%A1%FA01%2C37%2C38%2C32%2C39%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%B3%A4%C9%B3%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch2%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA01%2C06%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%B3%A4%C9%B3%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch3%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21',
            'acw_tc': 'ac11000116897795248505662e00d2e12e77cf36fdda86ada911fd4d70c3db',
            'acw_sc__v2': '64b7fd44f11e1f30f91a894cd2944b313b1d066e',
            'JSESSIONID': 'C3E25C2F9BF89FABAAD72AC849C82E77',
            'ssxmod_itna2': 'eqRxBDy7DQi=GQeD=DXmqSD7whGiei=bY=WKDuG4n9Sq3xDsNsDLAEZWwUQceqApKcD3E9GGqdzQorWQHUeoi1D5DcY0vTufkSTfixsZdrXx5qay4ceo5l3MEask6IlGLhKsVZ/B6w=8GMA/Ub4nfgjDam0SBk3YwrUVfQR4sGi9iObQMnUxQBuLBm=MRnnLic=Ib6rHr49rkpcax197O=ykp6H1wg3zLPyO6aa+1c786LWFV2cvOPrMjdSkOmTfsnXdXIXQcUb0pm2Uy/nlBIiXV6lnrNlg8vUK1Nj0UW0yNFL9qUt0CKO1Xua5SOU/0UL1iC64DQ9Dir3mxNWn4KGCmGxQ0rmYQ+rPsDK5RDi0DQbqY04DEfKgrQEr/DDFqD+6DxD=',
            'ssxmod_itna': 'Yqfx2QKmq7qCq4BPGKTn0DBACtPh0x7K2ELdD/QY+DnqD=GFDK40EABEQD7+K9djdwqWe9oR5xKMW=O4u4eRWe43IZ7xB3DEx0=rq+0Y4GGfxBYDQxAYDGDDp8Dj4ibDY+tODjnz/Zl61KDpxGrDlKDRx0749KDbxDaDGakVjWk96P+4fYDno+1K7ikD75Dux0HeGSt6xDCEUStlO5A+4i3ACIhK40OD0Fwncp+DB4C/h5OySueAQGoZQiKriG3+70KNo4qIjmqiM2DtC2xGc0D+A0oZ72t4D===',
        }
        self.json_data = {}
        self.all_detail = []

    def get_json(self):
        """Request the search page and store the decoded JSON in ``json_data``.

        Raises:
            requests.RequestException: On connection problems, on a timeout
                after ``REQUEST_TIMEOUT`` seconds, or on an HTTP error status.
        """
        resp = requests.get(
            url=self.url,
            headers=self.headers,
            params=self.params,
            cookies=self.cookies,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here with the HTTP status rather than later on a confusing
        # JSON or KeyError.
        resp.raise_for_status()
        resp.encoding = 'utf-8'
        self.json_data = resp.json()

    def parse_json(self):
        """Append one flat record per job in ``json_data`` to ``all_detail``.

        Raises:
            KeyError: If ``json_data`` lacks ``resultbody.job.items`` or a job
                item lacks one of the fields read below.
        """
        for job in self.json_data['resultbody']['job']['items']:
            detail = {
                "job_name": job['jobName'],
                "job_area": job['jobAreaString'],
                "provide_salary": job['provideSalaryString'],
                "work_year": job['workYearString'],
                "company": job['fullCompanyName'],
                "degree": job['degreeString'],
                "company_type": job['companyTypeString'],
                "term": job['termStr'],
                "industry_type": job['industryType1Str'],
            }
            self.all_detail.append(detail)

    def save_as_csv(self, path=None):
        """Append every record in ``all_detail`` to a CSV file.

        The header row is written only when the file is empty, so appending
        over several runs does not scatter header rows through the data.

        Args:
            path: Destination file; defaults to ``DEFAULT_CSV_PATH``.
        """
        if path is None:
            path = self.DEFAULT_CSV_PATH
        with open(path, 'a', newline='', encoding='utf-8-sig') as f:
            csv_writer = csv.DictWriter(f, fieldnames=self.CSV_FIELDS)
            # In append mode the stream starts at the end of the file, so a
            # position of 0 means the file is new or empty.
            if f.tell() == 0:
                csv_writer.writeheader()
            csv_writer.writerows(self.all_detail)

    def run(self):
        """Fetch, parse and save one page of results."""
        self.get_json()
        self.parse_json()
        self.save_as_csv()
if __name__ == '__main__':
    # Crawl with the default keyword. To search for something else, pass the
    # keyword (for example the result of input()) to GetJob instead.
    crawler = GetJob()
    crawler.run()
def parse_path(url_path):
    """Return the query-string parameters of *url_path* as a dict.

    Everything after the first ``?`` is split on ``&`` into ``name=value``
    pairs. Names and values are returned exactly as written (no
    percent-decoding). When a name occurs more than once the last value wins.

    Args:
        url_path: URL or path, e.g. ``'/search?keyword=Python&pageNum=1'``.

    Returns:
        ``{name: value}``; ``{}`` when *url_path* is not a string or has no
        query string.
    """
    if not isinstance(url_path, str):
        return {}
    _, _, query = url_path.partition('?')
    params = {}
    for pair in query.split('&'):
        if not pair:
            # Empty query or a stray '&&' - nothing to record.
            continue
        # Split on the first '=' only, so values that themselves contain '='
        # (e.g. base64 padding) are kept intact; a bare name maps to ''.
        name, _, value = pair.partition('=')
        params[name] = value
    return params
def main():
    """Print the query parameters of a sample 51job search URL."""
    # The separators before 'timestamp' and 'degree' must be literal '&'
    # characters; an HTML-entity decoder had turned '&times' and '&deg' into
    # the symbols 'x' (multiplication sign) and the degree sign, which merged
    # those parameters into their neighbours.
    path = 'https://we.51job.com/api/job/search-pc?api_key=51job&timestamp=1689775425&keyword=Python&searchType=2&function=&industry=&jobArea=190200&jobArea2=&landmark=&metro=&salary=&workYear=&degree=&companyType=&companySize=&jobType=&issueDate=&sortType=0&pageNum=1&requestId=&pageSize=20&source=1&accountId=&pageCode=sou%7Csou%7Csoulb'
    print(parse_path(path))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
# Work out where to look for the test scripts. This script may live either in
# the source tree or in an installed Python 'Scripts' tree, so both its own
# directory and the site-packages directories are candidates.
this_dir = os.path.dirname(__file__)

# User site-packages first, then the global site-packages directories.
site_packages = [site.getusersitepackages(), *site.getsitepackages()]

# Paths of the test scripts that exited with a non-zero return code.
failures = []
def run_test(script, cmdline_extras):
    """Run one test script in a subprocess and record it if it fails.

    A non-zero return code means there was an error, but nothing is raised:
    the script is appended to the module-level ``failures`` list so that as
    many tests as possible still get to run.

    Args:
        script: Path of the test script to execute.
        cmdline_extras: Extra command-line arguments passed to the script.
    """
    dirname, scriptname = os.path.split(script)
    # Some tests prefer to be run from their own directory, hence cwd below.
    cmd = [sys.executable, "-u", scriptname, *cmdline_extras]
    print(f"--- Running '{script}' ---")
    sys.stdout.flush()
    # A bare file name has an empty dirname, and cwd='' makes the launch fail;
    # None keeps the current working directory instead.
    result = subprocess.run(cmd, check=False, cwd=dirname or None)
    print(f"*** Test script '{script}' exited with {result.returncode}")
    sys.stdout.flush()
    if result.returncode:
        failures.append(script)
def find_and_run(possible_locations, extras):
    """Run the first existing test script among *possible_locations*.

    Args:
        possible_locations: Candidate script paths, tried in order.
        extras: Extra command-line arguments handed on to ``run_test``.

    Raises:
        RuntimeError: If none of the candidates is an existing file.
    """
    for maybe in possible_locations:
        if os.path.isfile(maybe):
            run_test(maybe, extras)
            break
    else:
        # An f-string formats lists and tuples alike; '%s' % some_tuple would
        # treat the tuple as the argument list and mis-format or raise.
        raise RuntimeError(
            f"Failed to locate a test script in one of {possible_locations}"
        )
def main():
    """Command-line entry point: run the test scripts of the PyWin32 sub-projects.

    Each script is searched for under this script's directory and then under
    the site-packages directories. Command-line options that are not
    recognised here are forwarded to the test scripts. Exits with status 1
    if any script was recorded in ``failures``.
    """
    import argparse

    def candidates(roots, *parts):
        # Every place a given script might live, in search order.
        return [os.path.join(root, *parts) for root in roots]

    code_directories = [this_dir, *site_packages]

    parser = argparse.ArgumentParser(
        description="A script to trigger tests in all subprojects of PyWin32."
    )
    parser.add_argument(
        "-no-user-interaction",
        default=False,
        action="store_true",
        help="(This is now the default - use `-user-interaction` to include them)",
    )
    parser.add_argument(
        "-user-interaction",
        action="store_true",
        help="Include tests which require user interaction",
    )
    parser.add_argument(
        "-skip-adodbapi",
        default=False,
        action="store_true",
        help="Skip the adodbapi tests; useful for CI where there's no provider",
    )
    args, remains = parser.parse_known_args()

    # win32, win32ui / Pythonwin
    interactive = ["-user-interaction"] if args.user_interaction else []
    extras = interactive + remains
    for script in ("win32/test/testall.py", "Pythonwin/pywin/test/all.py"):
        find_and_run(candidates(code_directories, script), extras)

    # win32com: in the source tree it lives under "com".
    com_roots = [os.path.join(this_dir, "com"), *site_packages]
    find_and_run(
        candidates(com_roots, "win32com", "test", "testall.py"),
        remains + ["1"],  # only run "level 1" tests in CI
    )

    # adodbapi
    if not args.skip_adodbapi:
        find_and_run(
            candidates(code_directories, "adodbapi", "test", "adodbapitest.py"),
            remains,
        )
        # This script has a hard-coded sql server name in it, (and markh
        # typically doesn't have a different server to test on) but there is
        # now supposed to be a server out there on the Internet just to run
        # these tests, so try it...
        find_and_run(
            candidates(
                code_directories, "adodbapi", "test", "test_adodbapi_dbapi20.py"
            ),
            remains,
        )

    if failures:
        print("The following scripts failed")
        for failed_script in failures:
            print(">", failed_script)
        sys.exit(1)
    print("All tests passed \\o/")
# Run the test suites only when executed as a script, not on import.
if __name__ == "__main__":
    main()
【爬虫+可视化】基于Python的职位数据分析
大家可以帮忙点赞、收藏、关注、评论啦~
源码获取:私信我
精彩专栏推荐⬇⬇⬇
Java项目
Python项目
安卓项目
微信小程序项目