# Python 完整框架爬取浙江省的职高

import requests
from lxml import etree
# Scrape the news listing of www.zjcs.net.cn: fetch the first listing page,
# resolve the first article link on it, then walk ten numbered listing pages
# and print the HTML of the first article linked from each one.
#
# NOTE: the pasted original used typographic quotes and had lost the loop
# indentation, so it could not be parsed; both are restored here.

# Site root; substituted for the relative "../" prefix of scraped hrefs.
wangzhi = 'http://www.zjcs.net.cn/'
# Entry listing page.
url = 'http://www.zjcs.net.cn/index/xwsd.htm'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
html = requests.get(url, headers=headers)
# Force UTF-8 so .text is decoded with that codec rather than a guessed one.
html.encoding = 'utf-8'

print(html.text)

# From here on `html` is the parsed lxml tree, not the HTTP response.
html = etree.HTML(html.text)

# href of the first list entry; [0] raises IndexError if the XPath matches nothing.
title = html.xpath('/html/body/div[3]/div[2]/ul/li[1]/p/a/@href')[0]

print(title)

# NOTE(review): the pasted source had '…/' here, presumably a garbled '../'
# (relative href one level up from /index/) -- confirm against the live page.
url = title.replace('../', wangzhi)

print(url)

# Older listing pages are numbered files under /index/xwsd/; this visits
# 161.htm down to 152.htm.
for i in range(1, 11):
    sd = 162 - i
    jixupa = 'http://www.zjcs.net.cn/index/xwsd/' + str(sd) + '.htm'
    html = requests.get(jixupa, headers=headers)
    html.encoding = 'utf-8'
    html = etree.HTML(html.text)
    # First article link on this listing page; IndexError if the element is absent.
    title = html.xpath('//*[@id="line_u6_0"]/p/a/@href')[0]
    # NOTE(review): '…/…/' in the pasted source, presumably '../../' since these
    # pages sit one directory deeper than the entry page -- confirm.
    url = title.replace('../../', wangzhi)
    print(url)
    # Fetch the article itself and dump its HTML.
    html = requests.get(url, headers=headers)
    html.encoding = 'utf-8'
    print(html.text)

# 你可能感兴趣的:(szy的python,python)