Python crawler - JiaSuLe (加速乐, simple version) on a government website: quick example notes

# -*- coding:utf-8 -*-
# @Time : 2023/10/23 17:06
# @Author: 水兵没月
# @File : 哈哈哈哈.py
# @Software: PyCharm
####################

import base64
import random
import requests

# Proxy pool
def get_proxy(proxy_type=None):
    # Choose the type per call: a random.choice() default in the signature
    # would be evaluated only once, when the function is defined.
    if proxy_type is None:
        proxy_type = random.choice([1, 2, 3, 4, 5])
    url = "http://ZZZZZZZZZZZZZZZZZZ"
    url = "http://XXXXXXXXXXXXXXXX
    payload={
        "proxy_type": proxy_type,
        "spider_type": 2,
    }
    response = requests.request("POST", url, data=payload)
    proxies = response.json()['msg'][0]
    return proxies
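# A note on the response shape: the proxy API is private and redacted, so the
# schema below is an assumption inferred from the parsing above -- msg[0] is
# taken to already be a dict usable as requests' proxies= argument, e.g.
# {"msg": [{"http": "http://1.2.3.4:8080", "https": "http://1.2.3.4:8080"}]}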

# Step 1: request the list page to pick up the anti-crawler cookie
# The author published the target URL base64-encoded; decode it before requesting
url = base64.b64decode('aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL2NvbC9jb2w4MzU2OC9pbmRleC5odG1sP3VpZD0zNTEwODUmcGFnZU51bT0xMjE=').decode()
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Host": "sthjt.jiangsu.gov.cn",
    "Pragma": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",

}
res = requests.Session().get(url, headers=headers, proxies=get_proxy(1))
res.encoding = 'UTF-8'
# Re-serialize the cookies set by the first response into a Cookie header
cookies = res.cookies.items()
cookie = ''
for name, value in cookies:
    cookie += '{0}={1};'.format(name, value)
headers['Cookie'] = cookie
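# Note: the cookies captured here are typically JiaSuLe's __jsluid_h /
# __jsl_clearance_s pair (exact names vary by deployment); reusing a single
# requests.Session() for both requests would resend them automatically
# instead of rebuilding the header by hand.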
print(headers)
# The data-proxy URL is likewise base64-encoded; decode it before requesting
url = base64.b64decode('aHR0cDovL3N0aGp0LmppYW5nc3UuZ292LmNuL21vZHVsZS93ZWIvanBhZ2UvZGF0YXByb3h5LmpzcD9zdGFydHJlY29yZD0xJmVuZHJlY29yZD0xMjAmcGVycGFnZT00MCcrJyZjb2w9MSZhcHBpZD0xJndlYmlkPTE0JnBhdGg9JTJGJmNvbHVtbmlkPTgzNTY4JnNvdXJjZUNvbnRlbnRUeXBlPTEmdW5pdGlkPTM1MTA4NSZ3ZWJuYW1lPSVFNiVCMSU5RiVFOCU4QiU4RiVFNyU5QyU4MSVFNyU5NCU5RiVFNiU4MCU4MSVFNyU4RSVBRiVFNSVBMiU4MyVFNSU4RSU4NSZwZXJtaXNzaW9udHlwZT0w').decode()

res = requests.Session().get(url, headers=headers, proxies=get_proxy())
res = res.text
print([res])  # wrapping in a list prints the raw string, escapes included
print('========================')

This site's cookie-based anti-crawling is Knownsec's ChuangYuDun / JiaSuLe (加速乐). I tested paging and refreshing the page: the cookie does not change either way. So beating JiaSuLe on this URL comes down to requesting the first-step URL above, taking the cookie it sets, and sending that cookie directly with the request to the target data URL. Tested and working, but even with proxies the requests get throttled quickly, so add time.sleep(3) between requests to keep the frequency down.
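Below is a minimal end-to-end sketch of the same flow. It is a sketch under assumptions, not the author's exact script: first_url and data_url stand for the two decoded URLs above, the proxy pool is treated as optional (proxies=None works without one), and the recommended delay is built in.

import time
import requests

def fetch_with_jsl_cookie(first_url, data_url, headers, proxies=None):
    session = requests.Session()
    # Step 1: hit the list page so JiaSuLe sets its cookies on the session
    session.get(first_url, headers=headers, proxies=proxies)
    time.sleep(3)  # the site throttles frequent requests, even through proxies
    # Step 2: the same session resends those cookies automatically
    res = session.get(data_url, headers=headers, proxies=proxies)
    res.encoding = 'UTF-8'
    return res.text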
