# 所谓用户代理池,就是将多个不同的用户代理(User-Agent)组建为一个池子,每次请求时随机调用其中一个,防反爬效果好。
#用户代理池的构建
import urllib.request
import re
import random #随机数组模块
# Pool of browser User-Agent strings; one entry is picked at random per request.
# Every entry needs its own trailing comma: adjacent string literals with no
# comma between them are concatenated into a single string, which would leave
# the pool with only one (invalid) entry.
uapools = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 ",
]
def ua(uapools):
    """Install a global urllib opener that sends a randomly chosen User-Agent.

    Args:
        uapools: Non-empty sequence of User-Agent strings to choose from.

    The chosen string is printed, then an opener carrying it as its only
    default header is installed via ``urllib.request.install_opener``.
    """
    chosen_agent = random.choice(uapools)
    print(chosen_agent)
    # Browser disguise: the opener's default headers are replaced by a single
    # User-Agent header.
    disguised_opener = urllib.request.build_opener()
    disguised_opener.addheaders = [("User-Agent", chosen_agent)]
    # Install as the global opener.
    urllib.request.install_opener(disguised_opener)
# Regex that captures the text of each post on a listing page.
# NOTE(review): this pattern previously read '.*?(.*?).*?', which can only
# ever match empty strings -- its HTML tags appear to have been stripped.
# The tags below are presumed from the site's markup; confirm against a live
# page before relying on the output.
pat = '<div class="content">.*?<span>(.*?)</span>.*?</div>'
# Compiled once outside the loop; re.S lets '.' match across newlines.
post_re = re.compile(pat, re.S)
for i in range(0, 3):
    # Pick a fresh random User-Agent before every page request.
    ua(uapools)
    thisurl = "https://www.qiushibaike.com/text/page/" + str(i + 1) + "/"
    # The with-block closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(thisurl) as response:
        data = response.read().decode("utf-8", "ignore")
    resoult = post_re.findall(data)
    for post in resoult:
        print(post)
        print("___________________")