Python 脚本实现自动化访问博客:内置多个代理 IP,每次随机切换代理(模拟不同用户),并随机访问一篇博文。
import random  # random.choice is used below; imported here so the snippet runs on its own

# The `proxies` argument of requests is a key/value mapping, so each proxy is
# stored as a dict.  The pool is kept as one list of "host:port" strings rather
# than 32 numbered variables; 39.108.71.54:8088 was listed twice in the
# original pool and now appears once, so every address is equally likely.
# NOTE(review): each mapping only defines the 'http' scheme.  requests selects
# a proxy by URL scheme, so https:// URLs are not routed through these entries
# -- confirm whether an 'https' entry is also wanted.
_PROXY_ADDRESSES = [
    '109.197.188.12:8080',
    '183.247.221.119:30001',
    '61.160.223.141:7302',
    '223.82.60.202:8060',
    '222.135.31.84:8060',
    '27.148.196.28:8000',
    '106.54.141.54:3128',
    '182.92.194.49:8118',
    '39.175.92.35:30001',
    '39.108.71.54:8088',
    '58.220.95.90:9401',
    '47.93.48.155:8888',
    '202.109.157.65:9000',
    '58.220.95.31:10174',
    '47.92.234.75:80',
    '59.125.177.31:8080',
    '116.9.163.205:58080',
    '139.217.101.48:9080',
    '111.3.118.247:30001',
    '122.225.75.10:8081',
    '113.214.48.5:8000',
    '39.108.193.201:8888',
    '115.223.7.238:80',
    '125.73.209.108:80',
    '183.245.6.48:8080',
    '182.43.198.105:8888',
    '106.14.255.124:80',
    '39.108.137.133:3128',
    '61.216.185.88:60808',
    '39.175.67.28:30001',
    '39.108.56.233:38080',
]
# One {'http': 'host:port'} mapping per address, in the shape requests expects.
proxy = [{'http': address} for address in _PROXY_ADDRESSES]
# Pick one proxy mapping at random for the next request.
proxyy = random.choice(proxy)
import random  # random.choice is used below; imported here so the snippet runs on its own

# Article URLs to choose from.  Kept as a single list instead of forty numbered
# variables (the original numbering also skipped url10 in favour of "url0").
Url = [
    'https://blog.csdn.net/weixin_42365095/article/details/123733081?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123714388?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123686672?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123574460?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573839?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573323?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123010695?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/84437737',
    'https://blog.csdn.net/weixin_42365095/article/details/84442162',
    'https://blog.csdn.net/weixin_42365095/article/details/84446357',
    'https://blog.csdn.net/weixin_42365095/article/details/84889478',
    'https://blog.csdn.net/weixin_42365095/article/details/88693846',
    'https://blog.csdn.net/weixin_42365095/article/details/115637930',
    'https://blog.csdn.net/weixin_42365095/article/details/88421310',
    'https://blog.csdn.net/weixin_42365095/article/details/88422147',
    'https://blog.csdn.net/weixin_42365095/article/details/115533394',
    'https://blog.csdn.net/weixin_42365095/article/details/115871866',
    'https://blog.csdn.net/weixin_42365095/article/details/115578918',
    'https://blog.csdn.net/weixin_42365095/article/details/115604437',
    'https://blog.csdn.net/weixin_42365095/article/details/113181817',
    'https://blog.csdn.net/weixin_42365095/article/details/85950916',
    'https://blog.csdn.net/weixin_42365095/article/details/110440686',
    'https://blog.csdn.net/weixin_42365095/article/details/110441970',
    'https://blog.csdn.net/weixin_42365095/article/details/118734856',
    'https://blog.csdn.net/weixin_42365095/article/details/118731648',
    'https://blog.csdn.net/weixin_42365095/article/details/89215755',
    'https://blog.csdn.net/weixin_42365095/article/details/80948787',
    'https://blog.csdn.net/weixin_42365095/article/details/82744305',
    'https://blog.csdn.net/weixin_42365095/article/details/82840801',
    'https://blog.csdn.net/weixin_42365095/article/details/82871780',
    'https://blog.csdn.net/weixin_42365095/article/details/82873960',
    'https://blog.csdn.net/weixin_42365095/article/details/82874014',
    'https://blog.csdn.net/weixin_42365095/article/details/82884744',
    'https://blog.csdn.net/weixin_42365095/article/details/83002479',
    'https://blog.csdn.net/weixin_42365095/article/details/83003467',
    'https://blog.csdn.net/weixin_42365095/article/details/89634002',
]
url = random.choice(Url)  # pick one of the articles above at random
# Request headers: a Chrome/Edge 99 on Windows 10 User-Agent string, split
# across adjacent literals purely for readability (the value is unchanged).
_user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46"
)
headers = {"User-Agent": _user_agent}
通过 requests.get(url, headers=headers, proxies=proxyy).content.decode('utf-8') 发送请求:传入目标 url、请求头(headers)和代理 IP(proxies),并将返回的响应内容按 UTF-8 解码。
# 导入需要用到的库
import requests
import lxml
import random
import time
from lxml import etree
import threading
# Request headers that make the request look like it comes from a browser.
# The User-Agent value can be captured with a packet-capture tool; it is split
# across adjacent literals purely for readability (the value is unchanged).
_user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46"
)
headers = {"User-Agent": _user_agent}
# Proxy pool.  Kept as one list of "host:port" strings rather than 32 numbered
# variables; 39.108.71.54:8088 was listed twice in the original pool and now
# appears once, so every address is equally likely to be chosen.
# NOTE(review): each mapping only defines the 'http' scheme.  requests selects
# a proxy by URL scheme, so https:// URLs are not routed through these entries
# -- confirm whether an 'https' entry is also wanted.
_PROXY_ADDRESSES = [
    '109.197.188.12:8080',
    '183.247.221.119:30001',
    '61.160.223.141:7302',
    '223.82.60.202:8060',
    '222.135.31.84:8060',
    '27.148.196.28:8000',
    '106.54.141.54:3128',
    '182.92.194.49:8118',
    '39.175.92.35:30001',
    '39.108.71.54:8088',
    '58.220.95.90:9401',
    '47.93.48.155:8888',
    '202.109.157.65:9000',
    '58.220.95.31:10174',
    '47.92.234.75:80',
    '59.125.177.31:8080',
    '116.9.163.205:58080',
    '139.217.101.48:9080',
    '111.3.118.247:30001',
    '122.225.75.10:8081',
    '113.214.48.5:8000',
    '39.108.193.201:8888',
    '115.223.7.238:80',
    '125.73.209.108:80',
    '183.245.6.48:8080',
    '182.43.198.105:8888',
    '106.14.255.124:80',
    '39.108.137.133:3128',
    '61.216.185.88:60808',
    '39.175.67.28:30001',
    '39.108.56.233:38080',
]
# One {'http': 'host:port'} mapping per address, in the shape requests expects
# for its `proxies` argument.
proxy = [{'http': address} for address in _PROXY_ADDRESSES]
# Seconds to wait for any single HTTP request before giving up on it.
# requests has no default timeout, so without this a stalled connection would
# block the loop indefinitely.
REQUEST_TIMEOUT = 10

# Articles to visit.  Built once, before the loop, because the list never
# changes between passes.
Url = [
    'https://blog.csdn.net/weixin_42365095/article/details/123733081?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123714388?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123686672?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123574460?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573839?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573323?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123010695?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/84437737',
    'https://blog.csdn.net/weixin_42365095/article/details/84442162',
    'https://blog.csdn.net/weixin_42365095/article/details/84446357',
    'https://blog.csdn.net/weixin_42365095/article/details/84889478',
    'https://blog.csdn.net/weixin_42365095/article/details/88693846',
    'https://blog.csdn.net/weixin_42365095/article/details/115637930',
    'https://blog.csdn.net/weixin_42365095/article/details/88421310',
    'https://blog.csdn.net/weixin_42365095/article/details/88422147',
    'https://blog.csdn.net/weixin_42365095/article/details/115533394',
    'https://blog.csdn.net/weixin_42365095/article/details/115871866',
    'https://blog.csdn.net/weixin_42365095/article/details/115578918',
    'https://blog.csdn.net/weixin_42365095/article/details/115604437',
    'https://blog.csdn.net/weixin_42365095/article/details/113181817',
    'https://blog.csdn.net/weixin_42365095/article/details/85950916',
    'https://blog.csdn.net/weixin_42365095/article/details/110440686',
    'https://blog.csdn.net/weixin_42365095/article/details/110441970',
    'https://blog.csdn.net/weixin_42365095/article/details/118734856',
    'https://blog.csdn.net/weixin_42365095/article/details/118731648',
    'https://blog.csdn.net/weixin_42365095/article/details/89215755',
    'https://blog.csdn.net/weixin_42365095/article/details/80948787',
    'https://blog.csdn.net/weixin_42365095/article/details/82744305',
    'https://blog.csdn.net/weixin_42365095/article/details/82840801',
    'https://blog.csdn.net/weixin_42365095/article/details/82871780',
    'https://blog.csdn.net/weixin_42365095/article/details/82873960',
    'https://blog.csdn.net/weixin_42365095/article/details/82874014',
    'https://blog.csdn.net/weixin_42365095/article/details/82884744',
    'https://blog.csdn.net/weixin_42365095/article/details/83002479',
    'https://blog.csdn.net/weixin_42365095/article/details/83003467',
    'https://blog.csdn.net/weixin_42365095/article/details/89634002',
]

# Smaller pool used for the extra random visit made inside the inner loop.
URL = [
    'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
]


def _fetch(target, proxies):
    """Request *target* and return the response body decoded as UTF-8.

    Args:
        target: URL to request.
        proxies: a single scheme -> "host:port" mapping (one element of the
            ``proxy`` list), passed straight to ``requests.get``.

    Returns:
        The decoded body, or None (after printing the error) when the request
        or the decoding fails, so one bad response does not end the endless
        loop below.
    """
    try:
        reply = requests.get(url=target, headers=headers, proxies=proxies,
                             timeout=REQUEST_TIMEOUT)
        return reply.content.decode('utf-8')
    except (requests.RequestException, UnicodeDecodeError) as err:
        print(target, "request failed:", err)
        return None


while True:
    url = random.choice(Url)  # random article for this pass
    proxyy = random.choice(proxy)  # random proxy mapping for this pass
    print(url, "*" * 1)
    print(proxyy)
    time.sleep(20)  # interval between passes
    response = _fetch(url, proxyy)
    if not response:
        # Nothing usable came back; try again with a new article and proxy.
        continue
    mytree = lxml.etree.HTML(response)
    csdnlist = mytree.xpath('//div[@class="article-list"]/div')
    for i in csdnlist:
        hrefs = i.xpath('./h4/a/@href')
        if not hrefs:
            # This entry has no article link, so there is nothing to visit.
            continue
        iUrl = hrefs[0]
        # Use the chosen mapping `proxyy`; the original passed the whole
        # `proxy` list here, which is not a valid `proxies` argument.
        _fetch(iUrl, proxyy)
        time.sleep(0.8)
        # As above: also visit one other article picked at random.
        urll = random.choice(URL)
        _fetch(urll, proxyy)
        print(urll, "$" * 30)
        print(iUrl, "@" * 30)