python脚本实现自动访问博客

使用 Python 脚本实现自动化访问博客:脚本内置多个代理 IP,每次循环随机选择一个代理,并随机访问一篇博文。

设置随机选择代理(数目可增减):

# The `proxies` argument of requests is a key/value mapping, so every proxy is
# stored as a dict of the form {'http': 'host:port'}.
# Add or remove addresses here; the list below is built from this table, so
# there is no second place to keep in sync.
_PROXY_ADDRESSES = (
    '109.197.188.12:8080',
    '183.247.221.119:30001',
    '61.160.223.141:7302',
    '223.82.60.202:8060',
    '222.135.31.84:8060',
    '27.148.196.28:8000',
    '106.54.141.54:3128',
    '182.92.194.49:8118',
    '39.175.92.35:30001',
    '39.108.71.54:8088',  # was listed twice; kept once so every proxy is equally likely
    '58.220.95.90:9401',
    '47.93.48.155:8888',
    '202.109.157.65:9000',
    '58.220.95.31:10174',
    '47.92.234.75:80',
    '59.125.177.31:8080',
    '116.9.163.205:58080',
    '139.217.101.48:9080',
    '111.3.118.247:30001',
    '122.225.75.10:8081',
    '113.214.48.5:8000',
    '39.108.193.201:8888',
    '115.223.7.238:80',
    '125.73.209.108:80',
    '183.245.6.48:8080',
    '182.43.198.105:8888',
    '106.14.255.124:80',
    '39.108.137.133:3128',
    '61.216.185.88:60808',
    '39.175.67.28:30001',
    '39.108.56.233:38080',
)

# One proxies mapping per address, ready to be passed as `proxies=` to requests.
proxy = [{'http': address} for address in _PROXY_ADDRESSES]

# Pick the mapping to use for the next request.
proxyy = random.choice(proxy)

设置要访问的博文(数目可增减):

 # Articles to visit. Add or remove entries freely; one is drawn at random below.
 Url = [
     'https://blog.csdn.net/weixin_42365095/article/details/123733081?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123714388?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123686672?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123574460?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123573839?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123573323?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/123010695?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
     'https://blog.csdn.net/weixin_42365095/article/details/84437737',
     'https://blog.csdn.net/weixin_42365095/article/details/84442162',
     'https://blog.csdn.net/weixin_42365095/article/details/84446357',
     'https://blog.csdn.net/weixin_42365095/article/details/84889478',
     'https://blog.csdn.net/weixin_42365095/article/details/88693846',
     'https://blog.csdn.net/weixin_42365095/article/details/115637930',
     'https://blog.csdn.net/weixin_42365095/article/details/88421310',
     'https://blog.csdn.net/weixin_42365095/article/details/88422147',
     'https://blog.csdn.net/weixin_42365095/article/details/115533394',
     'https://blog.csdn.net/weixin_42365095/article/details/115871866',
     'https://blog.csdn.net/weixin_42365095/article/details/115578918',
     'https://blog.csdn.net/weixin_42365095/article/details/115604437',
     'https://blog.csdn.net/weixin_42365095/article/details/113181817',
     'https://blog.csdn.net/weixin_42365095/article/details/85950916',
     'https://blog.csdn.net/weixin_42365095/article/details/110440686',
     'https://blog.csdn.net/weixin_42365095/article/details/110441970',
     'https://blog.csdn.net/weixin_42365095/article/details/118734856',
     'https://blog.csdn.net/weixin_42365095/article/details/118731648',
     'https://blog.csdn.net/weixin_42365095/article/details/89215755',
     'https://blog.csdn.net/weixin_42365095/article/details/80948787',
     'https://blog.csdn.net/weixin_42365095/article/details/82744305',
     'https://blog.csdn.net/weixin_42365095/article/details/82840801',
     'https://blog.csdn.net/weixin_42365095/article/details/82871780',
     'https://blog.csdn.net/weixin_42365095/article/details/82873960',
     'https://blog.csdn.net/weixin_42365095/article/details/82874014',
     'https://blog.csdn.net/weixin_42365095/article/details/82884744',
     'https://blog.csdn.net/weixin_42365095/article/details/83002479',
     'https://blog.csdn.net/weixin_42365095/article/details/83003467',
     'https://blog.csdn.net/weixin_42365095/article/details/89634002',
 ]

 # Pick one of the articles above at random.
 url = random.choice(Url)

设置请求头信息(可以通过抓包工具或浏览器的 F12 开发者工具获取):

# Browser-like User-Agent string; it can be copied from a packet-capture tool
# or from the browser's developer tools (F12).
_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46"

# Header mapping sent with every request.
headers = {"User-Agent": _user_agent}

通过 requests.get(url, headers=headers, proxies=proxyy).content.decode('utf-8') 发送请求:该调用使用上面随机选出的 url、请求头和代理 IP 访问博文,并把返回的页面内容按 UTF-8 解码为字符串。

完整代码:

# 导入需要用到的库
import requests
import lxml
import random
import time
from lxml import etree
import threading

# Request headers that imitate a desktop browser; the User-Agent value can be
# copied from a packet-capture tool.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.46"}

# host:port of every proxy the script may use. Add or remove addresses here;
# the `proxy` list below is derived from this table, so there is no second
# place to keep in sync.
_PROXY_ADDRESSES = (
    '109.197.188.12:8080',
    '183.247.221.119:30001',
    '61.160.223.141:7302',
    '223.82.60.202:8060',
    '222.135.31.84:8060',
    '27.148.196.28:8000',
    '106.54.141.54:3128',
    '182.92.194.49:8118',
    '39.175.92.35:30001',
    '39.108.71.54:8088',  # was listed twice; kept once so every proxy is equally likely
    '58.220.95.90:9401',
    '47.93.48.155:8888',
    '202.109.157.65:9000',
    '58.220.95.31:10174',
    '47.92.234.75:80',
    '59.125.177.31:8080',
    '116.9.163.205:58080',
    '139.217.101.48:9080',
    '111.3.118.247:30001',
    '122.225.75.10:8081',
    '113.214.48.5:8000',
    '39.108.193.201:8888',
    '115.223.7.238:80',
    '125.73.209.108:80',
    '183.245.6.48:8080',
    '182.43.198.105:8888',
    '106.14.255.124:80',
    '39.108.137.133:3128',
    '61.216.185.88:60808',
    '39.175.67.28:30001',
    '39.108.56.233:38080',
)

# One requests-style proxies mapping per address.
# NOTE(review): each mapping only has an 'http' key. requests selects the proxy
# by the scheme of the requested URL, so the https:// article URLs used by this
# script are not routed through these proxies -- confirm whether an 'https'
# entry is intended before relying on them.
proxy = [{'http': address} for address in _PROXY_ADDRESSES]
# Articles to visit; one is drawn at random on every pass of the loop below.
# The table is built once here instead of being rebuilt on every iteration.
Url = [
    'https://blog.csdn.net/weixin_42365095/article/details/123733081?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123714388?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123686672?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123574460?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573839?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123573323?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/123010695?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/84437737',
    'https://blog.csdn.net/weixin_42365095/article/details/84442162',
    'https://blog.csdn.net/weixin_42365095/article/details/84446357',
    'https://blog.csdn.net/weixin_42365095/article/details/84889478',
    'https://blog.csdn.net/weixin_42365095/article/details/88693846',
    'https://blog.csdn.net/weixin_42365095/article/details/115637930',
    'https://blog.csdn.net/weixin_42365095/article/details/88421310',
    'https://blog.csdn.net/weixin_42365095/article/details/88422147',
    'https://blog.csdn.net/weixin_42365095/article/details/115533394',
    'https://blog.csdn.net/weixin_42365095/article/details/115871866',
    'https://blog.csdn.net/weixin_42365095/article/details/115578918',
    'https://blog.csdn.net/weixin_42365095/article/details/115604437',
    'https://blog.csdn.net/weixin_42365095/article/details/113181817',
    'https://blog.csdn.net/weixin_42365095/article/details/85950916',
    'https://blog.csdn.net/weixin_42365095/article/details/110440686',
    'https://blog.csdn.net/weixin_42365095/article/details/110441970',
    'https://blog.csdn.net/weixin_42365095/article/details/118734856',
    'https://blog.csdn.net/weixin_42365095/article/details/118731648',
    'https://blog.csdn.net/weixin_42365095/article/details/89215755',
    'https://blog.csdn.net/weixin_42365095/article/details/80948787',
    'https://blog.csdn.net/weixin_42365095/article/details/82744305',
    'https://blog.csdn.net/weixin_42365095/article/details/82840801',
    'https://blog.csdn.net/weixin_42365095/article/details/82871780',
    'https://blog.csdn.net/weixin_42365095/article/details/82873960',
    'https://blog.csdn.net/weixin_42365095/article/details/82874014',
    'https://blog.csdn.net/weixin_42365095/article/details/82884744',
    'https://blog.csdn.net/weixin_42365095/article/details/83002479',
    'https://blog.csdn.net/weixin_42365095/article/details/83003467',
    'https://blog.csdn.net/weixin_42365095/article/details/89634002',
]

# Smaller pool used for the extra random visit made after each list entry.
URL = [
    'https://blog.csdn.net/weixin_42365095/article/details/122785775?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122783180?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122643692?spm=1001.2014.3001.5502',
    'https://blog.csdn.net/weixin_42365095/article/details/122637765?spm=1001.2014.3001.5502',
]

# Seconds to wait on a single request; without a timeout requests.get can
# block indefinitely on an unresponsive server or proxy.
REQUEST_TIMEOUT = 10


def _fetch(target, proxies):
    """Request *target* and return its body decoded as UTF-8.

    The request is sent with the module-level ``headers`` and the given
    requests-style ``proxies`` mapping.

    Returns:
        The decoded page text, or ``None`` when the request raised a
        ``requests.RequestException`` (the failure is printed, not raised, so
        that one bad request does not stop the endless loop below).
    """
    try:
        reply = requests.get(url=target, headers=headers, proxies=proxies,
                             timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(target, "request failed:", exc)
        return None
    # errors='replace' keeps a page with stray non-UTF-8 bytes from raising.
    return reply.content.decode('utf-8', errors='replace')


while True:
    url = random.choice(Url)  # article for this pass
    proxyy = random.choice(proxy)  # proxies mapping for this pass
    print(url, "*" * 1)
    print(proxyy)
    time.sleep(20)  # interval between visits

    response = _fetch(url, proxyy)
    if not response:
        continue

    mytree = lxml.etree.HTML(response)
    if mytree is None:  # defensive: nothing parseable came back
        continue

    csdnlist = mytree.xpath('//div[@class="article-list"]/div')
    for i in csdnlist:
        links = i.xpath('./h4/a/@href')
        if not links:
            # Entry without a link: nothing to visit.
            continue
        iUrl = links[0]

        # The original issued these requests only in the `except` branch, i.e.
        # exactly when iUrl could NOT be extracted, and passed the whole
        # `proxy` list as `proxies`. They now run when a link was found and
        # use the mapping chosen for this pass.
        _fetch(iUrl, proxyy)
        time.sleep(0.8)

        # As above, also visit one more randomly chosen article.
        urll = random.choice(URL)
        _fetch(urll, proxyy)
        print(urll, "$" * 30)
        print(iUrl, "@" * 30)

结果显示(可随机选择代理随机访问博客):

python脚本实现自动访问博客_第1张图片

你可能感兴趣的:(#,自动化脚本,Python,python)