o(∩_∩)o设置代理访问博客o(∩_∩)o

设置代理访问博客

直接看代码:

# -*- coding:UTF-8 -*-

import re
import threading
import time
import random
import requests
from bs4 import BeautifulSoup

firefoxHead = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
IPRegular = r"(([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5]).){3}([1-9]?\d|1\d{2}|2[0-4]\d|25[0-5])"
host = "https://blog.csdn.net"
uid = "qq_17328759" # CSDN的ID



def parseIPList(url="http://www.xicidaili.com/"):
    """
    Scrape a free-proxy listing page and return proxy address/port pairs.

    :param url: URL of the proxy listing site
    :return: list of [ip, port] string pairs
    """
    proxies = []
    pending = []  # holds the ip (and then the port) of the row being parsed
    # timeout keeps a dead site from hanging the scraper forever
    response = requests.get(url, headers=firefoxHead, timeout=10).content
    # Name the parser explicitly: a bare BeautifulSoup(...) emits a warning
    # and may pick different parsers on different machines.
    soup = BeautifulSoup(response, "html.parser")
    for td in soup.find_all("td"):
        # On this site the plain ip/port cells are the attribute-less <td>s.
        if td.attrs:
            continue
        text = td.text
        if re.search(IPRegular, text):
            pending.append(text)
        if text.isdigit():
            # A pure-digit cell is the port; pair it with the preceding ip.
            pending.append(text)
            if len(pending) >= 2:  # guard against malformed rows
                proxies.append([pending[0], pending[1]])
            pending = []
    return proxies

def articleId(uid):
    """
    Fetch the list of article ids published by a CSDN user.

    :param uid: CSDN user account name
    :return: list of article id strings (from the "data-articleid" attribute)
    """
    articleIds = []
    listUrl = "{}/{}".format(host, uid)
    # timeout prevents an unresponsive server from blocking startup
    response = requests.get(listUrl, headers=firefoxHead, timeout=10).content
    # Explicit parser avoids BeautifulSoup's "no parser specified" warning.
    soup = BeautifulSoup(response, "html.parser")
    # Each article is rendered as a <div data-articleid="..."> element.
    for div in soup.find_all("div", attrs={"data-articleid": True}):
        articleIds.append(div.attrs["data-articleid"])
    return articleIds


def PV(IPs, uid, codes):
    """
    Endlessly visit every article through a randomly chosen proxy to
    simulate page views.  Never returns; intended as a thread body.

    :param IPs: list of [ip, port] proxy pairs
    :param uid: CSDN user account name
    :param codes: list of article id strings
    """
    s = requests.Session()
    s.headers = firefoxHead
    count = 0
    url = host + "/{}/article/details/{}"

    while True:
        count += 1
        print("正在进行第{}次访问\t".format(count))
        proxie = random.choice(IPs)
        # NOTE(review): requests also accepts "http://ip:port"; the bare
        # "ip:port" form is kept to match the original behavior.
        s.proxies = {"http": "{}:{}".format(proxie[0], proxie[1])}
        try:
            # Warm-up request through the freshly selected proxy.
            s.get(host, timeout=10)
            for code in codes:
                articleUrl = url.format(uid, code)
                html = s.get(articleUrl, timeout=10).text
                soup = BeautifulSoup(html, "html.parser")
                spans = soup.find_all("span")
                # The read counter is normally the third <span>; some page
                # variants shift it to the fourth.  Guard every access so an
                # unexpected page layout cannot crash the loop.
                if (len(spans) > 2
                        and spans[2].attrs.get('class', [''])[0] == u'read-count'):
                    print(spans[2].text)
                elif len(spans) > 3:
                    print(spans[3].text)
        except requests.RequestException as exc:
            # Free proxies die constantly; a failed one must not kill the
            # whole thread — just log and pick another on the next pass.
            print("proxy {}:{} failed: {}".format(proxie[0], proxie[1], exc))
        # time.sleep(random.randint(60, 75))


class addReadNum(threading.Thread):
    """Worker thread that drives PV() for one set of proxies/articles."""

    def __init__(self, IPs, uid, articleIds):
        """
        :param IPs: list of [ip, port] proxy pairs
        :param uid: CSDN user account name
        :param articleIds: list of article id strings to visit
        """
        # Two-argument super() form keeps Python 2 compatibility
        # (the file's coding header and u'' literals suggest py2 origins).
        super(addReadNum, self).__init__()
        self.IPs = IPs
        self.uid = uid
        self.articleIds = articleIds

    def run(self):
        # Thread body: loops forever bumping read counts via proxies.
        PV(self.IPs, self.uid, self.articleIds)

def main():
    """Entry point: gather article ids and proxies, then start one worker."""
    ids = articleId(uid)
    proxies = parseIPList()
    worker = addReadNum(proxies, uid, ids)
    worker.start()


if __name__ == "__main__":
    main()

当然其网站的博客的阅读量也类似啦。

脚本思路分析:

1.通过设置代理,实现模拟博客阅读,以增加博客的阅读数量
2.获取代理地址和端口
3.获取博客地址

具体实现方法:

使用BeautifulSoup库实现功能:获取代理地址和端口,通过CSDN ID获取你的所有博客编号。

  • 获取http请求结果:
    html = s.get(articleUrl).text
  • BeautifulSoup对象实例化:
    soup = BeautifulSoup(html, "html.parser")
  • 查找出对应的节点:
    1、无属性节点
    spans = soup.find_all("span")
    2、有属性节点
    div = soup.find_all("div" , attrs={"data-articleid" : True})

requests库使用描述:

  • 创建session实例:
    s = requests.Session()
  • 添加请求头:
    s.headers = firefoxHead
  • 添加代理地址和端口:
    s.proxies = {"http": "{}:{}".format(proxie[0], proxie[1])}
  • 发起请求
    tx = s.get(host)

你可能感兴趣的:(common)