Python 增加 CSDN 访问量(升级版)

主程序

#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib.request, http.cookiejar, re
import time
import threading, sys
# tools是我的自定义工具类
import tools
import socket

'''
模拟访问博客增加访问量
'''
# Default timeout, in seconds, for sockets created after this point.
socket.setdefaulttimeout(1.0)

# Number of visit rounds: the first command-line argument when it is a valid
# integer, otherwise the default of 2.  This runs at import time, so a
# non-numeric argument (for instance one meant for a host program that merely
# imports this module) must not abort the import with a ValueError.
visitnum = 2
if len(sys.argv) > 1:
    try:
        visitnum = int(sys.argv[1])
    except ValueError:
        print("invalid visit count %r, using default %d" % (sys.argv[1], visitnum),
              file=sys.stderr)


class Csdn(threading.Thread):
    """Worker thread that opens every article linked from one list page.

    The HTML of each list page is cached in the class-level ``contents``
    dict under the key ``'page_<n>'`` so that later rounds can reuse it
    instead of downloading the list page again.
    """

    # Cache of list-page HTML shared by all instances, keyed by 'page_<n>'.
    # Note that instances also assign ``self.contents`` (a string holding
    # the HTML of their own page), which shadows this dict on the instance.
    contents = {}
    # Request headers sent by every opener; getListPages adds a Referer.
    headers = [('host', 'blog.csdn.net'),
               ('User-Agent',
                'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
               ]
    domain = 'http://blog.csdn.net'
    # List-page URL template: (username, page number).
    url = 'http://blog.csdn.net/%s/article/list/%s'

    # NOTE(review): the HTML-tag portions of the three patterns below were
    # lost from the available copy of this file; only the capture groups and
    # the literal Chinese text survived.  The tag fragments are best-effort
    # reconstructions that keep the surviving pieces and the group numbering
    # the code relies on -- verify them against the live page markup.

    # Group 2 is the total number of list pages ("共N页").
    PAGE_COUNT_PATTERN = r'([\s\S]*?)共(\d+)页'
    # Group 1 is the markup of the article-list container.
    ARTICLE_LIST_PATTERN = r'<div\s+id="article_list"[^>]*>([\s\S]*)'
    # Per article: group 1 = href, group 2 = title, group 4 = read count.
    ARTICLE_PATTERN = (r'<span\s+class="link_title">([\s\S]*?)'
                       r'<a\s+href="(.*?)"[^>]*>([\s\S]*?)</a>'
                       r'([\s\S]*?)阅读</a>\((\d+)\)')

    def __init__(self, username, page, times):
        """Create a worker for list page ``page`` of ``username``.

        ``times`` is the number of the current visit round; it is only used
        in progress messages.
        """
        super().__init__()
        self.username = username
        self.createOpener()
        self.page = page
        self.times = times

    @staticmethod
    def _buildOpener():
        """Return a cookie-aware opener that sends the shared headers."""
        cookieProc = urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar())
        opener = urllib.request.build_opener(cookieProc)
        # Copy so that each opener owns its header list.
        opener.addheaders = list(Csdn.headers)
        return opener

    @staticmethod
    def getListPages(username, page=1, count=1):
        """Fetch list pages from ``page`` onwards and return the page count.

        Each page that downloads successfully is stored in ``Csdn.contents``.
        The function recurses to the next page while the page count parsed
        from the current page is greater than ``page``.

        Args:
            username: Blog user name inserted into the list URL.
            page: List page to fetch.
            count: Total page count already known to the caller; when the
                request for ``page`` fails and ``count`` is greater than
                ``page`` the page is skipped instead of aborting.

        Returns:
            The total page count parsed from ``page``, or ``count`` when
            ``page`` had to be skipped.

        Raises:
            Exception: if the request fails and no later page is known, or
                if the page count cannot be found in the downloaded HTML.
        """
        key = 'page_' + str(page)
        if page == 1:
            referer = ('Referer', 'http://blog.csdn.net/' + username)
            # Repeated calls must not pile up duplicate Referer headers in
            # the shared class-level list.
            if referer not in Csdn.headers:
                Csdn.headers.append(referer)
        opener = Csdn._buildOpener()
        url = Csdn.url % (username, page)
        try:
            with opener.open(url) as response:
                contents = response.read().decode('utf-8', 'ignore')
        except Exception as e:
            if count > page:
                # Skip the failed page but keep fetching the remaining ones.
                # There is no HTML to parse for this page, so report the
                # count the caller already knew.
                Csdn.getListPages(username, page + 1, count)
                return count
            raise Exception('出错了') from e
        Csdn.contents[key] = contents
        match = re.search(Csdn.PAGE_COUNT_PATTERN, contents)
        if match is None:
            raise Exception('未能从第%s页解析出总页数' % page)
        pages = int(match.group(2))
        if pages > page:
            Csdn.getListPages(username, page + 1, pages)
        return pages

    def createOpener(self):
        """Build this thread's opener and store it in ``self.opener``."""
        self.opener = Csdn._buildOpener()

    def visitUrl(self):
        """Load this thread's list page (from cache if present) and visit it."""
        key = 'page_' + str(self.page)
        if key in Csdn.contents:
            print('--------%s exists' % key)
            self.contents = Csdn.contents[key]
        else:
            url = Csdn.url % (self.username, self.page)
            try:
                with self.opener.open(url) as response:
                    self.contents = response.read().decode('utf-8', 'ignore')
            except Exception:
                # Best effort: a list page that cannot be fetched is skipped.
                print("访问 %s 出错 " % url)
                return
            Csdn.contents[key] = self.contents
        self.addVisitNum()

    def addVisitNum(self):
        """Open every article URL found in ``self.contents`` once."""
        listMatch = re.search(Csdn.ARTICLE_LIST_PATTERN, self.contents)
        if listMatch is None:
            print("第%s页未找到文章列表" % self.page)
            return
        for fields in re.findall(Csdn.ARTICLE_PATTERN, listMatch.group(1)):
            article = {'url': Csdn.domain + fields[1],
                       'name': fields[2].strip(),
                       'num': fields[4]}
            try:
                # Only the request itself matters; close the response at once
                # so the connection is released.
                self.opener.open(article['url']).close()
            except Exception:
                print("当前第%s次访问,访问出错,url:%s" % (self.times, article['url']))
                continue
            print("当前第%s次访问,第%s页,url:%s" % (self.times, self.page, article['url']))

    def run(self):
        """Thread entry point."""
        self.visitUrl()


class MainCsdn(threading.Thread):
    """One visit round: starts a ``Csdn`` worker per list page and waits."""

    def __init__(self, username, times, pages):
        """Store the user name, the round number and the number of pages."""
        super().__init__()
        self.username = username
        self.times = times
        self.pages = pages

    def run(self):
        """Start one worker per page, then wait for all of them to finish."""
        workers = []
        for page in range(1, self.pages + 1):
            worker = Csdn(self.username, page, times=self.times)
            worker.start()
            workers.append(worker)
        # Wait for every page worker to finish.
        for worker in workers:
            worker.join()
        print(self.times, " 退出第%s次执行任务" % self.times)


@tools.runTime
def main():
    """Fetch the list pages once, then run ``visitnum`` visit rounds."""
    # CSDN user name (placeholder value; replace with a real one).
    username = 'csdn昵称'
    pages = Csdn.getListPages(username, 1, 1)
    rounds = []
    for i in range(1, visitnum + 1):
        visitRound = MainCsdn(username=username, times=i, pages=pages)
        visitRound.start()
        rounds.append(visitRound)
    # Wait for every round to finish.
    for visitRound in rounds:
        visitRound.join()
    print("退出主线程")


if __name__ == '__main__':
    main()



tools.py
#!/usr/bin/python
# -*- coding:utf-8 -*-
import time

'''
自定义工具方法,tools.py
'''


def runTime(func):
    """Decorator that prints how long each call to ``func`` took.

    After ``func`` returns, the elapsed time is printed as
    ``read: <seconds> s`` and the return value of ``func`` is passed through
    unchanged.  Nothing is printed when ``func`` raises.
    """
    # Imported here so the decorator does not depend on an extra
    # module-level import.
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def newFunc(*args, **kwargs):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        res = func(*args, **kwargs)
        end = time.perf_counter()
        print("read: %f s" % (end - start))
        return res

    return newFunc


def log(content, file='test.log', type=1):
    """Write ``str(content)`` to ``file``.

    Args:
        content: Value to write; it is converted with ``str()`` first.
        file: Path of the target file (default ``'test.log'``).
        type: ``1`` appends to the file; any other value truncates it
            before writing.
    """
    mode = 'a+' if type == 1 else 'w+'
    # The with-block closes (and thereby flushes) the handle even if the
    # write fails; previously the file was never closed explicitly.
    with open(file, mode, encoding='utf-8') as f:
        f.write(str(content))



你可能感兴趣的:(python)