# Python 模拟登录 CSDN 并获取博客列表等操作

# Python 3.3 可以登录成功
import urllib.parse, urllib.request, http.cookiejar, re


class Csdn():
    """Small CSDN client: logs in via passport.csdn.net and scrapes blog pages.

    Every request goes through one cookie-aware opener (``self.opener``), so
    cookies received while logging in are sent with the later requests.

    Attributes:
        username: Account name posted to the login form.
        password: Password posted to the login form.
        opener: ``urllib.request.OpenerDirector`` with a cookie processor.
        loginStatus: 1 after ``login()`` passed the credential check, else 0.
        errorInfo: Message of the last login failure, ``''`` if none.
    """

    # Page the ``lt`` / ``execution`` form values are read from.
    _LOGIN_FORM_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postlist'
    # URL the credentials are posted to; also sent as the Referer header.
    _LOGIN_POST_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postedit'
    _BLOG_LIST_URL = 'http://write.blog.csdn.net/postlist'
    _CATEGORY_URL = 'http://write.blog.csdn.net/category'

    def __init__(self, username, password):
        """Store the credentials and build the cookie-aware opener."""
        self.username = username
        self.password = password
        # Defined up front so both can be read before login() has been called.
        self.loginStatus = 0
        self.errorInfo = ''
        self.createCookie()

    def createCookie(self):
        """Create ``self.opener``, an opener that keeps cookies in a CookieJar."""
        jar = http.cookiejar.CookieJar()
        self.opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(jar))

    def _fetch(self, url, data=None, errors='ignore'):
        """Request *url* through the shared opener and return the decoded body.

        The response is closed once it has been read. *data*, when given, is
        sent as the request body; *errors* is the UTF-8 decode error policy.
        """
        with self.opener.open(url, data=data) as response:
            return response.read().decode('utf-8', errors)

    @staticmethod
    def _requireGroup(pattern, text, what):
        """Return group 1 of *pattern* in *text*.

        Raises:
            ValueError: If the pattern does not match; *what* names the
                missing value in the message.
        """
        found = re.search(pattern, text)
        if found is None:
            raise ValueError('could not find %s in the CSDN response' % what)
        return found.group(1)

    @staticmethod
    def _quoteName(name):
        """Percent-encode a category name (as UTF-8) for use in a query string."""
        return urllib.parse.quote(name.encode('utf-8', 'ignore'))

    def getKeyBeforeLogin(self):
        """Fetch the login page and return its ``lt`` and ``execution`` values.

        Returns:
            dict with the keys ``'lt'`` and ``'execution'``.

        Raises:
            ValueError: If either value is missing from the page.
        """
        # Strict decoding, as before: a malformed login page is an error here.
        html = self._fetch(self._LOGIN_FORM_URL, errors='strict')
        return {
            'lt': self._requireGroup(
                r'name="lt" value="(.*?)"', html, 'the "lt" form value'),
            'execution': self._requireGroup(
                r'name="execution" value="(.*?)"', html,
                'the "execution" form value'),
        }

    def login(self):
        """Post the credentials and return the redirect URL found in the reply.

        Sets ``loginStatus`` to 1 (and clears ``errorInfo``) when the reply
        does not contain the bad-credentials message, otherwise to 0.

        Raises:
            Exception: If the reply contains the bad-credentials message.
            ValueError: If a form value or the redirect URL cannot be found.
        """
        tokens = self.getKeyBeforeLogin()
        postData = urllib.parse.urlencode({
            'username': self.username,
            'password': self.password,
            'lt': tokens['lt'],
            'execution': tokens['execution'],
            '_eventId': 'submit',
        }).encode(encoding='UTF8')
        # addheaders lives on the shared opener, so these headers are also
        # used for every request made after login.
        self.opener.addheaders = [
            ('host', 'passport.csdn.net'),
            ('User-Agent',
             'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
            ('Referer', self._LOGIN_POST_URL),
        ]
        text = self._fetch(self._LOGIN_POST_URL, data=postData)
        if re.search(r'<span id="error-message">帐户名或登录密码不正确', text):
            self.loginStatus = 0
            self.errorInfo = '帐户名或登录密码不正确'
            raise Exception(self.errorInfo)
        self.loginStatus = 1
        self.errorInfo = ''
        return self._requireGroup(
            r'var redirect = "(.*?)"', text, 'the post-login redirect URL')

    def visitRedirectAfterLogin(self, redirect):
        """Request the redirect URL returned by ``login()``; the body is discarded."""
        self._fetch(redirect)

    def visitBlogList(self):
        """Fetch the blog list page and return its parsed rows.

        Returns:
            list of dicts as produced by ``handleBlogList``.
        """
        return self.handleBlogList(self._fetch(self._BLOG_LIST_URL))

    def handleBlogList(self, text):
        """Extract blog rows from the blog list HTML.

        Returns:
            list of dicts with the keys ``'url'``, ``'name'``, ``'time'``
            (the parenthesised text after the link), ``'readnum'`` and
            ``'comment'`` (the two numeric cells); empty if nothing matches.
        """
        pattern = r"<tr(.*?)<a href='(.*?)'(.*?)>(.*?)</a>(.*?)\((.*?)\)</span>(.*?)</td><td>([0-9]+)</td><td>([0-9]+)([\s\S]*?)</tr>"
        return [
            {'url': row[1], 'name': row[3], 'time': row[5],
             'readnum': row[7], 'comment': row[8]}
            for row in re.findall(pattern, text)
        ]

    def visitBlogCategory(self):
        """Fetch the category page and return its parsed categories."""
        return self.handleBlogCategory(self._fetch(self._CATEGORY_URL))

    def handleBlogCategory(self, text):
        """Extract categories from the category page HTML.

        Returns:
            list of dicts with the keys ``'name'`` and ``'id'`` (the digits
            of the ``#<id>`` link that follows the name); empty if nothing
            matches.
        """
        pattern = r"<td class='tdleft'><span>(.*?)</span></td>([\s\S]*?)<a href='#([0-9]+)'"
        return [
            {'name': row[0], 'id': row[2]}
            for row in re.findall(pattern, text)
        ]

    def addBlogCategory(self, name):
        """Add a category called *name* and return the parsed category list."""
        url = 'http://write.blog.csdn.net/category?t=add&name=%s' % self._quoteName(name)
        return self.handleBlogCategory(self._fetch(url))

    def editBlogCategory(self, id, name):
        """Rename category *id* to *name* and return the parsed category list."""
        # `id` shadows the builtin; the name is kept so keyword callers still work.
        url = 'http://write.blog.csdn.net/category?t=edit&id=%s&name=%s' % (
            id, self._quoteName(name))
        return self.handleBlogCategory(self._fetch(url))

    def main(self):
        """Demo driver: log in, then print the blog list and the categories."""
        redirect = self.login()
        # NOTE: per the original author, fetching the blog list fails unless
        # the post-login redirect is visited first (reason unknown).
        self.visitRedirectAfterLogin(redirect)
        for post in self.visitBlogList():
            print(post)
        for category in self.visitBlogCategory():
            print(category)
        # Category add/edit demo; it stays inactive while `res` is empty.
        # To try it, use: res = self.addBlogCategory('测试csdn2')
        res = []
        if res:
            endData = res[-1]
            print(endData)
            self.editBlogCategory(endData['id'], 'test_add')
            print(self.visitBlogCategory())


if __name__ == '__main__':
    # Script entry point: build a client and run the demo in Csdn.main().
    # The two strings are presumably placeholders for a real CSDN account.
    client = Csdn(username='csdnusername', password='csdnpassword')
    client.main()

# 你可能感兴趣的:(Python 模拟登录 CSDN 并获取博客列表等操作)