# Verified by the original author to log in successfully on Python 3.3.
import http.cookiejar
import re
import urllib.parse
import urllib.request

# Login form page; the hidden "lt" / "execution" fields are scraped from it.
_LOGIN_PAGE_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postlist'
# URL the credentials are POSTed to (also sent as the Referer header).
_LOGIN_POST_URL = 'https://passport.csdn.net/?service=http://write.blog.csdn.net/postedit'
_BLOG_LIST_URL = 'http://write.blog.csdn.net/postlist'
_CATEGORY_URL = 'http://write.blog.csdn.net/category'
_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
               '(KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36')

# Message stored in ``errorInfo`` and raised when the site rejects the login.
_LOGIN_ERROR_TEXT = '帐户名或登录密码不正确'

_LT_RE = re.compile(r'name="lt" value="(.*?)"')
_EXECUTION_RE = re.compile(r'name="execution" value="(.*?)"')
_LOGIN_ERROR_RE = re.compile(r'<span id="error-message">帐户名或登录密码不正确')
_REDIRECT_RE = re.compile(r'var redirect = "(.*?)"')
_BLOG_ROW_RE = re.compile(
    r"<tr(.*?)<a href='(.*?)'(.*?)>(.*?)</a>(.*?)\((.*?)\)</span>(.*?)</td><td>([0-9]+)</td><td>([0-9]+)([\s\S]*?)</tr>"
)
_CATEGORY_ROW_RE = re.compile(
    r"<td class='tdleft'><span>(.*?)</span></td>([\s\S]*?)<a href='#([0-9]+)'"
)


class CsdnError(Exception):
    """Raised when logging in fails or an expected page element is missing."""


class Csdn():
    """Minimal CSDN client: logs in and reads/edits blog posts and categories.

    All requests go through one cookie-aware opener (``self.opener``) so the
    session established by ``login`` is reused by the other methods.
    """

    def __init__(self, username, password):
        """Store the credentials and build the cookie-aware opener.

        ``loginStatus`` (0 = not logged in, 1 = logged in) and ``errorInfo``
        are initialised here so they can be read before ``login`` is called.
        """
        self.username = username
        self.password = password
        self.loginStatus = 0
        self.errorInfo = ''
        self.createCookie()

    def createCookie(self):
        """Create ``self.opener``, an opener that keeps cookies across requests."""
        cookie = http.cookiejar.CookieJar()
        cookieProc = urllib.request.HTTPCookieProcessor(cookie)
        self.opener = urllib.request.build_opener(cookieProc)

    def _fetch(self, url, data=None):
        """Open *url* (POST when *data* is given) and return the decoded body.

        The response is closed once read; undecodable bytes are dropped.
        """
        with self.opener.open(url, data=data) as response:
            return response.read().decode('utf-8', 'ignore')

    @staticmethod
    def _quoteName(name):
        """Percent-encode a category name for use in a query string."""
        return urllib.parse.quote(name.encode('utf-8', 'ignore'))

    def getKeyBeforeLogin(self):
        """Fetch the login page and return its hidden form tokens.

        Returns:
            dict with the keys ``'lt'`` and ``'execution'``.

        Raises:
            CsdnError: if either field cannot be found in the page.
        """
        html = self._fetch(_LOGIN_PAGE_URL)
        lt = _LT_RE.search(html)
        execution = _EXECUTION_RE.search(html)
        if lt is None or execution is None:
            raise CsdnError(
                'login page did not contain the "lt"/"execution" form fields')
        return {'lt': lt.group(1), 'execution': execution.group(1)}

    def login(self):
        """Log in to CSDN and return the redirect URL found in the response.

        Sets ``self.loginStatus`` (and ``self.errorInfo`` on failure).

        Raises:
            CsdnError: if the site reports wrong credentials, or if the
                response contains no redirect URL.
        """
        res = self.getKeyBeforeLogin()
        postData = {
            'username': self.username,
            'password': self.password,
            'lt': res['lt'],
            'execution': res['execution'],
            '_eventId': 'submit',
        }
        # These headers stay on the shared opener for all later requests.
        self.opener.addheaders = [
            ('host', 'passport.csdn.net'),
            ('User-Agent', _USER_AGENT),
            ('Referer', _LOGIN_POST_URL),
        ]
        encoded = urllib.parse.urlencode(postData).encode(encoding='UTF8')
        text = self._fetch(_LOGIN_POST_URL, data=encoded)

        if _LOGIN_ERROR_RE.search(text):
            self.loginStatus = 0
            self.errorInfo = _LOGIN_ERROR_TEXT
            raise CsdnError(self.errorInfo)

        # Absence of the error marker is treated as a successful login.
        self.loginStatus = 1
        redirect = _REDIRECT_RE.search(text)
        if redirect is None:
            raise CsdnError('login response did not contain a redirect URL')
        return redirect.group(1)

    def visitRedirectAfterLogin(self, redirect):
        """Request the redirect URL returned by ``login``; the body is discarded."""
        self._fetch(redirect)

    def visitBlogList(self):
        """Fetch the blog list page and return its parsed posts."""
        return self.handleBlogList(self._fetch(_BLOG_LIST_URL))

    def handleBlogList(self, text):
        """Parse blog list HTML into a list of post dicts.

        Each dict has the string values ``'url'`` and ``'name'`` (link target
        and link text), ``'time'`` (the parenthesised text before ``</span>``),
        and ``'readnum'`` / ``'comment'`` (the two numeric cells that follow).
        Returns an empty list when nothing matches.
        """
        return [
            {'url': row[1], 'name': row[3], 'time': row[5],
             'readnum': row[7], 'comment': row[8]}
            for row in _BLOG_ROW_RE.findall(text)
        ]

    def visitBlogCategory(self):
        """Fetch the category page and return its parsed categories."""
        return self.handleBlogCategory(self._fetch(_CATEGORY_URL))

    def handleBlogCategory(self, text):
        """Parse category HTML into a list of ``{'name': ..., 'id': ...}`` dicts.

        Returns an empty list when nothing matches.
        """
        return [
            {'name': row[0], 'id': row[2]}
            for row in _CATEGORY_ROW_RE.findall(text)
        ]

    def addBlogCategory(self, name):
        """Add a blog category and return the categories parsed from the response."""
        url = 'http://write.blog.csdn.net/category?t=add&name=%s' % self._quoteName(name)
        return self.handleBlogCategory(self._fetch(url))

    def editBlogCategory(self, id, name):
        """Rename category *id* and return the categories parsed from the response."""
        # ``id`` shadows the builtin but is kept: it is part of the public signature.
        url = 'http://write.blog.csdn.net/category?t=edit&id=%s&name=%s' % (
            id, self._quoteName(name))
        return self.handleBlogCategory(self._fetch(url))

    def main(self):
        """Demo entry point: log in, then print the blog list and categories."""
        redirect = self.login()
        # NOTE (original author): if visitRedirectAfterLogin is not called,
        # requesting the blog list afterwards runs into problems; the cause
        # is not known.
        self.visitRedirectAfterLogin(redirect)

        for post in self.visitBlogList():
            print(post)

        for category in self.visitBlogCategory():
            print(category)

        # Demo toggle: assign e.g. self.addBlogCategory('测试csdn2') to ``res``
        # to exercise adding and then renaming a category.
        res = []
        if res:
            endData = res[-1]
            print(endData)
            self.editBlogCategory(endData['id'], 'test_add')
            print(self.visitBlogCategory())


if __name__ == '__main__':
    csdn = Csdn('csdnusername', 'csdnpassword')
    csdn.main()