“
2014/06/28修改
添加了验证码操作
2014/06/28修改
之前的写得实在太烂了, 所以重写了代码
”
流程:
1. 访问一次登录框架的源码(地址: https://ui.ptlogin2.qq.com/cgi-bin/login, 完整参数见代码中的URL_LOAD_INFO), 获取login_sig, appid, pt_version, mibao, pt_lang等参数;
2. 访问检查验证码的地址(https://ssl.ptlogin2.qq.com/check): 如果不需要输入验证码, 返回结果的第二个参数就是验证码; 如果需要输入验证码, 则要再访问获取验证码图片的地址(https://ssl.captcha.qq.com/getimage), 下载图片后手动输入验证码;
3. 根据QQ号, QQ密码, 验证码的值计算得出p的值(第一次登录需要用到, 计算方法在PSWEncrypt模块中);
4. 构建相关的Form, 并对地址(https://ssl.ptlogin2.qq.com/login)发送请求(必须使用GET方法, 用POST会出错), 登录成功后会返回一些参数, 例如二次登录需要访问的地址等; 其中ptwebqq保存在cookie中, 需要另外获取;
5. 访问一次第一次登录时返回的地址, 构建相关的Form(参数大多在之前已经得到了, 其中clientid参数在数字10000000-99999999中随便选一个就行了), 并向地址(http://d.web2.qq.com/channel/login2)发送请求, 登录成功后会返回相关的参数, 如uin, status, vfwebqq, psessionid等, 保存下来, 后面其他操作会用到的。
6. 至此, WebQQ登录已经完成。
相关代码:
WebQQLogin.py
#coding=utf-8
import re;
import json;
import http;
import urllib;
import random;
import http.cookiejar;
import urllib.request;
from urllib.parse import urlencode;
from PSWEncrypt import PSWEncrypt;
from Queryable import *;
# Endpoint for the first login step (query string is appended to it).
URL_LOGIN = "https://ssl.ptlogin2.qq.com/login?"
# Endpoint for the second login step.
URL_LOGIN2 = "http://d.web2.qq.com/channel/login2"
# Open w.qq.com in Chrome and view the source of the login frame to find this
# URL; the page it returns contains most of the information needed to log in.
URL_LOAD_INFO = (
    "https://ui.ptlogin2.qq.com/cgi-bin/login"
    "?daid=164&target=self&style=16&mibao_css=m_webqq"
    "&appid=501004106&enable_qlogin=0&no_verifyimg=1"
    "&s_url=http%3A%2F%2Fw.qq.com%2Fproxy.html"
    "&f_url=loginerroralert&strong_login=1&login_state=10"
    "&t=20131024001"
)
# Endpoint that reports whether a verify code must be typed in.
URL_VERYCODE = "https://ssl.ptlogin2.qq.com/check?"
# Endpoint that serves the verify-code image.
URL_VERYCODE_IMG = "https://ssl.captcha.qq.com/getimage?"
# Request headers sent when loading the login frame and for the first login.
webQQHeader = {
    "Host": "ui.ptlogin2.qq.com",
    "Referer": "http://w.qq.com/",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/37.0.1916.153 Safari/537.36"
    ),
}

# Most of the later modules reuse this header.
webQQHeader2 = {
    "Referer": "http://d.web2.qq.com/proxy.html?v=20130916001&callback=1&id=2",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/35.0.1916.153 Safari/537.36"
    ),
}
class WebQQLogin(Queryable):
    """Two-step WebQQ login client built on urllib with a shared cookie jar.

    Flow implemented by :meth:`login`:

    1. ``_readInfo`` loads the login frame and scrapes its parameters.
    2. ``_firstLogin`` obtains a verify code, sends the login form with GET
       and returns the follow-up address from the reply.
    3. ``_secLogin`` visits that address and POSTs to ``URL_LOGIN2``.

    Every value gathered on the way is stored through the ``Queryable``
    key/value interface.
    """

    # Patterns are compiled once here instead of on every call. Raw strings
    # avoid the invalid-escape warnings that "\(" / "\d" trigger otherwise.
    _RE_LOGIN_SIG = re.compile(r'g_login_sig=encodeURIComponent\("([^"]+)"\)')
    _RE_APPID = re.compile(r'g_appid =encodeURIComponent\("(\d+)"\)')
    _RE_PT_VERSION = re.compile(r'g_pt_version=encodeURIComponent\("(\d+)"\)')
    _RE_MIBAO = re.compile(r'mibao_css=encodeURIComponent\("(\w+)"\)')
    _RE_LANG = re.compile(r'g_lang="([^"]+)"')
    # NOTE(review): the original pattern was lost (it was an empty string, so
    # findall() produced empty strings and item[0] raised IndexError). This
    # replacement is reconstructed from how the matches are consumed, as
    # (name, value) pairs of hidden form fields -- confirm against the page.
    _RE_HIDDEN_INPUT = re.compile(
        r'<input\s+type="hidden"\s+name="([^"]+)"\s+value="([^"]*)"'
    )
    _RE_CHECK_VC = re.compile(
        r"ptui_checkVC\('(\d+)','([^']*)','([^']+)', '([^']*)'\);"
    )
    _RE_PTUI_CB = re.compile(
        r"ptuiCB\('([^']*)',\s*'([^']*)',\s*'([^']*)',"
        r"\s*'([^']*)',\s*'([^']*)',\s*'([^']*)'\);"
    )

    def __init__(self):
        """Create the cookie-aware opener and pick a random client id."""
        # Keep an 8-digit clientid; it is uploaded during the second login.
        Queryable.__init__(
            self, {"clientid": str(random.randint(10000000, 99999999))}
        )
        self._cookie = http.cookiejar.LWPCookieJar()
        self._opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(self._cookie)
        )

    def _fetch(self, request, data=None):
        """Open *request* (URL or Request) and return the raw response body.

        The response is closed before returning so connections do not leak.
        """
        with self._opener.open(request, data) as response:
            return response.read()

    @staticmethod
    def _firstMatch(pattern, text, what):
        """Return the first match of *pattern* in *text*.

        Raises:
            IndexError: when nothing matches. The type is the one the former
                ``findall(...)[0]`` raised; the message now names the field.
        """
        found = pattern.search(text)
        if found is None:
            raise IndexError(
                "could not find {0} in the server response".format(what)
            )
        return found

    def _cookieValue(self, name):
        """Return the value of the first cookie called *name*, else None."""
        for cookie in self._cookie:
            if cookie.name == name:
                return cookie.value
        return None

    def _readInfo(self, url):
        """Load the login frame at *url* and store the parameters it carries.

        Stores ``login_sig``, ``appid``, ``js_ver``, ``mibao`` and ``pt_lang``
        plus every hidden form field found in the page.
        """
        request = urllib.request.Request(url, headers=webQQHeader)
        page = self._fetch(request).decode("utf-8")
        first = self._firstMatch
        scraped = {
            "login_sig": first(self._RE_LOGIN_SIG, page, "g_login_sig").group(1),
            "appid": first(self._RE_APPID, page, "g_appid").group(1),
            "js_ver": first(self._RE_PT_VERSION, page, "g_pt_version").group(1),
            "mibao": first(self._RE_MIBAO, page, "mibao_css").group(1),
            "pt_lang": first(self._RE_LANG, page, "g_lang").group(1),
        }
        # Hidden fields go in first so the explicitly scraped values win.
        for fieldName, fieldValue in self._RE_HIDDEN_INPUT.findall(page):
            self.setQuery(fieldName, fieldValue)
        self.setQueryEx(scraped)

    def _requestVerifyCode(self, param):
        """Download the verify-code image and ask the user to type it in.

        The image is written to ``verifyCode.jpg`` in the working directory.
        *param* is accepted for interface compatibility but is not used.
        """
        refererQuery = {
            "daid": self.queryInfo("daid"),
            "target": "self",
            "style": "16",
            "mibao_css": self.queryInfo("mibao"),
            "appid": self.queryInfo("appid"),
            "enable_qlogin": "0",
            "no_verifyimg": "1",
            "s_url": "http://w.qq.com/proxy.html",
            "f_url": "loginerroralert",
            "strong_login": "1",
            "login_state": "10",
            "t": "20131024001",
        }
        header = {
            # Fixed: the value used to be "Host:ssl.captcha.qq.com", which
            # repeated the header name inside the value.
            "Host": "ssl.captcha.qq.com",
            "Referer": "https://ui.ptlogin2.qq.com/cgi-bin/login?"
            + urlencode(refererQuery),
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
        }
        data = {
            "aid": self.queryInfo("appid"),
            "r": "0.1642276826314628",
            "uin": self.queryInfo("uin"),
        }
        request = urllib.request.Request(
            URL_VERYCODE_IMG + urlencode(data), headers=header
        )
        image = self._fetch(request)
        with open("verifyCode.jpg", "wb") as imageFile:
            imageFile.write(image)
        return input("输入验证码:")

    def _getVerycode(self, id):
        """Return the verify code to submit for account *id*.

        When the first field of the check reply is "0", its second field is
        returned as the code; otherwise the image is fetched and the user is
        prompted. (``id`` shadows the builtin; the name is kept so existing
        keyword callers keep working.)
        """
        form = {
            "login_sig": self.queryInfo("login_sig"),
            "r": "0.3272944095078856",
            "js_type": "0",
            "appid": self.queryInfo("appid"),
            "js_ver": self.queryInfo("js_ver"),
            "u1": "http://w.qq.com/proxy.html",
            "uin": id,
        }
        request = urllib.request.Request(URL_VERYCODE + urlencode(form))
        reply = self._fetch(request).decode("utf-8")
        fields = self._firstMatch(
            self._RE_CHECK_VC, reply, "the ptui_checkVC reply"
        ).groups()
        if fields[0] == "0":
            return fields[1]
        return self._requestVerifyCode(fields[1])

    def _firstLogin(self, id, psw):
        """Perform the first login step.

        Returns:
            The address to visit before the second login, or None when the
            server reply does not report success.

        Raises:
            IndexError: when the reply cannot be parsed or the ``ptwebqq``
                cookie is missing after a successful reply.
        """
        verifycode = self._getVerycode(id)
        # Most values come from the scraped frame to limit hard-coding
        # (some hard-coded values remain unavoidable).
        form = {
            "u": id,
            "p": PSWEncrypt.encrypt(id, psw, verifycode),
            "verifycode": verifycode,
            "webqq_type": self.queryInfo("webqq_type"),
            "remember_uin": self.queryInfo("remember_uin"),
            "login2qq": self.queryInfo("login2qq"),
            "aid": self.queryInfo("appid"),
            "u1": "{0}?login2qq={1}&webqq_type={2}".format(
                self.queryInfo("u1"),
                self.queryInfo("login2qq"),
                self.queryInfo("webqq_type"),
            ),
            "h": self.queryInfo("h"),
            "ptredirect": self.queryInfo("ptredirect"),
            "ptlang": self.queryInfo("pt_lang"),
            "daid": self.queryInfo("daid"),
            "from_ui": self.queryInfo("from_ui"),
            "pttype": self.queryInfo("pttype"),
            "dumy": self.queryInfo("dumy"),
            "fp": self.queryInfo("fp"),
            "action": "0-23-34008",
            "mibao_css": self.queryInfo("mibao"),
            "t": "1",
            "g": "1",
            "js_type": "0",
            "js_ver": self.queryInfo("js_ver"),
            "login_sig": self.queryInfo("login_sig"),
        }
        # The form has to be sent with GET; POST fails here.
        # NOTE(review): webQQHeader carries Host "ui.ptlogin2.qq.com" while
        # URL_LOGIN points at ssl.ptlogin2.qq.com -- left as in the original.
        request = urllib.request.Request(
            URL_LOGIN + urlencode(form), headers=webQQHeader
        )
        reply = self._fetch(request).decode("utf-8")
        fields = self._firstMatch(
            self._RE_PTUI_CB, reply, "the ptuiCB reply"
        ).groups()
        if fields[0] == "0" and fields[1] == "0":
            # First login succeeded: keep the returned parameters.
            self.setQuery("nick", fields[-1])
            # Read the cookie from the jar by name instead of running a
            # regex over the jar's string representation.
            ptwebqq = self._cookieValue("ptwebqq")
            if ptwebqq is None:
                raise IndexError("ptwebqq cookie missing after first login")
            self.setQuery("ptwebqq", ptwebqq)
            # This address must be visited before the second login.
            return fields[2]
        print(fields)  # debug
        return None

    def _secLogin(self, url):
        """Perform the second login step; return True on success.

        On success ``status``, ``vfwebqq`` and ``psessionid`` from the reply
        are stored.
        """
        # Visit the address returned by the first login first.
        self._fetch(url)
        # json.dumps escapes the values properly; the compact separators keep
        # the payload in the same shape as the former hand-built string, with
        # clientid sent as a bare number.
        payload = json.dumps(
            {
                "ptwebqq": self.queryInfo("ptwebqq"),
                "clientid": int(self.queryInfo("clientid")),
                "psessionid": "",
                "status": "online",
            },
            separators=(",", ":"),
        )
        request = urllib.request.Request(URL_LOGIN2, headers=webQQHeader2)
        body = urlencode({"r": payload}).encode("utf-8")
        ret = json.loads(self._fetch(request, body).decode("utf-8"))
        # retcode 0 means the second login succeeded.
        if ret["retcode"] == 0:
            result = ret["result"]
            self.setQuery("status", result["status"])
            self.setQuery("vfwebqq", result["vfwebqq"])
            self.setQuery("psessionid", result["psessionid"])
            return True
        print(ret)  # debug
        return False

    def login(self, id, psw):
        """Log in with account *id* and password *psw*.

        Returns:
            The dict of all stored parameters (including the ``opener`` and
            ``header`` entries added here) on success, or None when either
            login step fails.
        """
        self.setQuery("uin", id)
        self._readInfo(URL_LOAD_INFO)
        secLoginAddr = self._firstLogin(id, psw)
        if secLoginAddr is None:
            return None
        if not self._secLogin(secLoginAddr):
            return None
        self.setQuery("opener", self._opener)
        self.setQuery("header", webQQHeader2)
        # Hand back every parameter collected during the login.
        return self.getDict()
def main():
    """Log in with the sample account and print what login() returns.

    login() returns the collected parameter dict on success and None on
    failure.
    """
    # NOTE(review): the account number and the "xxxx" password are hard-coded
    # sample values; replace them before running.
    qq = WebQQLogin()
    print(qq.login("397828451", "xxxx"))


if __name__ == "__main__":
    main()
#coding=utf-8
class Queryable:
    """Minimal key/value store wrapped around a caller-supplied dict."""

    def __init__(self, dict):
        """Wrap *dict* as the backing store.

        The mapping is kept by reference, not copied. The parameter name
        shadows the builtin; it is kept so keyword callers keep working.
        """
        self.__dict = dict

    def queryInfo(self, key):
        """Return the value stored under *key*, or None when it is absent."""
        if self.find(key):
            return self.__dict[key]
        return None

    def setQuery(self, key, value):
        """Store *value* under *key*, replacing any previous value."""
        self.__dict[key] = value

    def setQueryEx(self, dict):
        """Merge every entry of *dict* into the store."""
        self.__dict.update(dict)

    def find(self, key):
        """Return True when *key* is present in the store, else False."""
        return key in self.__dict

    def getDict(self):
        """Return the backing dict itself (not a copy)."""
        return self.__dict