目前在某小公司的大数据组实习,第二个爬虫练习就是工商的爬虫gsxt.gov.cn。与第一个爬虫练习不同,第一个的爬虫练习,验证码是图片,思路就是下载图片下来,人工输入验证码,提交参数,继续爬虫。而这个涉及到了滑动验证码,验证的参数并不是那么容易获取。
于是站在前人的肩膀上,找到了以下超详细的教程,前面重复的东西就不写了,直接看大佬的文章吧。但是,该教程的验证码js核心是geetest.5的版本,而更新的是geetest.6版本了,直接运行代码就会被forbidden了。所以,必须找到geetest.6 和 geetest.5 的区别,才能破解。
总的来说,关键依然是找出 ajax.php? 的参数 userresponse 和 a 是如何构造的(现在参数 a 已经改名为 aa 了)。
关键的请求:
http://api.geetest.com/ajax.php
GET
Query string gt 1d2c042096e050f07cb35ff3df5afd92 challenge 8208538c7cc334e9b62cd8e95c74baadel userresponse 93339939999ee5e27 passtime 2718 imgload 133 aa F-/!)!)!)(!!?ts((ytysstss5s@ts….. callback geetest_1511250608309 response:
geetest_1511250608309({"success":1, "message": "success", "score":"8","validate":"c58525b4f04020a8f1c326e22714c22c"})
不比不知道,一比吓一跳!geetest.6 的代码完全被混淆了!!!
geetest.5 获取 aa
的函数:
// geetest.5: turns the recorded track identified by `a` into the string sent as the
// `a` request parameter. The track is fetched with Q.t("arr", a) and reduced by c();
// each reduced entry is either replaced by a one-character shortcut from e() or
// encoded field by field with d(). (c, d, e and Q are defined elsewhere in geetest.5.)
// The three encoded streams are joined with "!!".
f = function(a) {
    var steps = c(Q.t("arr", a));
    var total = steps.length;
    var firstCodes = [];   // encoded step[0] values (presumably x deltas)
    var secondCodes = [];  // shortcut characters, or encoded step[1] values (presumably y deltas)
    var thirdCodes = [];   // encoded step[2] values (presumably time deltas)
    for (var n = 0; n < total; n++) {
        var step = steps[n];
        var shortcut = e(step);
        if (shortcut) {
            secondCodes.push(shortcut);
        } else {
            firstCodes.push(d(step[0]));
            secondCodes.push(d(step[1]));
        }
        thirdCodes.push(d(step[2]));
    }
    return [firstCodes.join(""), secondCodes.join(""), thirdCodes.join("")].join("!!");
};
geetest.6 获取 aa
的函数:
(代码被混淆了,前面反混淆了才知道这个是获取 aa
的函数)
// geetest.6 (obfuscated original): the routine the article identifies as the builder of `aa`.
// The body is control-flow-flattened: `f8h` holds the current state, every `case` label and
// every "next state" value is a lookup into the m6h.V3h()/m6h.H6h() table, and the while loop
// runs until `f8h` equals the exit value. m6h.N7O(n)/m6h.v7O(n) return strings from the decoded
// string table; per the article's deobfuscation 472 is "length", 592 is "push", 479 is "join",
// 508 is "" and 11 is "!!" (not verifiable from this excerpt alone).
function(z3O) {
var f8h = m6h.V3h()[35][36][34][8];
while (f8h !== m6h.V3h()[36][16][12]) {
switch (f8h) {
case m6h.V3h()[25][15][6]:
// Loop test `i3O < C3O`; the `m2h * (m2h + 1) % 2 + 3` operand is always >= 3 (truthy).
f8h = m2h * (m2h + 1) % 2 + 3 && i3O < C3O ? m6h.H6h()[38][25][0] : m6h.H6h()[19][34][30];
break;
case m6h.V3h()[18][32][15]:
// Initialises the counter that only feeds the always-truthy operand above.
var m2h = 1;
f8h = m6h.V3h()[16][21][36];
break;
case m6h.H6h()[23][12][30]:
// Result: the three arrays joined and concatenated with the N7O(11) separator.
// The two statements after `return` are unreachable.
return r3O[m6h.N7O(479)](m6h.N7O(508)) + m6h.N7O(11) + l3O[m6h.v7O(479)](m6h.N7O(508)) + m6h.v7O(11) + U3O[m6h.v7O(479)](m6h.v7O(508));
f8h = m6h.V3h()[24][4][12];
break;
case m6h.H6h()[26][11][9]:
// Loop increment.
i3O++;
f8h = m6h.V3h()[25][15][6];
break;
case m6h.H6h()[11][29][36]:
// Setup: u3O is p3O() applied to a value looked up via H97[...] for z3O; three empty output arrays.
var a3O, u3O = p3O(H97[m6h.N7O(668)](m6h.N7O(40), z3O)),
r3O = [],
l3O = [],
U3O = [],
i3O = 0,
C3O = u3O[m6h.N7O(472)];
f8h = m6h.V3h()[5][8][6];
break;
case m6h.V3h()[29][16][0]:
// Loop body: g3O() may yield a truthy value that goes to l3O; otherwise elements 0 and 1
// are passed through S3O() into r3O and l3O. Element 2 always goes through S3O() into U3O.
a3O = g3O(u3O[i3O]),
a3O ? l3O[m6h.N7O(592)](a3O) : (r3O[m6h.N7O(592)](S3O(u3O[i3O][0])), l3O[m6h.N7O(592)](S3O(u3O[i3O][1]))),
U3O[m6h.N7O(592)](S3O(u3O[i3O][2]));
m2h = m2h > 65568 ? m2h / 6 : m2h * 6;
f8h = m6h.H6h()[13][24][31][5];
break;
}
}
},
6000多行的代码,不需要全都反混淆完,只需要关注参数构造的地方就好了
整理了一下,缩略后代码结构如下:
在 geetest.6.0.1.js 中,先要把最开始的两个函数 m177.Y3h 和 m177.e7O 读懂,这是反混淆的关键。
先看看第一个函数 m177.Y3h:
// m177.Y3h (obfuscated original): an immediately-invoked function. Its only reachable state
// (c === 2) returns an object with one property `o`, whose value is the result of calling the
// inner function M with (39, 14) straight away.
m177.Y3h = function() {
var c = 2;
while (c !== 1) {
switch (c) {
case 2:
return {
o:
// M(f, y) is a flattened state machine: `s` is the current state and 10 is the exit state.
// Starting from s = 2 the states run as
//   2 -> 1 -> (5 -> 4 -> 3)* -> 9 -> (8 -> 7 -> (6 -> 14 -> 13)* -> 12)* -> 11
// i.e. one loop that creates f empty rows, then a nested loop that fills every row with
// references to the rows themselves, then `return P`.
function M(f, y) {
var s = 2;
while (s !== 10) {
switch (s) {
case 5:
// First loop test: R < f.
s = R < f ? 4 : 9;
break;
case 13:
// Inner loop step: q counts down.
q -= 1;
s = 6;
break;
case 8:
// Outer loop test: E < f.
s = E < f ? 7 : 11;
break;
case 4:
// First loop body: create an empty row at a rotated index.
P[(R + y) % f] = [];
s = 3;
break;
case 1:
var R = 0;
s = 5;
break;
case 11:
// Result; the two statements after `return` are unreachable.
return P;
s = 10;
break;
case 9:
var E = 0;
s = 8;
break;
case 7:
// Inner loop initialisation: q starts at the last index.
var q = f - 1;
s = 6;
break;
case 6:
// Inner loop test: q >= 0.
s = q >= 0 ? 14 : 12;
break;
case 14:
// Inner loop body: cell of row E points at row q (a reference, not a copy).
P[E][(q + y * E) % f] = P[q];
s = 13;
break;
case 3:
// First loop step.
R += 1;
s = 5;
break;
case 2:
// Entry state.
var P = [];
s = 1;
break;
case 12:
// Outer loop step.
E += 1;
s = 8;
break;
}
}
} (39, 14)
};
c = 1;
break;
}
}
} ();
这里就是不断的case判断,然后执行每一个case里面的代码。
转换成平时的代码,如下:
// Readable equivalent of the flattened m177.Y3h initialiser: the table is built once,
// at load time, by M(39, 14) and exposed as the `o` property.
m177.Y3h = (function() {
    var table = M(39, 14);
    return {o: table};
})();
// Builds a self-referential two-dimensional array: f rows are created first, then every
// cell of every row is set to a reference to one of those rows, so the table can be
// indexed to any depth.
//   f - number of rows (and of cells per row)
//   y - rotation step used when choosing which cell receives which row
// Returns the array of rows.
function M(f, y) {
    var P = [];
    for (var created = 0; created < f; created++) {
        P[(created + y) % f] = [];
    }
    for (var row = 0; row < f; row++) {
        for (var src = f - 1; src >= 0; src--) {
            // The cell holds the row object itself, not a copy.
            P[row][(src + y * row) % f] = P[src];
        }
    }
    return P;
}
用chrome调试验证一下:
确实就是一个二维数组
然后第二个函数 m177.e7O:
// m177.e7O (obfuscated original): an immediately-invoked function whose only reachable state
// (R === 2) returns an object with one property `o`. `o` is the value returned by calling the
// inner function with the key 'UO4GPS' straight away, i.e. the accessor function(q) built in
// state 7 below.
m177.e7O = function() {
var R = 2;
while (R !== 1) {
switch (R) {
case 2:
return {
o:
// Inner decoder, flattened: `y` is the current state and 14 is the exit state.
// Starting from y = 2 the states run as 2 -> 1 -> (5 -> 4 -> [3 ->] 9 -> 8)* -> 7.
function(E) {
var y = 2;
while (y !== 14) {
switch (y) {
case 2:
var M = '',
// The very long encoded string is abbreviated here (by the article's author).
P = decodeURI("&=Wl%E5%99%87%E8%A8%B5%E9%80%9B%");
y = 1;
break;
case 5:
// Loop test: k walks over every character of P.
y = k < P.length ? 4 : 7;
break;
case 1:
var k = 0,
f = 0;
y = 5;
break;
case 9:
// XOR the current character of P with the current character of the key E.
M += String.fromCharCode(P.charCodeAt(k) ^ E.charCodeAt(f));
y = 8;
break;
case 4:
// Wrap the key index when it reaches the end of the key.
y = f === E.length ? 3 : 9;
break;
case 7:
// Split the decoded text on '+' and return an accessor that looks entries up by index.
// The statements after the inner `return` statements are unreachable.
M = M.split('+');
return function(q) {
var c = 2;
while (c !== 1) {
switch (c) {
case 2:
return M[q];
c = 1;
break;
}
}
};
y = 14;
break;
case 3:
f = 0;
y = 9;
break;
case 8:
// Advance both the text index and the key index.
k++,
f++;
y = 5;
break;
}
}
} ('UO4GPS')
};
R = 1;
break;
}
}
} ();
继续来转换一下代码:
// Readable equivalent of the flattened m177.e7O initialiser.
// `o` is the accessor returned by build_string_table('UO4GPS'): calling
// m177.e7O.o(n) yields the n-th entry of the decoded string table, exactly as the
// obfuscated original (which returns `function(q) { return M[q]; }`) does.
m177.e7O = function() {
    return {o: build_string_table('UO4GPS')};
} ();

// Decodes the embedded string table.
//   E - the XOR key; it is applied cyclically over the encoded text.
// Returns a function(q) that gives back the q-th decoded string.
function build_string_table(E) {
    var M = '';
    // The very long encoded string is abbreviated here. NOTE: the abbreviated literal ends
    // in a bare '%', so decodeURI() throws URIError until the full string is pasted back in.
    var P = decodeURI("&=Wl%E5%99%87%E8%A8%B5%E9%80%9B%");
    for (var k = 0; k < P.length; k++) {
        // k % E.length reproduces the original's "reset the key index at the end of the key".
        M += String.fromCharCode(P.charCodeAt(k) ^ E.charCodeAt(k % E.length));
    }
    M = M.split('+');
    return function(q) {
        return M[q];
    };
}
继续用chrome调试查看一下M:
返回的M是一个字符串的数组。
至此,已经知道了:
m177.Y3h.o 是一个循环引用的二维数组;
m177.e7O.o 是一个以下标为参数、返回字符串数组 M 中对应元素的函数(即 m177.e7O.o(q) = M[q])。
接下来,这几条语句很容易看懂
// Accessor pairs emitted by the obfuscator. Each one forwards to the `o` property of its
// backing object: when `o` is a function it is called with the caller's arguments (and the
// backing object as `this`); otherwise `o` itself is returned.

// Table accessors backed by m177.Y3h.
m177.V3h = function() {
    var target = m177.Y3h.o;
    if (typeof target === 'function') {
        return target.apply(m177.Y3h, arguments);
    }
    return target;
};
m177.H6h = function() {
    var target = m177.Y3h.o;
    if (typeof target === 'function') {
        return target.apply(m177.Y3h, arguments);
    }
    return target;
};

// String accessors backed by m177.e7O.
m177.N7O = function() {
    var target = m177.e7O.o;
    if (typeof target === 'function') {
        return target.apply(m177.e7O, arguments);
    }
    return target;
};
m177.v7O = function() {
    var target = m177.e7O.o;
    if (typeof target === 'function') {
        return target.apply(m177.e7O, arguments);
    }
    return target;
};
// In short: m177.V3h and m177.H6h both resolve to m177.Y3h.o,
// and m177.N7O and m177.v7O both resolve to m177.e7O.o.
本人并没用过javaScript,百度了一下apply和arguments,大概了解了,相当于传入参数作为数组下标
比如:
m177.H6h()[13][1][27][27]
= m177.Y3h.o[13][1][27][27]
,即数组P[13][1][27][27]
后面代码中会经常用到 case m177.H6h()[13][1][27][27]
这样来控制循环,这时候要把 m177.H6h()[13][1][27][27]
转换成数字,可以使用该数组所在的位置作为判断的数字。通过console可以看到
case m177.H6h()[13][1][27][27]:
也就是 case 14 :(即二维数组的第 14 行 P[14])
同理,m177.N7O(416) = m177.e7O.o(416),即字符串数组 M[416],就是字符串“.jpg”。
在代码中,ctrl + f 居然没有找到userresponse、aa和其他参数构造的地方??!!!!
大概略了一下代码,发现居然有十六进制表示的字符串!!!敢肯定,参数被转换成十六进制了。
果然找到了!
// geetest.6 (obfuscated original): final encoding of j9O followed by the query-parameter
// object for the ajax.php request. Property names are written as hex escapes; the comment
// above each key gives the decoded name.
// j9O goes through o97[...]() when the flag k9O[N7O(58)][v7O(405)] is truthy, otherwise through
// encodeURIComponent().
j9O = k9O[m6h.N7O(58)][m6h.v7O(405)] ? o97[m6h.N7O(446)](k9O[m6h.N7O(571)] + o97[m6h.N7O(516)](m6h.v7O(453)) + j9O) : encodeURIComponent(j9O);
var h9O = {
//gt
'\x67\x74': k9O[m6h.v7O(58)][m6h.v7O(461)],
//challenge
'\x63\x68\x61\x6c\x6c\x65\x6e\x67\x65': k9O[m6h.v7O(58)][m6h.N7O(658)],
//userresponse (computed from b9O and the challenge value)
'\x75\x73\x65\x72\x72\x65\x73\x70\x6f\x6e\x73\x65': g97[m6h.N7O(152)](b9O, k9O[m6h.v7O(58)][m6h.N7O(658)]),
//passtime (difference of two values looked up through H97)
'\x70\x61\x73\x73\x74\x69\x6d\x65': H97[m6h.N7O(668)](m6h.N7O(111), k9O[m6h.N7O(130)])[m6h.N7O(605)]() - H97[m6h.v7O(668)](m6h.v7O(448), k9O[m6h.v7O(130)]),
//imgload (the escapes spell "imgload", not "image")
'\x69\x6d\x67\x6c\x6f\x61\x64': H97[m6h.v7O(668)](m6h.v7O(173), k9O[m6h.v7O(130)]),
//aa
'\x61\x61': j9O
};
按照之前的反混淆方法,把转换成我们平时看的代码
// Deobfuscated form of the geetest.6 request-parameter construction.
// The string-table lookups are replaced by the strings the article decoded for them
// ("d", "referer_encode", "A", "B", "t", "C", "a", "b", ...).
// j9O is encoded through o97.t() when config flag `referer_encode` is set, otherwise it is
// URI-encoded. k9O.A is a property read (the original is k9O[m6h.N7O(571)]), not a call.
j9O = k9O.d.referer_encode ? o97.t(k9O.A + o97.B("REFb0UEJ") + j9O) : encodeURIComponent(j9O);
var h9O = {
    gt: k9O.d.gt,
    challenge: k9O.d.challenge,
    userresponse: g97.C(b9O, k9O.d.challenge),
    // Elapsed time: endTime.getTime() minus startTime, both looked up through H97.a(name, id).
    passtime: H97.a("endTime", k9O.b).getTime() - H97.a("startTime", k9O.b),
    // The hex-escaped key in the original spells "imgload".
    imgload: H97.a("imgload", k9O.b),
    aa: j9O
};
对比一下geetest.5的代码:
// geetest.5: query parameters for the ajax.php request, filled in one field at a time
// in the same order as the original object literal.
var p = {};
p.gt = a.config.gt;
p.challenge = a.config.challenge;
p.userresponse = ca.ra(l, a.config.challenge);
// Elapsed time between the stored start and end timestamps for this instance.
p.passtime = Q.t("endTime", a.id).getTime() - Q.t("startTime", a.id);
p.imgload = Q.t("imgload", a.id);
// In geetest.5 the track parameter is still called `a`.
p.a = encodeURIComponent(n);
大概能看出来了,其实改变的不多。重点关注userresponse 和 aa 参数构造的函数
userresponse: g97["C"](b9O, k9O["d"]["challenge"]),
// k9O["A"] is a property read in the obfuscated original (k9O[m6h.N7O(571)]), not a call.
j9O = k9O["d"]["referer_encode"] ? o97["t"](k9O["A"] + o97["B"]("REFb0UEJ") + j9O) : encodeURIComponent(j9O);
找到g97["C"]
函数
// geetest.6 (obfuscated original): the function that computes `userresponse` from
// Y9O (first argument) and C9O (second argument, the challenge string at the call site).
// The body is control-flow-flattened: `Y9h` is the state, and all case labels and next-state
// values are lookups into the m6h.V3h()/m6h.H6h() table, so the execution order cannot be
// read off the textual order of the cases. m6h.N7O(n)/m6h.v7O(n) return strings from the
// decoded string table; per the article's deobfuscation 612 is "slice", 472 "length",
// 493 "charCodeAt", 580 "round", 89 "charAt", 592 "push", 436 "random", 84 "splice" and
// 508 "" (not verifiable from this excerpt alone).
// i1h, v1h and N1h only feed loop-condition operands that are always truthy.
g97[m6h.v7O(152)] = function(Y9O, C9O) {
var Y9h = m6h.H6h()[32][33][15];
while (Y9h !== m6h.V3h()[21][16][3]) {
switch (Y9h) {
case m6h.H6h()[12][35][33]:
// Bookkeeping for the dummy counter of the third loop.
i1h = i1h >= 11137 ? i1h - 10 : i1h + 10;
Y9h = m6h.H6h()[18][3][9];
break;
case m6h.H6h()[19][18][24]:
// Third loop body: if the current weight c9O[a9O] still fits into q9O, append a randomly
// chosen entry of group U9O[a9O] to L9O and subtract the weight; otherwise drop that
// group and weight and move to the next smaller index.
q9O - c9O[a9O] >= 0 ? (I9O = parseInt(Math[m6h.N7O(436)]() * U9O[a9O][m6h.N7O(472)], 10), L9O += U9O[a9O][I9O], q9O -= c9O[a9O]) : (U9O[m6h.v7O(84)](a9O, 1), c9O[m6h.v7O(84)](a9O, 1), a9O -= 1);
Y9h = m6h.H6h()[4][1][33];
break;
case m6h.H6h()[7][27][27]:
var x9O = C9O[m6h.v7O(472)];
Y9h = m6h.V3h()[28][24][18];
break;
case m6h.H6h()[27][32][25][38]:
// Third loop setup: q9O is the amount still to be consumed, c9O the weights.
var I9O, q9O = D9O,
a9O = 4,
L9O = m6h.v7O(508),
c9O = [1, 2, 5, 10, 50];
Y9h = m6h.H6h()[29][1][9];
break;
case m6h.H6h()[23][3][12]:
// Loop increment (one of two identical increment states).
l9O++;
Y9h = m6h.H6h()[38][38][0];
break;
case m6h.V3h()[11][6][34][8]:
// Result; the two statements after `return` are unreachable.
return L9O;
Y9h = m6h.V3h()[16][24][3];
break;
case m6h.V3h()[22][25][6]:
// Second loop body: every character not seen before (tracked in R9O) is appended to one of
// the five groups in U9O, cycling the group index r9O through 0..4.
f9O = C9O[m6h.v7O(89)](l9O),
R9O[f9O] || (R9O[f9O] = 1, U9O[r9O][m6h.N7O(592)](f9O), r9O++, r9O = 5 == r9O ? 0 : r9O);
v1h = v1h >= 72238 ? v1h / 6 : v1h * 6;
Y9h = m6h.H6h()[33][3][18];
break;
case m6h.H6h()[17][4][0]:
// First loop setup: z9O is the part of C9O from index 32 onwards.
var z9O = C9O[m6h.N7O(612)](32),
W9O = [],
l9O = 0;
Y9h = m6h.V3h()[18][31][6][6];
break;
case m6h.H6h()[14][37][12][12]:
// Second loop setup: five empty groups and a "seen" map.
var f9O, U9O = [[], [], [], [], []],
R9O = {},
r9O = 0;
l9O = 0;
Y9h = m6h.H6h()[26][20][27];
break;
case m6h.V3h()[34][22][15]:
var i1h = 5;
var v1h = 0;
Y9h = m6h.H6h()[14][17][4][17];
break;
case m6h.H6h()[18][27][18]:
// Loop increment (one of two identical increment states).
l9O++;
Y9h = m6h.H6h()[24][7][18];
break;
case m6h.H6h()[5][4][24]:
// Combine the two values collected by the first loop as 36 * W9O[0] + W9O[1], add the
// rounded first argument, then keep only the first 32 characters of C9O.
z9O = 36 * W9O[0] + W9O[1];
var D9O = Math[m6h.N7O(580)](Y9O) + z9O;
C9O = C9O[m6h.v7O(612)](0, 32);
Y9h = m6h.V3h()[9][6][33];
break;
case m6h.V3h()[18][1][18]:
// Second loop test: l9O < x9O (the other operand is always truthy).
Y9h = v1h * (v1h + 1) % 2 + 8 && l9O < x9O ? m6h.V3h()[31][34][6] : m6h.V3h()[35][27][36];
break;
case m6h.H6h()[8][32][9]:
// First loop body: map a character code to a number (code - 87 above 57, else code - 48).
var s9O = z9O[m6h.N7O(493)](l9O);
W9O[l9O] = s9O > 57 ? s9O - 87 : s9O - 48;
N1h = N1h > 28329 ? N1h - 9 : N1h + 9;
Y9h = m6h.V3h()[24][17][12];
break;
case m6h.V3h()[21][34][0]:
// First loop test: l9O < length of z9O (the other operand is always true).
Y9h = l9O < z9O[m6h.N7O(472)] && N1h * (N1h + 1) * N1h % 2 == 0 ? m6h.H6h()[17][2][9] : m6h.V3h()[9][21][24];
break;
case m6h.V3h()[27][4][6]:
var N1h = 5;
Y9h = m6h.V3h()[35][22][0];
break;
case m6h.H6h()[14][25][14][1]:
// Third loop test: q9O > 0 (the other operand is always truthy).
Y9h = q9O > 0 && i1h * (i1h + 1) % 2 + 1 ? m6h.V3h()[19][18][24] : m6h.H6h()[28][10][3];
break;
}
}
};
一样的套路,反混淆完,再对比geetest.5的代码,如下:
// Deobfuscated geetest.6 routine that builds the `userresponse` request parameter.
// (The loops are written as plain loops; logically they match the flattened original.)
//
//   a - the slide distance; it is rounded to an integer.
//   b - the challenge string: 32 characters followed by two extra characters that are read
//       as base-36 digits (0-9, a-z).
// Returns a string whose characters are picked at random from five groups of the
// challenge's distinct characters; the groups carry the weights 1, 2, 5, 10 and 50 and the
// weights of the picked characters add up to round(a) + (36 * digit0 + digit1).
//
// The obfuscator's extra loop operands (N1h * (N1h + 1) * N1h % 2 == 0 and friends) are
// always truthy, because n * (n + 1) is always even, so they are left out.
function get_userresponse(a, b) {
    // Decode the trailing characters: '0'-'9' -> 0-9, 'a'-'z' -> 10-35.
    var suffix = b.slice(32);
    var digits = [];
    for (var e = 0; e < suffix.length; e++) {
        var code = suffix.charCodeAt(e);
        digits[e] = code > 57 ? code - 87 : code - 48;
    }
    var target = Math.round(a) + (36 * digits[0] + digits[1]);

    // Deal the distinct characters of the first 32 challenge characters round-robin
    // into five groups.
    var challenge = b.slice(0, 32);
    var groups = [[], [], [], [], []];
    var seen = {};
    var nextGroup = 0;
    for (var pos = 0; pos < challenge.length; pos++) {
        var ch = challenge.charAt(pos);
        if (!seen[ch]) {
            seen[ch] = 1;
            groups[nextGroup].push(ch);
            nextGroup = (nextGroup + 1) % 5;
        }
    }

    // Greedy decomposition of `target` into the weights, largest first. A group and its
    // weight are removed as soon as the weight no longer fits.
    var weights = [1, 2, 5, 10, 50];
    var remaining = target;
    var current = 4;
    var result = '';
    while (remaining > 0) {
        if (remaining - weights[current] >= 0) {
            // Math.floor instead of parseInt: parseInt stringifies its argument first and
            // would misread very small products such as 5e-7 as 5.
            var pick = Math.floor(Math.random() * groups[current].length);
            result += groups[current][pick];
            remaining -= weights[current];
        } else {
            groups.splice(current, 1);
            weights.splice(current, 1);
            current -= 1;
        }
    }
    return result;
}
几乎没什么改变,只是每个循环条件里多了以下的表达式。它们其实是混淆器插入的恒真判断(不透明谓词):相邻整数之积 n * (n + 1) 必为偶数,所以第一个表达式恒为 true,后两个恒为非零值,对结果没有任何影响。移植时照抄或者去掉都可以。
N1h * (N1h + 1) * N1h % 2 == 0
v1h * (v1h + 1) % 2 + 8
i1h * (i1h + 1) % 2 + 1
那么接着来看看生成 aa 的函数。
aa 跟 j9O 有关,查看 j9O 的定义:
// geetest.6 (obfuscated original): b9O is T9O parsed as an integer; j9O is the value
// returned by X97[m6h.v7O(446)] for k9O[m6h.N7O(130)]. Per the article these two lookups
// decode to "t" and "b", i.e. j9O = X97["t"](k9O["b"]).
var b9O = parseInt(T9O),
j9O = X97[m6h.v7O(446)](k9O[m6h.N7O(130)]);
找到 X97[m6h.v7O(446)](k9O[m6h.N7O(130)]); 这个函数的声明,
即 X97["t"](k9O["b"]),其中 t 的十六进制写法是 '\x74'。
找到该函数如下:
// geetest.6 (obfuscated original): property '\x74' ("t"), the first stage of building `aa`.
// The body is control-flow-flattened: `f8h` holds the current state, every `case` label and
// every "next state" value is a lookup into the m6h.V3h()/m6h.H6h() table, and the while loop
// runs until `f8h` equals the exit value. m6h.N7O(n)/m6h.v7O(n) return strings from the decoded
// string table; per the article's deobfuscation 472 is "length", 592 is "push", 479 is "join",
// 508 is "" and 11 is "!!" (not verifiable from this excerpt alone).
'\x74': function(z3O) {
var f8h = m6h.V3h()[35][36][34][8];
while (f8h !== m6h.V3h()[36][16][12]) {
switch (f8h) {
case m6h.V3h()[25][15][6]:
// Loop test `i3O < C3O`; the `m2h * (m2h + 1) % 2 + 3` operand is always >= 3 (truthy).
f8h = m2h * (m2h + 1) % 2 + 3 && i3O < C3O ? m6h.H6h()[38][25][0] : m6h.H6h()[19][34][30];
break;
case m6h.V3h()[18][32][15]:
// Initialises the counter that only feeds the always-truthy operand above.
var m2h = 1;
f8h = m6h.V3h()[16][21][36];
break;
case m6h.H6h()[23][12][30]:
// Result: the three arrays joined and concatenated with the N7O(11) separator.
// The two statements after `return` are unreachable.
return r3O[m6h.N7O(479)](m6h.N7O(508)) + m6h.N7O(11) + l3O[m6h.v7O(479)](m6h.N7O(508)) + m6h.v7O(11) + U3O[m6h.v7O(479)](m6h.v7O(508));
f8h = m6h.V3h()[24][4][12];
break;
case m6h.H6h()[26][11][9]:
// Loop increment.
i3O++;
f8h = m6h.V3h()[25][15][6];
break;
case m6h.H6h()[11][29][36]:
// Setup: u3O is p3O() applied to a value looked up via H97[...] for z3O; three empty output arrays.
var a3O, u3O = p3O(H97[m6h.N7O(668)](m6h.N7O(40), z3O)),
r3O = [],
l3O = [],
U3O = [],
i3O = 0,
C3O = u3O[m6h.N7O(472)];
f8h = m6h.V3h()[5][8][6];
break;
case m6h.V3h()[29][16][0]:
// Loop body: g3O() may yield a truthy value that goes to l3O; otherwise elements 0 and 1
// are passed through S3O() into r3O and l3O. Element 2 always goes through S3O() into U3O.
a3O = g3O(u3O[i3O]),
a3O ? l3O[m6h.N7O(592)](a3O) : (r3O[m6h.N7O(592)](S3O(u3O[i3O][0])), l3O[m6h.N7O(592)](S3O(u3O[i3O][1]))),
U3O[m6h.N7O(592)](S3O(u3O[i3O][2]));
m2h = m2h > 65568 ? m2h / 6 : m2h * 6;
f8h = m6h.H6h()[13][24][31][5];
break;
}
}
},
按照之前反混淆的方法解读,如果有调用了函数的再去找函数,再对比geetest.5,最后结果如下:
// Reduces a recorded track to a list of rounded deltas.
//   a - array of points; each point is indexable with [0], [1], [2]
//       (presumably x, y and a timestamp).
// Returns an array of [d0, d1, d2] triples, one per step that changes [0] or [1].
// Steps that change only [2] are not emitted; their [2] delta is carried over and added
// to the next emitted step. Steps that change nothing are dropped. If a carried-over
// delta is still pending at the end, a final triple carrying it is appended.
//
// The obfuscator's extra loop operand (H2h * (H2h + 1) * H2h % 2 == 0) is always true
// and the per-step list `g` of the original was never read, so both are left out.
function c(a) {
    var result = [];
    var pending = 0;
    var last = a.length - 1;
    var d0, d1, d2;
    for (var h = 0; h < last; h++) {
        d0 = Math.round(a[h + 1][0] - a[h][0]);
        d1 = Math.round(a[h + 1][1] - a[h][1]);
        d2 = Math.round(a[h + 1][2] - a[h][2]);
        if (d0 === 0 && d1 === 0 && d2 === 0) {
            continue;
        }
        if (d0 === 0 && d1 === 0) {
            pending += d2;
        } else {
            result.push([d0, d1, d2 + pending]);
            pending = 0;
        }
    }
    if (pending !== 0) {
        // d0 and d1 are the deltas of the last step looked at, as in the original.
        result.push([d0, d1, pending]);
    }
    return result;
}
// Encodes one integer as one to four characters from a fixed 65-character alphabet.
//   a - the number to encode.
// Layout of the result: "!" if a is negative, then "$" plus a high digit when
// |a| / 65 is at least 1 (the high digit is capped at the last alphabet index),
// then the low digit |a| % 65.
function d(a) {
    var alphabet = "()*,-./0123456789:?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqr";
    var base = alphabet.length;
    var magnitude = Math.abs(a);

    var high = parseInt(magnitude / base);
    if (high >= base) {
        high = base - 1;
    }
    var highChar = high ? alphabet.charAt(high) : "";
    var lowChar = alphabet.charAt(magnitude % base);

    var prefix = a < 0 ? "!" : "";
    if (highChar) {
        prefix += "$";
    }
    return prefix + highChar + lowChar;
}
// Maps a step whose first two components match one of nine fixed pairs to a one-character
// shortcut.
//   a - a step; only a[0] and a[1] are compared.
// Returns the shortcut character ("s".."z" or "~"), or 0 when the pair is not in the table.
//
// The obfuscator's extra loop operand (y2h * (y2h + 1) * y2h % 2 == 0) is always true
// and is left out.
function e(a) {
    var b = [[1, 0], [2, 0], [1, -1], [1, 1], [0, 1], [0, -1], [3, 0], [2, -1], [2, 1]];
    var c = "stuvwxyz~";
    for (var d = 0; d < b.length; d++) {
        // Loose equality is kept on purpose to match the original comparison.
        if (a[0] == b[d][0] && a[1] == b[d][1]) {
            return c[d];
        }
    }
    return 0;
}
// First stage of the `aa` parameter: serialises a recorded track.
//   z3O - the track as an array of points. (In the obfuscated original the argument is an
//         id and the track is looked up through H97; here the track is passed in directly,
//         because c() needs the array itself.)
// The track is reduced by c(); each reduced step is either replaced by a one-character
// shortcut from e() or encoded field by field with d(). The three encoded streams are
// joined with "!!".
//
// The obfuscator's extra loop operand (m2h * (m2h + 1) % 2 + 3) is always truthy and is
// left out.
function get_aa(z3O) {
    var f = c(z3O);
    var g = [];
    var h = [];
    var i = [];
    for (var j = 0; j < f.length; j++) {
        var b = e(f[j]);
        if (b) {
            h.push(b);
        } else {
            g.push(d(f[j][0]));
            h.push(d(f[j][1]));
        }
        i.push(d(f[j][2]));
    }
    return g.join('') + "!!" + h.join('') + "!!" + i.join('');
}
似乎也是多了一些判断而已
m2h * (m2h + 1) % 2 + 3
y2h * (y2h + 1) * y2h % 2 == 0
H2h * (H2h + 1) * H2h % 2 == 0
难道这次更新了之后,只是混淆了代码并且多了这些判断而已???
在前人的代码上,加上了这些判断的代码。执行。
但一直forbidden。
调用公司内部机器学习模拟鼠标轨迹的接口。
果然!!一次都没通过,还是forbidden,不是轨迹的问题。
再次检查,之前的代码都没有错!既然没通过,那一定就是漏掉了某些东西了!
当我再次查看 j9O 的定义的时候,终于有所发现了!!!
// geetest.6 (obfuscated original): a state transition that hides a second processing step.
// Inside the comma expression j9O is reassigned to X97[m6h.v7O(599)](j9O, ..., ...), with two
// further values read from k9O[m6h.N7O(58)]; only then is k9O[m6h.N7O(58)][m6h.v7O(422)] tested
// to choose the next state I9h. So j9O is processed once more after X97[...446] produced it.
I9h = (j9O = X97[m6h.v7O(599)](j9O, k9O[m6h.N7O(58)][m6h.v7O(184)], k9O[m6h.N7O(58)][m6h.N7O(17)]), k9O[m6h.N7O(58)][m6h.v7O(422)]) ? m6h.H6h()[28][1][24] : m6h.H6h()[4][14][33];
(j9O = X97[m6h.v7O(599)](j9O, k9O[m6h.N7O(58)][m6h.v7O(184)], k9O[m6h.N7O(58)][m6h.N7O(17)]), k9O[m6h.N7O(58)][m6h.v7O(422)])
我擦!!!之前以为那个函数就是最终aa的结果了。没想到,又再把结果放到一个函数里面,再生成一个结果来作为aa的值。这才是最终aa生成的地方!!!!
找到 X97["u"](j9O, k9O["d"]["c"], k9O["d"]["s"]) 这个函数:
// geetest.6 (obfuscated original): property '\x75' ("u"), the second stage of building `aa`.
// It takes the first-stage string c3O plus two further arguments f3O and Y3O and inserts
// extra characters into the string.
// The body is control-flow-flattened: `q8h` is the state, and all case labels and next-state
// values are lookups into the m6h.V3h()/m6h.H6h() table, so the execution order cannot be
// read off the textual order of the cases. Per the article's deobfuscation the string-table
// lookups 672, 294 and 472 are "substr", "fromCharCode" and "length" (not verifiable from
// this excerpt alone). n2h and S2h only feed operands that are always truthy.
'\x75': function(c3O, f3O, Y3O) {
var q8h = m6h.H6h()[32][33][15];
while (q8h !== m6h.V3h()[35][37][30]) {
switch (q8h) {
case m6h.V3h()[20][23][6]:
// Guard: when f3O or Y3O is missing, take one branch, otherwise the other.
q8h = (!f3O || !Y3O) && S2h * (S2h + 1) % 2 + 9 ? m6h.V3h()[33][33][0] : m6h.V3h()[22][9][0];
break;
case m6h.V3h()[10][37][15]:
var n2h = 4;
var S2h = 6;
q8h = m6h.V3h()[21][37][18][18];
break;
case m6h.H6h()[15][15][0]:
// Returns the input string untouched; the statements after `return` are unreachable.
return c3O;
q8h = m6h.V3h()[7][35][30];
break;
case m6h.H6h()[2][30][30]:
// Loop body: parse the current two-character chunk L3O as a hexadecimal number W3O, turn it
// into a character V3O, compute the position q3O from a quadratic in W3O modulo the length
// of the ORIGINAL string c3O, and insert V3O into the working string s3O at that position.
R3O += 2;
var W3O = parseInt(L3O, 16),
V3O = String[m6h.v7O(294)](W3O),
q3O = (x3O * W3O * W3O + I3O * W3O + D3O) % c3O[m6h.v7O(472)];
s3O = s3O[m6h.N7O(672)](0, q3O) + V3O + s3O[m6h.v7O(672)](q3O);
q8h = m6h.H6h()[28][14][24];
break;
case m6h.H6h()[32][18][24]:
// Returns the string with all characters inserted; the rest is unreachable.
return s3O;
q8h = m6h.V3h()[3][18][30];
break;
case m6h.V3h()[2][1][24]:
// Bookkeeping for the dummy counter (multiplying or dividing by 1 never changes it).
n2h = n2h > 84261 ? n2h / 1 : n2h * 1;
q8h = m6h.V3h()[20][5][9];
break;
case m6h.H6h()[11][11][0]:
// Loop setup: the coefficients are elements 0, 2 and 4 of f3O.
var L3O, R3O = 0,
s3O = c3O,
x3O = f3O[0],
I3O = f3O[2],
D3O = f3O[4];
q8h = m6h.V3h()[21][19][29][16];
break;
case m6h.H6h()[14][38][9]:
// Loop test: take the next two characters of Y3O; an empty chunk ends the loop.
q8h = (L3O = Y3O[m6h.v7O(672)](R3O, 2)) && n2h * (n2h + 1) % 2 + 4 ? m6h.V3h()[31][7][24][24] : m6h.V3h()[10][22][24];
break;
}
}
},
反混淆完,js代码如下:
// Second (final) stage of the `aa` parameter: inserts extra characters into the
// first-stage string.
//   c3O - the first-stage aa string.
//   f3O - array of numbers (the `c` field of the get.php response); elements 0, 2 and 4
//         are used as coefficients.
//   Y3O - string of hexadecimal digits (the `s` field of the get.php response), consumed
//         two characters at a time.
// Returns c3O unchanged when f3O or Y3O is missing; otherwise the string with one
// character inserted per two-digit chunk of Y3O.
//
// The function is given a name so that it is a valid declaration; the obfuscator's extra
// operands (S2h * (S2h + 1) % 2 + 9 and n2h * (n2h + 1) % 2 + 4) are always truthy and
// are left out.
function get_final_aa(c3O, f3O, Y3O) {
    if (!f3O || !Y3O) {
        return c3O;
    }
    var s3O = c3O;
    var x3O = f3O[0];
    var I3O = f3O[2];
    var D3O = f3O[4];
    var R3O = 0;
    var L3O;
    // substr is kept (rather than slice/substring) to match the original byte for byte,
    // including its behaviour for out-of-range positions.
    while ((L3O = Y3O.substr(R3O, 2))) {
        R3O += 2;
        var W3O = parseInt(L3O, 16);
        var V3O = String.fromCharCode(W3O);
        // The position is taken modulo the length of the ORIGINAL string, not of the
        // growing working string.
        var q3O = (x3O * W3O * W3O + I3O * W3O + D3O) % c3O.length;
        s3O = s3O.substr(0, q3O) + V3O + s3O.substr(q3O);
    }
    return s3O;
}
k9O["d"]["c"], k9O["d"]["s"]
这两个参数是外部传入的参数
仔细看看,和gt、challenge类似,那应该来源也跟gt、challenge一样
gt : k9O["d"]["gt"],
challenge : k9O["d"]["challenge"],
回头看看基本的请求数据的顺序:
SearchItemCaptcha?v=…——> get.php? ——> ajax.php? ——>crop-query-search-1.html
那么从get.php?上找,请求参数 和 返回值 发现:
参数并没有c、s,那么返回值:
geetest_1511604205633({"c": [12, 58, 98, 36, 43, 95, 62, 15, 12], "theme": "golden", "benchmark": false, "slice": "pictures/gt/fc064fc73/slice/5e9b2ce77.png", "static_servers": ["static.geetest.com/", "dn-staticdown.qbox.me/"], "type": "slide", "api_server": "http://api.geetest.com/", "feedback": "", "ypos": 10, "link": "", "https": false, "s": "57685230", "challenge": "d1f7669c90d4a5ab7f0c070233ff6aa3dm", "bg": "pictures/gt/fc064fc73/bg/5e9b2ce77.jpg", "logo": true, "hide_delay": 800, "clean": true, "height": 116, "fullbg": "pictures/gt/fc064fc73/fc064fc73.jpg", "id": "ad1f7669c90d4a5ab7f0c070233ff6aa3", "product": "popup", "theme_version": "3.2.0", "xpos": 0, "show_delay": 250, "fullpage": false, "gt": "1d2c042096e050f07cb35ff3df5afd92", "version": "6.0.1", "mobile": false})
发现"c": [12, 58, 98, 36, 43, 95, 62, 15, 12]
"s": "57685230"
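OK!把上面js代码换成python代码,这段代码的作用就是把"s": "57685230"
两个两个拆分成57、68、52、30,按十六进制解析(0x57、0x68、0x52、0x30)后转换成对应的字符('W'、'h'、'R'、'0'),再依次插入到之前的aa字符串中。插入位置由 c[0]、c[2]、c[4] 作为系数算出:(c[0] * W * W + c[2] * W + c[4]) % 原aa长度,其中 W 是该字符的编码值。这样就生成了一个新的aa字符串。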
至此,就生成了最终的字符串参数aa了!!
测试一下,直接使用原来那个教程保存的轨迹数据,成功率在80%左右吧,有时候也会被forbidden。当然,数据用久了可能就会慢慢失效了。所以需要偶尔更新一下轨迹数据
基本思路就是这样,破解完剩下的事情就是简单的爬虫了。