话不多说,直接开始
头条号:
url = https://www.toutiao.com/c/user/3410443345/#mid=3413306633
https://www.toutiao.com/c/user/article/?page_type=1&user_id=3410443345&max_behot_time=1565089129&count=20&as=A165EDB4DAA2895&cp=5D4AE2D839055E1&_signature=P5YcVhAaYuK2dyXMdG6kDj-WHE
max_behot_time:乍一瞅,诶,时间戳。
as、cp、_signature:~~~~~~
搜索 _signature
,其他参数也可以,这个少一点,好找 function n() {
var e,
i = ascp.getHoney(),
t = '';
return window.TAC && (t = TAC.sign(userInfo.id + '' + d.params.max_behot_time)),
e = _.extend({
}, d.params, {
as: i.as,
cp: i.cp,
_signature: t
})
python 实现
import hashlib
import time
def getHoney(timestamp=None):
    """Build the ``as`` and ``cp`` request parameters from a Unix timestamp.

    The upper-case hex form of the timestamp is interleaved with characters
    taken from an upper-case MD5 hex digest:

    * ``as`` = ``"A1"`` + (digest[k], hex[k] for k in 0..4) + last 3 hex digits
    * ``cp`` = first 3 hex digits + (hex[k + 3], digest[-5 + k] for k in 0..4) + ``"E1"``

    The previous version computed the digest but never used it, so both
    values were built from the hex timestamp alone (every digit doubled),
    which cannot produce values shaped like the ones seen in real request
    URLs (e.g. ``as=A165EDB4DAA2895``).

    :param timestamp: Unix time in seconds; defaults to the current time.
    :return: tuple ``(as, cp)``; the pair is also printed.
    :raises IndexError: if the hex form of the timestamp has fewer than 8 digits.
    """
    e = int(time.time()) if timestamp is None else int(timestamp)
    # Upper-case hexadecimal form of the timestamp, without the "0x" prefix.
    i = hex(e)[2:].upper()
    # Upper-case hexadecimal MD5 digest.
    # NOTE(review): the digest is taken over the decimal timestamp string here;
    # the earlier snippet hashed the hex string (and discarded the result).
    # Confirm the digest input against the site's ascp.getHoney().
    t = hashlib.md5(str(e).encode(encoding='utf-8')).hexdigest().upper()
    o = t[:5]
    n = t[-5:]
    a = ''.join(o[s] + i[s] for s in range(5))
    r = ''.join(i[l + 3] + n[l] for l in range(5))
    _as = "A1" + a + i[-3:]
    cp = i[:3] + r + "E1"
    print('as', _as, '\ncp', cp)
    return _as, cp


if __name__ == '__main__':
    getHoney()
_signature,居然跳转到另一个js文件。
把 _signature 用到的所有js数据放到一个文件中,用 python 中的 execjs,将js文件存到本地,调用就可以得到数据了。
//window.TAC && (console.log(userInfo.id + "" + a[t]),
// Define empty `navigator` / `window` globals (and an empty user-agent string)
// so the script can be evaluated outside a browser, e.g. through execjs.
navigator={};window={};navigator.userAgent=""
function asd() {
// Evaluate the binary operator whose source text is `e` as `r <op> a`.
// A two-argument function is compiled once per operator through `t`
// and memoised in `b` (both come from the enclosing scope).
function e(e, a, r) {
    var compiled = b[e];
    if (!compiled) {
        compiled = t("x,y", "return x " + e + " y");
        b[e] = compiled;
    }
    return compiled(r, a);
}
// Construct `new e[a](e[a + 1], ..., e[a + r])`: the constructor sits at
// index `a` of `e` and is followed by its `r` arguments.
// One factory per argument count is compiled through `t` and memoised in `k`
// (both come from the enclosing scope).
function a(e, a, r) {
    var factory = k[r];
    if (!factory) {
        var argList = new Array(r + 1).join(",x[++y]").slice(1);
        factory = t("x,y", "return new x[y](" + argList + ")");
        k[r] = factory;
    }
    return factory(e, a);
}
// Run program text `e` in a fresh frame.
// The frame records its depth in `d` (parent depth + 1, or 0 without a
// parent `r`), references itself as "$<depth>", inherits the parent's
// "$0".."$<depth-1>" entries, and receives the call arguments `a` as
// indexed slots plus `length`. Execution starts at offset 0 via `c`.
function r(e, a, r) {
    var frame = {};
    var depth = r ? r.d + 1 : 0;
    frame.d = depth;
    frame["$" + depth] = frame;
    for (var level = 0; level < depth; level++) {
        var key = "$" + level;
        frame[key] = r[key];
    }
    var argc = a.length;
    frame.length = argc;
    for (var slot = 0; slot < argc; slot++) {
        frame[slot] = a[slot];
    }
    return c(e, 0, frame);
}
// Stack-machine interpreter for the encoded program text.
//   t - program text; every opcode/operand byte is stored as (char code - 32)
//   b - offset in `t` at which execution starts
//   k - current frame object (indexed slots, "$<depth>" links; see r())
// Returns the value popped by opcode 0, or a value propagated out of a
// nested run started by opcode 8.
// Uses e(), a(), r(), `s` and `n` from the enclosing scope.
function c(t, b, k) {
// Push a value onto the operand stack `v` (stack pointer `x`).
function u(e) {
v[x++] = e
}
// Read a length-prefixed string operand: one length byte, then that many characters.
function f() {
return g = t.charCodeAt(b++) - 32,
t.substring(b, b += g)
}
// Run a nested interpreter from the current offset. On an exception the
// error is stored in `h` and `y` is set to `l` itself as an "exception caught" marker.
function l() {
try {
y = c(t, b, k)
} catch (e) {
h = e,
y = l
}
}
// h, y, d, g: scratch registers; v: operand stack; x: stack pointer.
for (var h, y, d, g, v = [], x = 0; ; )
switch (g = t.charCodeAt(b++) - 32) {
// 1: replace the top of the stack with its logical negation.
case 1:
u(!v[--x]);
break;
// 4: push a length-prefixed string literal.
case 4:
v[x++] = f();
break;
// 5: pop an array-like and push an iterator function; each call pushes the
// next element (if any) onto this stack and returns whether one was pushed.
case 5:
u(function(e) {
var a = 0
, r = e.length;
return function() {
var c = a < r;
return c && u(e[a++]),
c
}
}(v[--x]));
break;
// 6: pop an argument, pop a function, push function(argument).
case 6:
y = v[--x],
u(v[--x](y));
break;
// 8: guarded region. Reads a skip length, runs the region in a nested
// interpreter via l(), skips past it and reads a second skip length.
// If the nested run returned `c` (the marker pushed by opcode 9) the second
// span is skipped as well; if an exception was caught (y === l) execution
// continues right here; any other result is returned from this frame.
case 8:
if (g = t.charCodeAt(b++) - 32,
l(),
b += g,
g = t.charCodeAt(b++) - 32,
y === c)
b += g;
else if (y !== l)
return y;
break;
// 9: push the interpreter function itself (used as a marker value by opcode 8).
case 9:
v[x++] = c;
break;
// 10: replace the top of the stack with s(top) (Object.keys or its fallback).
case 10:
u(s(v[--x]));
break;
// 11: pop y, pop z, push z + y.
case 11:
y = v[--x],
u(v[--x] + y);
break;
// 12: push an obfuscated string literal; each char code is XORed with
// (index + string length).
case 12:
for (y = f(),
d = [],
g = 0; g < y.length; g++)
d[g] = y.charCodeAt(g) ^ g + y.length;
u(String.fromCharCode.apply(null, d));
break;
// 13: pop a key, pop an object, delete object[key]; the result is kept in `h`.
case 13:
y = v[--x],
h = delete v[--x][y];
break;
// 14: push the next byte (minus 32) as an integer.
case 14:
v[x++] = t.charCodeAt(b++) - 32;
break;
// 59: push an array made of the top N stack entries (N = next byte), or [] when N is 0.
case 59:
u((g = t.charCodeAt(b++) - 32) ? (y = x,
v.slice(x -= g, y)) : []);
break;
// 61: pop an object and push object[next byte - 32].
case 61:
u(v[--x][t.charCodeAt(b++) - 32]);
break;
// 62: pop an index g and update k[0] = (65599 * k[0] + k[1].charCodeAt(g)) >>> 0.
case 62:
g = v[--x],
k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0;
break;
// 65: pop a value, pop a key, pop an object, then set object[key] = value.
case 65:
h = v[--x],
y = v[--x],
v[--x][y] = h;
break;
// 66: binary operator given by the next single character; pops the right
// operand first, then the left one.
case 66:
u(e(t[b++], v[--x], v[--x]));
break;
// 67: call. Pops the argument list, the receiver and the callee. Callees
// created by opcode 89 (marked with .x === c) are run directly through r();
// anything else is invoked with apply().
case 67:
y = v[--x],
d = v[--x],
u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y));
break;
// 68: binary operator that is either a length-prefixed string (when the next
// character sorts below "<") or the next character doubled.
case 68:
u(e((g = t[b++]) < "<" ? (b--,
f()) : g + g, v[--x], v[--x]));
break;
// 70: push false.
case 70:
u(!1);
break;
// 71: push `n` from the enclosing scope.
case 71:
v[x++] = n;
break;
// 72: push a length-prefixed string converted with unary plus.
case 72:
v[x++] = +f();
break;
// 73: push a length-prefixed string parsed as a base-36 integer.
case 73:
u(parseInt(f(), 36));
break;
// 75: conditional jump. A truthy popped value skips the offset byte and
// carries on; otherwise control falls through to opcode 74 on purpose.
case 75:
if (v[--x]) {
b++;
break
}
// 74: relative jump by the next byte (minus 32), sign-extended from 16 bits.
case 74:
g = t.charCodeAt(b++) - 32 << 16 >> 16,
b += g;
break;
// 76: push frame slot k[next byte - 32].
case 76:
u(k[t.charCodeAt(b++) - 32]);
break;
// 77: pop a key, pop an object, push object[key].
case 77:
y = v[--x],
u(v[--x][y]);
break;
// 78: construct. The next byte is the argument count; the constructor and its
// arguments are taken from the stack and handed to a().
case 78:
g = t.charCodeAt(b++) - 32,
u(a(v, x -= g + 1, g));
break;
// 79: push the frame stored under "$<next byte - 32>".
case 79:
g = t.charCodeAt(b++) - 32,
u(k["$" + g]);
break;
// 81: pop a value, pop an object, set object[<string operand>] = value.
case 81:
h = v[--x],
v[--x][f()] = h;
break;
// 82: pop an object and push object[<string operand>].
case 82:
u(v[--x][f()]);
break;
// 83: pop a value and store it in frame slot k[next byte - 32].
case 83:
h = v[--x],
k[t.charCodeAt(b++) - 32] = h;
break;
// 84: push true.
case 84:
v[x++] = !0;
break;
// 85: push undefined.
case 85:
v[x++] = void 0;
break;
// 86: duplicate the top of the stack.
case 86:
u(v[x - 1]);
break;
// 88: swap the two topmost stack entries.
case 88:
h = v[--x],
y = v[--x],
v[x++] = h,
v[x++] = y;
break;
// 89: push a function whose body is a string operand of program text.
// Calling it runs that text through r() with the call's arguments and the
// current frame as parent; .y holds the text and .x === c marks it for opcode 67.
case 89:
u(function() {
function e() {
return r(e.y, arguments, k)
}
return e.y = f(),
e.x = c,
e
}());
break;
// 90: push null.
case 90:
v[x++] = null;
break;
// 91: push the scratch register `h`.
case 91:
v[x++] = h;
break;
// 93: pop into the scratch register `h`.
case 93:
h = v[--x];
break;
// 0: return the top of the stack to the caller.
case 0:
return v[--x];
// Any other opcode is an integer literal: push (opcode, sign-extended from 16 bits) - 16.
default:
u((g << 16 >> 16) - 16)
}
}
// Shared state for the helpers above, followed by the bootstrap call.
//   n - the receiver asd() was invoked with; pushed by opcode 71
//   t - Function constructor taken from `n`; e() and a() compile helpers with it
//   s - Object.keys, or a fallback that returns an array-like object of the
//       enumerable keys (indexed entries plus `length`); used by opcode 10
//   b - cache of compiled binary operators, keyed by operator text (see e())
//   k - cache of compiled constructor calls, keyed by argument count (see a())
var n = this;
var t = n.Function;
var s = Object.keys || function(e) {
    var a = {}
      , r = 0;
    for (var c in e)
        a[r++] = c;
    return a.length = r,
    a
};
var b = {};
var k = {};
// Decode the program text from its character codes and run it with a single
// argument: a fresh object that is also assigned to the global `TAC`. The
// program's last instructions store a `sign` property.
// The code list must stay one unbroken expression: it used to be split in the
// middle of a number ("...108,3" / "2,60,..."), which is a syntax error. The
// value is 32, matching the neighbouring "108,33,108,32,NN,100,62,38" runs.
r(String.fromCharCode(103,114,36,68,97,116,101,110,32,1048,98,47,115,33,108,32,121,850,121,313,103,44,40,108,102,105,126,97,104,96,123,109,118,44,45,110,124,106,113,101,119,86,120,112,123,114,118,109,109,120,44,38,101,102,102,127,107,120,91,33,99,115,34,108,34,46,80,113,37,119,105,100,116,104,108,34,64,113,38,104,101,105,103,104,116,108,34,118,114,42,103,101,116,67,111,110,116,101,120,116,120,36,34,50,100,91,33,99,115,35,108,35,44,42,59,63,124,117,46,124,117,99,123,117,113,36,102,111,110,116,108,35,118,114,40,102,105,108,108,84,101,120,116,120,36,36,40856,3601,3616,44221,50,60,91,35,99,125,108,35,50,113,42,115,104,97,100,111,119,66,108,117,114,108,35,49,113,45,115,104,97,100,111,119,79,102,102,115,101,116,88,108,35,36,36,108,105,109,101,113,43,115,104,97,100,111,119,67,111,108,111,114,108,35,118,114,35,97,114,99,120,56,56,56,48,50,91,37,99,125,108,35,118,114,38,115,116,114,111,107,101,120,91,32,99,125,108,34,118,44,41,125,101,79,109,121,111,90,66,93,109,120,91,32,99,115,33,48,115,36,108,36,80,98,60,107,55,108,32,108,33,114,38,108,101,110,103,116,104,98,37,94,108,36,49,43,115,36,106,2,108,32,32,115,35,105,36,49,101,107,49,115,36,103,114,35,116,97,99,107,52,41,122,103,114,35,116,97,99,36,33,32,43,48,111,33,91,35,99,106,63,111,32,93,33,108,36,98,37,115,34,111,32,93,33,108,34,108,36,98,42,98,94,48,100,35,62,62,62,115,33,48,115,37,121,65,48,115,34,108,34,108,33,114,38,108,101,110,103,116,104,98,60,107,43,108,34,94,108,34,49,43,115,34,106,5,108,32,32,115,38,108,38,122,48,108,33,36,32,43,91,34,99,115,39,40,48,108,35,105,39,49,112,115,57,119,120,98,38,115,40,41,32,38,123,115,41,47,115,40,103,114,38,83,116,114,105,110,103,114,44,102,114,111,109,67,104,97,114,67,111,100,101,115,41,48,115,42,121,87,108,32,46,95,98,38,115,32,111,33,93,41,108,32,108,32,74,98,60,107,36,46,97,106,59,108,32,46,84,98,60,107,36,46,103,106,47,108,32,46,94,98,60,107,38,105,34,45,52,106,33,31,43,38,32,115,43,121,80,111,33,93,43,115,33,108,33,108,32,72,100,62,38,108,33,108,32,66,100,62,38,43,108,33,108,32,60,100,62,38,43,108,33,108,32,54,100,62,38,43,108,33,108,32,38,43,32,115,44,121,61,111,33,111,33,93,47,113,34,49,51,111,33,108,32,113,34,49,48,111,33,93,44,108,32,50,100,62,38,32,115,46,123,115,45,121,77,111,33,111,33,93,48,113,34,49,51,111,33,93,42,76,100,60,108,32,52,100,35,62,62,62,98,124,115,33,111,33,108,32,113,34,49,48,111,33,93,44,108,33,38,32,115,47,121,73,111,33,111,33,93,46,113,34,49,51,111,33,93,44,111,33,93,42,74,100,60,108,32,54,100,35,62,62,62,98,124,38,111,33,93,43,108,32,38,43,32,115,48,108,45,108,33,38,108,45,108,33,105,39,49,122,49,52,49,122,52,98,47,64,100,60,108,34,98,124,38,43,108,45,108,40,108,33,98,94,38,43,108,45,108,38,122,108,39,103,44,41,103,107,125,101,106,111,123,127,99,109,44,41,124,121,110,126,76,105,106,126,101,109,91,34,99,108,36,98,37,64,100,60,108,38,122,108,39,108,32,36,32,43,91,34,99,108,36,98,37,98,124,38,43,108,45,108,37,56,100,60,64,98,124,108,33,98,94,38,43,32,113,36,115,105,103,110,32), [TAC = {}]);
}
// Run the bootstrap once; its final statement assigns the global `TAC` object.
asd();
// Smoke test: sign a fixed sample user id with the global TAC object.
function test() {
    var sampleUserId = "3410443345";
    return TAC.sign(sampleUserId);
}
开始生成_signature
import execjs
# Load the locally saved JS bundle and compile it with execjs.
with open('jj.js', 'r') as f:
    file = f.read()
data = execjs.compile(file)
# Argument: str(user_id) + str(timestamp)
# str(3410443345) + str(1565051266)
# The value must reach JavaScript as a quoted string. Unquoted, the 20-digit
# text is parsed as a Number literal, which cannot hold integers above 2**53
# exactly, so TAC.sign() would sign a rounded value.
res = data.eval('TAC.sign("{}")'.format('34104433451565051266'))
print(res)
那么~~~
所有的参数值都可以拿到了
开始拼接url了
开始访问了。额の…这什么啊,数据呢
放大招了,开启 selenium + webdriver 模式
from selenium import webdriver
def get_as_cp_signature(user_id, mid, max_behot_time):
    '''Use selenium to obtain as, cp and _signature from the live user page.

    Opens the user's page in headless Chrome and calls the page's own
    ``ascp.getHoney()`` and ``TAC.sign()`` functions.

    :param user_id: account id, used in the page URL and as the first half of
        the signed string.
    :param mid: account ``mid``, used in the page URL fragment.
    :param max_behot_time: timestamp appended to ``user_id`` before signing.
    :return: tuple ``(as, cp, signature)``; the values are also printed.
    '''
    options = webdriver.ChromeOptions()
    # Chinese locale
    options.add_argument('lang=zh_CN.UTF-8')
    # Headless browser; passed as a plain argument because
    # Options.set_headless() is no longer available in current selenium.
    options.add_argument('--headless')
    options.add_argument(
        'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"')
    # `options=` replaces the removed `chrome_options=` keyword.
    brower = webdriver.Chrome(options=options)
    try:
        brower.get('https://www.toutiao.com/c/user/{}/#mid={}'.format(user_id, mid))
        # print(brower.current_url)
        ascp = brower.execute_script('return ascp.getHoney()')
        _as = ascp['as']
        cp = ascp['cp']
        # Pass the value through `arguments` so it reaches JavaScript as a
        # string. Formatted into the script text it became a 20-digit number
        # literal, which JavaScript cannot represent exactly.
        signature = brower.execute_script(
            'return TAC.sign(arguments[0])', str(user_id) + str(max_behot_time))
    finally:
        # Always shut the browser down, otherwise every call leaks a Chrome process.
        brower.quit()
    print("="*50)
    print('as: ', _as, '\ncp: ', cp, '\nsignature: ', signature)
    return _as, cp, signature


if __name__ == '__main__':
    user_id = '3410443345'  # Toutiao account id
    mid = '3413306633'  # Toutiao account mid
    max_behot_time = 1565073223  # timestamp
    get_as_cp_signature(user_id=user_id, mid=mid, max_behot_time=max_behot_time)
好了,所有的参数都齐了,那么再次开始访问,
阿欧~~~
又一次失败…
发现一个奇怪的事情
在火狐浏览器中获取到的json数据地址,复制到Google浏览器中也是没有数据的。
以下为火狐浏览器中的json数据地址:
https://www.toutiao.com/c/user/article/?page_type=1&user_id=3410443345&max_behot_time=0&count=20&as=A1B5DD642A12894&cp=5D4A6268C9340E1&_signature=P5YcVhAaYuK2dyXMdG5kfD-WHE
这该如何是好~~
接着分析吧, 待更~~~