这几天一直在研究js解密的问题,学会了不少新东西。以前见到那些加密的参数基本直接放弃,现在也可以琢磨一会儿、尝试一番。我先分享一下心得:首先找到参数是在哪个js文件里加密的,然后看看都调用了哪些函数,再自己尝试调用这些函数。我一般采用两种方式,这两种方式通常也会结合在一起使用。第一种:将js文件下载到本地,用webStorm(IDE,其他jetbrains公司的产品基本上也都支持js的编写)打开并进行格式化,然后找到产生参数的位置,查看调用的函数和参数并进行分析(这些说多了没有用处,需要自己进行分析,积累经验),再找到相应的js函数代码进行本地调用,查看结果(本文就是通过这种方式产生加密参数)。第二种:如果产生加密参数所依赖的变量和函数太多,就需要通过fiddler用本地js文件替换http请求返回的js文件,这样就可以通过更改本地文件来验证我们的想法。废话不多说,直接搞起。
环境:fiddler+pycharm+webStorm(用来分析js代码)+nodejs(本地调用js代码)
需要的包:pyexecjs(python调用js代码)+requests+nodejs的md5包
我抓取的是今日头条的热点部分。参数中只有最后一个(_signature)重要;as和cp看起来很需要,其实没有也可以。本文也会把生成它们的js函数找出来,这个很简单。
https://www.toutiao.com/api/pc/feed/?category=news_hot&utm_source=toutiao&widen=1&max_behot_time=0&max_behot_time_tmp=0&tadrequire=true&as=A1258B951D63F63&cp=5B5D13EFB6430E1&_signature=wUrOCwAAmiBrZY6LqaMEVMFKzh
通过浏览器全局搜索关键字,找到了三个参数所在的js文件(即加密参数的js链接),将它下载到本地并进行美化。很显然我们需要的参数都在这里,我们先解决as和cp(虽然这两个参数可有可无)。
/**
 * Rebuild `r.params` with fresh anti-crawler parameters.
 *
 * Reads `as` / `cp` from `ascp.getHoney()`, asks `TAC.sign` for a signature
 * when `window.TAC` is present (otherwise the signature stays an empty
 * string), and merges the three values plus `max_behot_time` into a copy of
 * the current `r.params` via `_.extend`.
 *
 * @param {*} t - When strictly equal to "refresh", 0 is used instead of
 *   `r.params.max_behot_time_tmp` for both the signature input and
 *   `max_behot_time`.
 */
function e(t) {
    const honey = ascp.getHoney();
    let signature = "";
    // Debug output of the current paging cursor.
    console.error(r.params.max_behot_time_tmp);
    if (window.TAC) {
        signature = TAC.sign("refresh" === t ? 0 : r.params.max_behot_time_tmp);
    }
    r.params = _.extend({}, r.params, {
        as: honey.as,
        cp: honey.cp,
        max_behot_time: "refresh" === t ? 0 : r.params.max_behot_time_tmp,
        _signature: signature
    });
}
(1)分析cp和as:
根据js代码可以知道,as和cp都来自变量e,而变量e是 var e = ascp.getHoney(),所以直接找ascp。这是个直接调用,很简单,不多说了,稍后会有python代码进行解释。
// Builds an object with a single method, `getHoney`, and attaches it to the
// function's argument as `ascp`.
// NOTE(review): the excerpt ends without call parentheses, so as shown this
// function expression is never invoked — presumably the full script calls it
// with `window`; confirm against the downloaded file.
!function (t) {
    var e = {};
    /**
     * Derive the `as` / `cp` request parameters from the current time.
     *
     * `md5` is not defined in this excerpt; it must be in scope wherever this
     * snippet runs.
     *
     * @returns {{as: string, cp: string}} The two parameters; a fixed pair is
     *   returned when the hex timestamp is not exactly 8 characters long.
     */
    e.getHoney = function () {
        // t: Unix time in whole seconds; e: its upper-case hex form;
        // i: upper-case MD5 text of t. Both names shadow the outer ones.
        var t = Math.floor((new Date).getTime() / 1e3), e = t.toString(16).toUpperCase(),
            i = md5(t).toString().toUpperCase();
        if (8 != e.length) return {as: "479BB4B7254C150", cp: "7E0AC8874BB0985"};
        // s interleaves the first five MD5 characters with the first five hex-time characters.
        for (var n = i.slice(0, 5), a = i.slice(-5), s = "", o = 0; 5 > o; o++) s += n[o] + e[o];
        // r interleaves hex-time characters 3..7 with the last five MD5 characters.
        for (var r = "", c = 0; 5 > c; c++) r += e[c + 3] + a[c];
        // The three log calls below were added to check, with a locally swapped-in
        // copy of the script, that these are the parameters being looked for.
        window.console.log(t);
        window.console.log("A1" + s + e.slice(-3));
        window.console.log(e.slice(0, 3) + r + "E1");
        return {as: "A1" + s + e.slice(-3), cp: e.slice(0, 3) + r + "E1"}
    }, t.ascp = e
}
将这段放到本地运行,nodejs直接调用就行,可以得到两个参数
function a() {
var md5=require('md5-node');
var t = Math.floor((new Date).getTime() / 1e3), e = t.toString(16).toUpperCase(),
i = md5(t).toString().toUpperCase();
console.log(t)
if (8 != e.length) return {as: "479BB4B7254C150", cp: "7E0AC8874BB0985"};
for (var n = i.slice(0, 5), a = i.slice(-5), s = "", o = 0; 5 > o; o++) s += n[o] + e[o];
for (var r = "", c = 0; 5 > c; c++) r += e[c + 3] + a[c];
return {as: "A1" + s + e.slice(-3), cp: e.slice(0, 3) + r + "E1"}
}
(2)分析sign
最开始那段js代码告诉我们,i就是我们要的加密参数:i = TAC.sign("refresh" === t ? 0 : r.params.max_behot_time_tmp)。也就是说,只需要知道TAC.sign(?)的结果就可以得到sign参数。我们再一次通过fiddler进行本地js替换,发现只要求出TAC.sign(0)就可以了,接下来寻找TAC。这是一段令人头皮发麻的js代码,懂前端js的人可能比较好理解,我也不懂js。
Function(function (t) {
return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g,
function (e) {
return t[15 & e.charCodeAt(0)]
})
}("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d
找到一个大佬的参考,参考链接忘记了,找到后我会在评论里加上,反正就是直接本地调用这个代码,得到第三个参数。三个参数都完成,进行爬虫。
function a() {
global.navigator = {};
global.navigator.userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36";
Function(function (t) {
var yy=t;
return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g, function (e) {
return t[15 & e.charCodeAt(0)]
})
}("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d
1,获取参数:
def get_cp_as():
    """Run the local ``t.js`` script and return what its function ``a`` yields.

    The script source is read from disk, compiled with ``execjs`` and its
    ``a`` function is called with no arguments.

    Returns:
        The value produced by ``a()`` in ``t.js``; the caller in this script
        indexes it with ``'as'`` and ``'cp'``.
    """
    # ``with`` guarantees the handle is closed; the explicit encoding avoids
    # depending on the platform's default codec.
    with open("C:\\Users\\luyuwei\\Desktop\\今日头条\\t.js", 'r', encoding='utf-8') as f1:
        js = f1.read()
    ctx = execjs.compile(js)
    return ctx.call('a')
def get_signature():
    """Run the local ``tt.js`` script and return what its function ``a`` yields.

    The script source is read from disk, compiled with ``execjs`` and its
    ``a`` function is called with no arguments.

    Returns:
        The value produced by ``a()`` in ``tt.js``; the caller in this script
        uses it as the ``_signature`` query parameter.
    """
    # ``with`` guarantees the handle is closed. The explicit encoding matters
    # here: the script embeds non-ASCII characters that a locale default codec
    # may fail to decode.
    # NOTE(review): assumes tt.js is saved as UTF-8 — confirm.
    with open("C:\\Users\\luyuwei\\Desktop\\今日头条\\tt.js", 'r', encoding='utf-8') as f2:
        js = f2.read()
    ctx = execjs.compile(js)
    return ctx.call('a')
2 t.js文件
function a() {
var md5=require('md5-node');
var t = Math.floor((new Date).getTime() / 1e3), e = t.toString(16).toUpperCase(),
i = md5(t).toString().toUpperCase();
console.log(t)
if (8 != e.length) return {as: "479BB4B7254C150", cp: "7E0AC8874BB0985"};
for (var n = i.slice(0, 5), a = i.slice(-5), s = "", o = 0; 5 > o; o++) s += n[o] + e[o];
for (var r = "", c = 0; 5 > c; c++) r += e[c + 3] + a[c];
return {as: "A1" + s + e.slice(-3), cp: e.slice(0, 3) + r + "E1"}
}
tt.js文件:
function a() {
global.navigator = {};
global.navigator.userAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36";
Function(function (t) {
return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g, function (i) {
return t[15 & i.charCodeAt(0)]
})
}("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d
3 爬虫:
#coding=utf-8
import requests
import time
from 调用js import get_signature, get_cp_as
def get_url(as1,cp,signature):
    """Fetch the hot-news feed and save the response body to ``2.txt``.

    Args:
        as1: Value for the ``as`` query parameter.
        cp: Value for the ``cp`` query parameter.
        signature: Value for the ``_signature`` query parameter.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url="http://www.toutiao.com/api/pc/feed/?category=news_hot&utm_source=toutiao&widen=1&max_behot_time=0&max_behot_time_tmp=0&tadrequire=true&as={as1}&cp={cp}&_signature={sign}"
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
        'Referer': 'https://www.toutiao.com/ch/news_hot/',
        'Accept-Language':'zh-CN,zh;q=0.9',
        # NOTE(review): 'utf8' is not a content-coding; kept as-is because
        # changing it could alter how the server encodes the response.
        'Accept-Encoding': 'utf8'
    }
    url=url.format(as1=as1,cp=cp,sign=signature)
    print(url)
    # NOTE(review): verify=False disables TLS certificate checks; kept from the
    # original. The timeout stops a stalled connection from hanging the caller.
    html=requests.get(url=url,headers=headers,verify=False,timeout=10)
    html.encoding='utf-8'
    body=html.text
    # Printed once: the original also printed the same body through a
    # decode/encode/decode chain that produced identical text.
    print(body)
    # ``with`` closes the file even if the write fails.
    with open("2.txt", 'w+', encoding='utf-8') as f:
        f.write(body)
    return body
def parse_html():
    """Parse the feed response saved in ``2.txt`` and print each entry.

    The file is read as UTF-8 JSON. For every item under the top-level
    ``data`` key, a dict with the item's ``title``, ``label`` and
    ``abstract`` is printed.

    Returns:
        list: The printed dicts, in file order. (The original returned
        ``None``; callers that ignore the result are unaffected.)

    Raises:
        KeyError: If ``data`` or one of the three fields is missing.
        json.JSONDecodeError: If ``2.txt`` does not contain valid JSON.
    """
    import json

    # json.load replaces the original eval(): eval executes whatever the server
    # sent, fails on JSON ``null``, and the true/false text substitution that
    # fed it also rewrote those words inside titles and abstracts.
    with open("2.txt", 'r', encoding='utf-8') as f:
        payload = json.load(f)
    records = []
    for item in payload['data']:
        record = {
            'title': item['title'],
            'label': item['label'],
            # 'comments_count': item['comments_count'],
            'abstract': item['abstract'],
        }
        print(record)
        records.append(record)
    return records
if __name__ == '__main__':
    # Poll forever: compute fresh parameters, show them, fetch the feed and
    # parse the saved response on every pass.
    while True:
        honey = get_cp_as()
        as1, cp = honey['as'], honey['cp']
        key = get_signature()
        time.sleep(1)
        for value in (as1, cp, key):
            print(value)
        get_url(as1, cp, key)
        parse_html()
这个有个大缺点就是,出现大量重复,我也不知道咋办,嘻嘻嘻,主要是讲解js解密的过程,嘻嘻嘻
-------------------------------------
将两个js文件内容都给修改了,之前的可能有问题,现在这个基本上也抓取不到什么有用信息,要隔一段时间才能抓取到有用的信息,但是还有重复的,大家参考下思路吧。