反爬虫破解——裁判文书网

这段时间研究了下裁判文书网的反爬策略感觉挺有意思的,这里给大家分享一下

思路分析

我们先在裁判文书网上随便搜索点东西,我这里搜的是"经济犯罪",可以看到返回了很多页的数据,我们点击下一页然后看下请求
反爬虫破解——裁判文书网_第1张图片
观察上图中的参数,并试着在postman中调用一下试试看
反爬虫破解——裁判文书网_第2张图片
可以看到直接能发送成功,并且修改pageNum值之后仍然能够正常拿到返回结果,可是返回结果是加密的。那我们先来研究下如何进行解密。
观察可以看到返回结构如下

{
    "code": 1,
    "description": null,
    "secretKey": "I7NGzDPxwy01xcpHGrNnZcDo",
    "result": ".........",
    "success": true
}

其中secretKey似乎是密钥,尝试全局搜索secretKey
反爬虫破解——裁判文书网_第3张图片
搜索后可以看到在website.js中似乎有解密的代码(上图中4位置)。我们在website.js的245行(上图中4位置)打上断点,然后在网站上点击翻页,可以发现网页会在断点处停住,然后单步跟进解密函数,就可以找到解密方法的位置
反爬虫破解——裁判文书网_第4张图片
反爬虫破解——裁判文书网_第5张图片
观察此函数可以发现此解密方法在js中有完整代码,加解密代码压缩后如下

var CryptoJS=CryptoJS||function(y,h){var j={},g=j.lib={},f=function(){},z=g.Base={extend:function(b){f.prototype=this;var d=new f;b&&d.mixIn(b);d.hasOwnProperty("init")||(d.init=function(){d.$super.init.apply(this,arguments)});d.init.prototype=d;d.$super=this;return d},create:function(){var b=this.extend();b.init.apply(b,arguments);return b},init:function(){},mixIn:function(b){for(var d in b){b.hasOwnProperty(d)&&(this[d]=b[d])}b.hasOwnProperty("toString")&&(this.toString=b.toString)},clone:function(){return this.init.prototype.extend(this)}},c=g.WordArray=z.extend({init:function(b,d){b=this.words=b||[];this.sigBytes=d!=h?d:4*b.length},toString:function(b){return(b||t).stringify(this)},concat:function(d){var n=this.words,b=d.words,l=this.sigBytes;d=d.sigBytes;this.clamp();if(l%4){for(var e=0;e>>2]|=(b[e>>>2]>>>24-8*(e%4)&255)<<24-8*((l+e)%4)}}else{if(65535>>2]=b[e>>>2]}}else{n.push.apply(n,b)}}this.sigBytes+=d;return this},clamp:function(){var b=this.words,d=this.sigBytes;b[d>>>2]&=4294967295<<32-8*(d%4);b.length=y.ceil(d/4)},clone:function(){var b=z.clone.call(this);b.words=this.words.slice(0);return b},random:function(d){for(var e=[],b=0;b>>2]>>>24-8*(l%4)&255;b.push((e>>>4).toString(16));b.push((e&15).toString(16))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>3]|=parseInt(d.substr(e,2),16)<<24-4*(e%8)}return new c.init(b,l/2)}},k=o.Latin1={stringify:function(d){var l=d.words;d=d.sigBytes;for(var b=[],e=0;e>>2]>>>24-8*(e%4)&255))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>2]|=(d.charCodeAt(e)&255)<<24-8*(e%4)}return new c.init(b,l)}},m=o.Utf8={stringify:function(b){try{return decodeURIComponent(escape(k.stringify(b)))}catch(d){throw Error("Malformed UTF-8 data")}},parse:function(b){return k.parse(unescape(encodeURIComponent(b)))}},a=g.BufferedBlockAlgorithm=z.extend({reset:function(){this._data=new c.init;this._nDataBytes=0},_append:function(b){"string"==typeof 
b&&(b=m.parse(b));this._data.concat(b);this._nDataBytes+=b.sigBytes},_process:function(n){var s=this._data,l=s.words,q=s.sigBytes,p=this.blockSize,d=q/(4*p),d=n?y.ceil(d):y.max((d|0)-this._minBufferSize,0);n=d*p;q=y.min(4*n,q);if(n){for(var r=0;r>>2]>>>24-8*(h%4)&255)<<16|(j[h+1>>>2]>>>24-8*((h+1)%4)&255)<<8|j[h+2>>>2]>>>24-8*((h+2)%4)&255,f=0;4>f&&h+0.75*f>>6*(3-f)&63))}}if(j=g.charAt(64)){for(;i.length%4;){i.push(j)}}return i.join("")},parse:function(j){var k=j.length,i=this._map,g=i.charAt(64);g&&(g=j.indexOf(g),-1!=g&&(k=g));for(var g=[],h=0,e=0;e>>6-2*(e%4);g[h>>>2]|=(f|c)<<24-8*(h%4);h++}}return a.create(g,h)},_map:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="}})();(function(m){function f(l,r,n,s,d,q,p){l=l+(r&n|~r&s)+d+p;return(l<>>32-q)+r}function g(l,r,n,s,d,q,p){l=l+(r&s|n&~s)+d+p;return(l<>>32-q)+r}function e(l,r,n,s,d,q,p){l=l+(r^n^s)+d+p;return(l<>>32-q)+r}function c(l,r,n,s,d,q,p){l=l+(n^(r|~s))+d+p;return(l<>>32-q)+r}for(var o=CryptoJS,a=o.lib,j=a.WordArray,k=a.Hasher,a=o.algo,h=[],i=0;64>i;i++){h[i]=4294967296*m.abs(m.sin(i+1))|0}a=a.MD5=k.extend({_doReset:function(){this._hash=new j.init([1732584193,4023233417,2562383102,271733878])},_doProcessBlock:function(J,T){for(var V=0;16>V;V++){var U=T+V,N=J[U];J[U]=(N<<8|N>>>24)&16711935|(N<<24|N>>>8)&4278255360}var 
V=this._hash.words,U=J[T+0],N=J[T+1],S=J[T+2],F=J[T+3],d=J[T+4],L=J[T+5],H=J[T+6],n=J[T+7],p=J[T+8],E=J[T+9],l=J[T+10],b=J[T+11],M=J[T+12],K=J[T+13],I=J[T+14],G=J[T+15],R=V[0],Q=V[1],P=V[2],O=V[3],R=f(R,Q,P,O,U,7,h[0]),O=f(O,R,Q,P,N,12,h[1]),P=f(P,O,R,Q,S,17,h[2]),Q=f(Q,P,O,R,F,22,h[3]),R=f(R,Q,P,O,d,7,h[4]),O=f(O,R,Q,P,L,12,h[5]),P=f(P,O,R,Q,H,17,h[6]),Q=f(Q,P,O,R,n,22,h[7]),R=f(R,Q,P,O,p,7,h[8]),O=f(O,R,Q,P,E,12,h[9]),P=f(P,O,R,Q,l,17,h[10]),Q=f(Q,P,O,R,b,22,h[11]),R=f(R,Q,P,O,M,7,h[12]),O=f(O,R,Q,P,K,12,h[13]),P=f(P,O,R,Q,I,17,h[14]),Q=f(Q,P,O,R,G,22,h[15]),R=g(R,Q,P,O,N,5,h[16]),O=g(O,R,Q,P,H,9,h[17]),P=g(P,O,R,Q,b,14,h[18]),Q=g(Q,P,O,R,U,20,h[19]),R=g(R,Q,P,O,L,5,h[20]),O=g(O,R,Q,P,l,9,h[21]),P=g(P,O,R,Q,G,14,h[22]),Q=g(Q,P,O,R,d,20,h[23]),R=g(R,Q,P,O,E,5,h[24]),O=g(O,R,Q,P,I,9,h[25]),P=g(P,O,R,Q,F,14,h[26]),Q=g(Q,P,O,R,p,20,h[27]),R=g(R,Q,P,O,K,5,h[28]),O=g(O,R,Q,P,S,9,h[29]),P=g(P,O,R,Q,n,14,h[30]),Q=g(Q,P,O,R,M,20,h[31]),R=e(R,Q,P,O,L,4,h[32]),O=e(O,R,Q,P,p,11,h[33]),P=e(P,O,R,Q,b,16,h[34]),Q=e(Q,P,O,R,I,23,h[35]),R=e(R,Q,P,O,N,4,h[36]),O=e(O,R,Q,P,d,11,h[37]),P=e(P,O,R,Q,n,16,h[38]),Q=e(Q,P,O,R,l,23,h[39]),R=e(R,Q,P,O,K,4,h[40]),O=e(O,R,Q,P,U,11,h[41]),P=e(P,O,R,Q,F,16,h[42]),Q=e(Q,P,O,R,H,23,h[43]),R=e(R,Q,P,O,E,4,h[44]),O=e(O,R,Q,P,M,11,h[45]),P=e(P,O,R,Q,G,16,h[46]),Q=e(Q,P,O,R,S,23,h[47]),R=c(R,Q,P,O,U,6,h[48]),O=c(O,R,Q,P,n,10,h[49]),P=c(P,O,R,Q,I,15,h[50]),Q=c(Q,P,O,R,L,21,h[51]),R=c(R,Q,P,O,M,6,h[52]),O=c(O,R,Q,P,F,10,h[53]),P=c(P,O,R,Q,l,15,h[54]),Q=c(Q,P,O,R,N,21,h[55]),R=c(R,Q,P,O,p,6,h[56]),O=c(O,R,Q,P,G,10,h[57]),P=c(P,O,R,Q,H,15,h[58]),Q=c(Q,P,O,R,K,21,h[59]),R=c(R,Q,P,O,d,6,h[60]),O=c(O,R,Q,P,b,10,h[61]),P=c(P,O,R,Q,S,15,h[62]),Q=c(Q,P,O,R,E,21,h[63]);
V[0]=V[0]+R|0;V[1]=V[1]+Q|0;V[2]=V[2]+P|0;V[3]=V[3]+O|0},_doFinalize:function(){var l=this._data,p=l.words,n=8*this._nDataBytes,q=8*l.sigBytes;p[q>>>5]|=128<<24-q%32;var d=m.floor(n/4294967296);p[(q+64>>>9<<4)+15]=(d<<8|d>>>24)&16711935|(d<<24|d>>>8)&4278255360;p[(q+64>>>9<<4)+14]=(n<<8|n>>>24)&16711935|(n<<24|n>>>8)&4278255360;l.sigBytes=4*(p.length+1);this._process();l=this._hash;p=l.words;for(n=0;4>n;n++){q=p[n],p[n]=(q<<8|q>>>24)&16711935|(q<<24|q>>>8)&4278255360}return l},clone:function(){var d=k.clone.call(this);d._hash=this._hash.clone();return d}});o.MD5=k._createHelper(a);o.HmacMD5=k._createHmacHelper(a)})(Math);(function(){var b=CryptoJS,a=b.lib,e=a.Base,f=a.WordArray,a=b.algo,c=a.EvpKDF=e.extend({cfg:e.extend({keySize:4,hasher:a.MD5,iterations:1}),init:function(g){this.cfg=this.cfg.extend(g)},compute:function(k,i){for(var h=this.cfg,o=h.hasher.create(),m=f.create(),q=m.words,g=h.keySize,h=h.iterations;q.length>>2]&255}};m.BlockCipher=B.extend({cfg:B.cfg.extend({mode:t,padding:f}),reset:function(){B.reset.call(this);var e=this.cfg,l=e.iv,e=e.mode;if(this._xformMode==this._ENC_XFORM_MODE){var d=e.createEncryptor}else{d=e.createDecryptor,this._minBufferSize=1}this._mode=d.call(e,this,l&&l.words)},_doProcessBlock:function(b,d){this._mode.processBlock(b,d)},_doFinalize:function(){var b=this.cfg.padding;if(this._xformMode==this._ENC_XFORM_MODE){b.pad(this._data,this.blockSize);var d=this._process(!0)}else{d=this._process(!0),b.unpad(d)}return d},blockSize:4});var k=m.CipherParams=i.extend({init:function(b){this.mixIn(b)},toString:function(b){return(b||this.formatter).stringify(this)}}),t=(j.format={}).OpenSSL={stringify:function(b){var d=b.ciphertext;b=b.salt;return(b?h.create([1398893684,1701076831]).concat(b).concat(d):d).toString(g)},parse:function(e){e=g.parse(e);var l=e.words;if(1398893684==l[0]&&1701076831==l[1]){var d=h.create(l.slice(2,4));l.splice(0,4);e.sigBytes-=16}return 
k.create({ciphertext:e,salt:d})}},y=m.SerializableCipher=i.extend({cfg:i.extend({format:t}),encrypt:function(p,r,e,q){q=this.cfg.extend(q);var n=p.createEncryptor(e,q);r=n.finalize(r);n=n.cfg;return k.create({ciphertext:r,key:e,iv:n.iv,algorithm:p,mode:n.mode,padding:n.padding,blockSize:p.blockSize,formatter:q.format})},decrypt:function(l,p,d,n){n=this.cfg.extend(n);p=this._parse(p,n.format);return l.createDecryptor(d,n).finalize(p.ciphertext)},_parse:function(b,d){return"string"==typeof b?d.parse(b,this):b}}),j=(j.kdf={}).OpenSSL={execute:function(l,p,e,n){n||(n=h.random(8));l=A.create({keySize:p+e}).compute(l,n);e=h.create(l.words.slice(p),4*e);l.sigBytes=4*p;return k.create({key:l,iv:e,salt:n})}},o=m.PasswordBasedCipher=y.extend({cfg:y.cfg.extend({kdf:j}),encrypt:function(p,a,l,n){n=this.cfg.extend(n);l=n.kdf.execute(l,p.keySize,p.ivSize);n.iv=l.iv;p=y.encrypt.call(this,p,a,l.key,n);p.mixIn(l);return p},decrypt:function(p,a,l,n){n=this.cfg.extend(n);a=this._parse(a,n.format);l=n.kdf.execute(l,p.keySize,p.ivSize,a.salt);n.iv=l.iv;return y.decrypt.call(this,p,a,l.key,n)}})
}();(function(){function o(d,l){var n=(this._lBlock>>>d^this._rBlock)&l;this._rBlock^=n;this._lBlock^=n<>>d^this._lBlock)&l;this._lBlock^=n;this._rBlock^=n<u;u++){var s=c[u]-1;q[u]=n[s>>>5]>>>31-s%32&1}n=this._subKeys=[];for(s=0;16>s;s++){for(var r=n[s]=[],p=m[s],u=0;24>u;u++){r[u/6|0]|=q[(k[u]-1+p)%28]<<31-u%6,r[4+(u/6|0)]|=q[28+(k[u+24]-1+p)%28]<<31-u%6}r[0]=r[0]<<1|r[0]>>>31;for(u=1;7>u;u++){r[u]>>>=4*(u-1)+3}r[7]=r[7]<<5|r[7]>>>27}q=this._invSubKeys=[];for(u=0;16>u;u++){q[u]=n[15-u]}},encryptBlock:function(d,l){this._doCryptBlock(d,l,this._subKeys)},decryptBlock:function(d,l){this._doCryptBlock(d,l,this._invSubKeys)},_doCryptBlock:function(w,z,y){this._lBlock=w[z];this._rBlock=w[z+1];o.call(this,4,252645135);o.call(this,16,65535);g.call(this,2,858993459);g.call(this,8,16711935);o.call(this,1,1431655765);for(var x=0;16>x;x++){for(var v=y[x],u=this._lBlock,s=this._rBlock,l=0,b=0;8>b;b++){l|=i[b][((s^v[b])&j[b])>>>0]}this._lBlock=s;this._rBlock=u^l}y=this._lBlock;this._lBlock=this._rBlock;this._rBlock=y;o.call(this,1,1431655765);g.call(this,8,16711935);g.call(this,2,858993459);o.call(this,16,65535);o.call(this,4,252645135);w[z]=this._lBlock;w[z+1]=this._rBlock},keySize:2,ivSize:2,blockSize:2});h.DES=f._createHelper(a);t=t.TripleDES=f.extend({_doReset:function(){var d=this._key.words;this._des1=a.createEncryptor(e.create(d.slice(0,2)));this._des2=a.createEncryptor(e.create(d.slice(2,4)));this._des3=a.createEncryptor(e.create(d.slice(4,6)))},encryptBlock:function(d,l){this._des1.encryptBlock(d,l);this._des2.decryptBlock(d,l);this._des3.encryptBlock(d,l)},decryptBlock:function(d,l){this._des3.decryptBlock(d,l);this._des2.encryptBlock(d,l);this._des1.decryptBlock(d,l)},keySize:6,ivSize:2,blockSize:2});h.TripleDES=f._createHelper(t)})();var DES3={iv:function(){return $.WebSite.formatDate(new 
Date(),"yyyyMMdd")},encrypt:function(b,c,a){if(c){return(CryptoJS.TripleDES.encrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""},decrypt:function(b,c,a){if(c){return CryptoJS.enc.Utf8.stringify(CryptoJS.TripleDES.decrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""}};

我们随便找个网页把这段代码贴进去,并把刚刚postman中得到的result和secretKey填进去,可以发现解密代码会报错
反爬虫破解——裁判文书网_第6张图片
可以看到是没有formatDate属性。点击我上图框的位置找到报错的js
反爬虫破解——裁判文书网_第7张图片
可以看到是因为取不到 $.WebSite.formatDate。现在我们返回文书网的website.js搜索formatDate,可以看到formatDate函数
反爬虫破解——裁判文书网_第8张图片
那么我们修改js代码,把

// Original site code: builds the default IV string by formatting the current
// date with the pattern "yyyyMMdd".
iv: function() {
        // $.WebSite.formatDate is looked up on the page's `$` object; when this
        // code is pasted into another page that lookup fails (the error shown
        // in the article), which is why it gets replaced below.
        return $.WebSite.formatDate(new Date(), "yyyyMMdd")
    },

改为

	// Patched version: same "yyyyMMdd" formatting of the current date, but it
	// calls the standalone myformatDate helper instead of $.WebSite.formatDate.
	iv: function() {
        return myformatDate(new Date(), "yyyyMMdd")
    },

并在js中添加如下方法

/**
 * Format a date-like value according to a simple pattern string.
 *
 * Supported tokens (a token is a run of the same letter):
 *   y  year    - "yyyy" gives the full year, shorter runs keep the trailing digits ("yy" -> "20")
 *   M  month (1-12)      d  day of month      h  hours (0-23)
 *   m  minutes           s  seconds           q  quarter (1-4)
 *   S  milliseconds      - a single "S" is the raw value, a longer run is zero-padded to 3 digits
 * For M/d/h/m/s/q a single letter emits the raw number and a longer run emits
 * the number zero-padded to two digits. Every occurrence of a token in the
 * pattern is replaced, and all fields are read in local time.
 *
 * @param {Date|string|number} v  A Date, a millisecond timestamp, a string of
 *        the form "/Date(<milliseconds>)/", or a date string such as
 *        "2020-01-05T08:09:07.123".
 * @param {string} [format="yyyy-MM-dd"]  Pattern to fill in.
 * @returns {string} The formatted date, or "" when `v` is falsy or does not
 *          resolve to a valid date.
 */
function myformatDate(v, format) {
    if (!v) {
        return "";
    }

    var d = v;
    if (typeof v === "string") {
        if (v.indexOf("/Date(") > -1) {
            // "/Date(1578211747123)/" -> the number between the markers is a millisecond timestamp.
            d = new Date(parseInt(v.replace("/Date(", "").replace(")/", ""), 10));
        } else {
            // Turn "2020-01-05T08:09:07.123" into "2020/01/05 08:09:07"; the
            // fractional ".xxx" part is cut off because parsing fails with it.
            d = new Date(Date.parse(v.replace(/-/g, "/").replace("T", " ").split(".")[0]));
        }
    } else if (typeof v === "number") {
        d = new Date(v);
    }

    // An unparseable input would otherwise leak "NaN" fragments into the result.
    if (isNaN(d.getTime())) {
        return "";
    }

    var fields = {
        M: d.getMonth() + 1,
        d: d.getDate(),
        h: d.getHours(),
        m: d.getMinutes(),
        s: d.getSeconds(),
        q: Math.floor((d.getMonth() + 3) / 3)
    };

    // One global pass with a replacer callback: no dependence on the deprecated
    // RegExp.$1 static state, and repeated tokens are all substituted.
    return String(format || "yyyy-MM-dd").replace(/y+|M+|d+|h+|m+|s+|q+|S+/g, function (run) {
        var kind = run.charAt(0);
        var text;

        if (kind === "y") {
            // Clamp so runs longer than four letters still yield the full year.
            return String(d.getFullYear()).slice(Math.max(0, 4 - run.length));
        }
        if (kind === "S") {
            text = String(d.getMilliseconds());
            return run.length === 1 ? text : ("00" + text).slice(-3);
        }

        text = String(fields[kind]);
        return run.length === 1 ? text : ("00" + text).slice(text.length);
    });
}

更改后完整js代码如下

var CryptoJS=CryptoJS||function(y,h){var j={},g=j.lib={},f=function(){},z=g.Base={extend:function(b){f.prototype=this;var d=new f;b&&d.mixIn(b);d.hasOwnProperty("init")||(d.init=function(){d.$super.init.apply(this,arguments)});d.init.prototype=d;d.$super=this;return d},create:function(){var b=this.extend();b.init.apply(b,arguments);return b},init:function(){},mixIn:function(b){for(var d in b){b.hasOwnProperty(d)&&(this[d]=b[d])}b.hasOwnProperty("toString")&&(this.toString=b.toString)},clone:function(){return this.init.prototype.extend(this)}},c=g.WordArray=z.extend({init:function(b,d){b=this.words=b||[];this.sigBytes=d!=h?d:4*b.length},toString:function(b){return(b||t).stringify(this)},concat:function(d){var n=this.words,b=d.words,l=this.sigBytes;d=d.sigBytes;this.clamp();if(l%4){for(var e=0;e>>2]|=(b[e>>>2]>>>24-8*(e%4)&255)<<24-8*((l+e)%4)}}else{if(65535>>2]=b[e>>>2]}}else{n.push.apply(n,b)}}this.sigBytes+=d;return this},clamp:function(){var b=this.words,d=this.sigBytes;b[d>>>2]&=4294967295<<32-8*(d%4);b.length=y.ceil(d/4)},clone:function(){var b=z.clone.call(this);b.words=this.words.slice(0);return b},random:function(d){for(var e=[],b=0;b>>2]>>>24-8*(l%4)&255;b.push((e>>>4).toString(16));b.push((e&15).toString(16))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>3]|=parseInt(d.substr(e,2),16)<<24-4*(e%8)}return new c.init(b,l/2)}},k=o.Latin1={stringify:function(d){var l=d.words;d=d.sigBytes;for(var b=[],e=0;e>>2]>>>24-8*(e%4)&255))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>2]|=(d.charCodeAt(e)&255)<<24-8*(e%4)}return new c.init(b,l)}},m=o.Utf8={stringify:function(b){try{return decodeURIComponent(escape(k.stringify(b)))}catch(d){throw Error("Malformed UTF-8 data")}},parse:function(b){return k.parse(unescape(encodeURIComponent(b)))}},a=g.BufferedBlockAlgorithm=z.extend({reset:function(){this._data=new c.init;this._nDataBytes=0},_append:function(b){"string"==typeof 
b&&(b=m.parse(b));this._data.concat(b);this._nDataBytes+=b.sigBytes},_process:function(n){var s=this._data,l=s.words,q=s.sigBytes,p=this.blockSize,d=q/(4*p),d=n?y.ceil(d):y.max((d|0)-this._minBufferSize,0);n=d*p;q=y.min(4*n,q);if(n){for(var r=0;r>>2]>>>24-8*(h%4)&255)<<16|(j[h+1>>>2]>>>24-8*((h+1)%4)&255)<<8|j[h+2>>>2]>>>24-8*((h+2)%4)&255,f=0;4>f&&h+0.75*f>>6*(3-f)&63))}}if(j=g.charAt(64)){for(;i.length%4;){i.push(j)}}return i.join("")},parse:function(j){var k=j.length,i=this._map,g=i.charAt(64);g&&(g=j.indexOf(g),-1!=g&&(k=g));for(var g=[],h=0,e=0;e>>6-2*(e%4);g[h>>>2]|=(f|c)<<24-8*(h%4);h++}}return a.create(g,h)},_map:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="}})();(function(m){function f(l,r,n,s,d,q,p){l=l+(r&n|~r&s)+d+p;return(l<>>32-q)+r}function g(l,r,n,s,d,q,p){l=l+(r&s|n&~s)+d+p;return(l<>>32-q)+r}function e(l,r,n,s,d,q,p){l=l+(r^n^s)+d+p;return(l<>>32-q)+r}function c(l,r,n,s,d,q,p){l=l+(n^(r|~s))+d+p;return(l<>>32-q)+r}for(var o=CryptoJS,a=o.lib,j=a.WordArray,k=a.Hasher,a=o.algo,h=[],i=0;64>i;i++){h[i]=4294967296*m.abs(m.sin(i+1))|0}a=a.MD5=k.extend({_doReset:function(){this._hash=new j.init([1732584193,4023233417,2562383102,271733878])},_doProcessBlock:function(J,T){for(var V=0;16>V;V++){var U=T+V,N=J[U];J[U]=(N<<8|N>>>24)&16711935|(N<<24|N>>>8)&4278255360}var 
V=this._hash.words,U=J[T+0],N=J[T+1],S=J[T+2],F=J[T+3],d=J[T+4],L=J[T+5],H=J[T+6],n=J[T+7],p=J[T+8],E=J[T+9],l=J[T+10],b=J[T+11],M=J[T+12],K=J[T+13],I=J[T+14],G=J[T+15],R=V[0],Q=V[1],P=V[2],O=V[3],R=f(R,Q,P,O,U,7,h[0]),O=f(O,R,Q,P,N,12,h[1]),P=f(P,O,R,Q,S,17,h[2]),Q=f(Q,P,O,R,F,22,h[3]),R=f(R,Q,P,O,d,7,h[4]),O=f(O,R,Q,P,L,12,h[5]),P=f(P,O,R,Q,H,17,h[6]),Q=f(Q,P,O,R,n,22,h[7]),R=f(R,Q,P,O,p,7,h[8]),O=f(O,R,Q,P,E,12,h[9]),P=f(P,O,R,Q,l,17,h[10]),Q=f(Q,P,O,R,b,22,h[11]),R=f(R,Q,P,O,M,7,h[12]),O=f(O,R,Q,P,K,12,h[13]),P=f(P,O,R,Q,I,17,h[14]),Q=f(Q,P,O,R,G,22,h[15]),R=g(R,Q,P,O,N,5,h[16]),O=g(O,R,Q,P,H,9,h[17]),P=g(P,O,R,Q,b,14,h[18]),Q=g(Q,P,O,R,U,20,h[19]),R=g(R,Q,P,O,L,5,h[20]),O=g(O,R,Q,P,l,9,h[21]),P=g(P,O,R,Q,G,14,h[22]),Q=g(Q,P,O,R,d,20,h[23]),R=g(R,Q,P,O,E,5,h[24]),O=g(O,R,Q,P,I,9,h[25]),P=g(P,O,R,Q,F,14,h[26]),Q=g(Q,P,O,R,p,20,h[27]),R=g(R,Q,P,O,K,5,h[28]),O=g(O,R,Q,P,S,9,h[29]),P=g(P,O,R,Q,n,14,h[30]),Q=g(Q,P,O,R,M,20,h[31]),R=e(R,Q,P,O,L,4,h[32]),O=e(O,R,Q,P,p,11,h[33]),P=e(P,O,R,Q,b,16,h[34]),Q=e(Q,P,O,R,I,23,h[35]),R=e(R,Q,P,O,N,4,h[36]),O=e(O,R,Q,P,d,11,h[37]),P=e(P,O,R,Q,n,16,h[38]),Q=e(Q,P,O,R,l,23,h[39]),R=e(R,Q,P,O,K,4,h[40]),O=e(O,R,Q,P,U,11,h[41]),P=e(P,O,R,Q,F,16,h[42]),Q=e(Q,P,O,R,H,23,h[43]),R=e(R,Q,P,O,E,4,h[44]),O=e(O,R,Q,P,M,11,h[45]),P=e(P,O,R,Q,G,16,h[46]),Q=e(Q,P,O,R,S,23,h[47]),R=c(R,Q,P,O,U,6,h[48]),O=c(O,R,Q,P,n,10,h[49]),P=c(P,O,R,Q,I,15,h[50]),Q=c(Q,P,O,R,L,21,h[51]),R=c(R,Q,P,O,M,6,h[52]),O=c(O,R,Q,P,F,10,h[53]),P=c(P,O,R,Q,l,15,h[54]),Q=c(Q,P,O,R,N,21,h[55]),R=c(R,Q,P,O,p,6,h[56]),O=c(O,R,Q,P,G,10,h[57]),P=c(P,O,R,Q,H,15,h[58]),Q=c(Q,P,O,R,K,21,h[59]),R=c(R,Q,P,O,d,6,h[60]),O=c(O,R,Q,P,b,10,h[61]),P=c(P,O,R,Q,S,15,h[62]),Q=c(Q,P,O,R,E,21,h[63]);
V[0]=V[0]+R|0;V[1]=V[1]+Q|0;V[2]=V[2]+P|0;V[3]=V[3]+O|0},_doFinalize:function(){var l=this._data,p=l.words,n=8*this._nDataBytes,q=8*l.sigBytes;p[q>>>5]|=128<<24-q%32;var d=m.floor(n/4294967296);p[(q+64>>>9<<4)+15]=(d<<8|d>>>24)&16711935|(d<<24|d>>>8)&4278255360;p[(q+64>>>9<<4)+14]=(n<<8|n>>>24)&16711935|(n<<24|n>>>8)&4278255360;l.sigBytes=4*(p.length+1);this._process();l=this._hash;p=l.words;for(n=0;4>n;n++){q=p[n],p[n]=(q<<8|q>>>24)&16711935|(q<<24|q>>>8)&4278255360}return l},clone:function(){var d=k.clone.call(this);d._hash=this._hash.clone();return d}});o.MD5=k._createHelper(a);o.HmacMD5=k._createHmacHelper(a)})(Math);(function(){var b=CryptoJS,a=b.lib,e=a.Base,f=a.WordArray,a=b.algo,c=a.EvpKDF=e.extend({cfg:e.extend({keySize:4,hasher:a.MD5,iterations:1}),init:function(g){this.cfg=this.cfg.extend(g)},compute:function(k,i){for(var h=this.cfg,o=h.hasher.create(),m=f.create(),q=m.words,g=h.keySize,h=h.iterations;q.length>>2]&255}};m.BlockCipher=B.extend({cfg:B.cfg.extend({mode:t,padding:f}),reset:function(){B.reset.call(this);var e=this.cfg,l=e.iv,e=e.mode;if(this._xformMode==this._ENC_XFORM_MODE){var d=e.createEncryptor}else{d=e.createDecryptor,this._minBufferSize=1}this._mode=d.call(e,this,l&&l.words)},_doProcessBlock:function(b,d){this._mode.processBlock(b,d)},_doFinalize:function(){var b=this.cfg.padding;if(this._xformMode==this._ENC_XFORM_MODE){b.pad(this._data,this.blockSize);var d=this._process(!0)}else{d=this._process(!0),b.unpad(d)}return d},blockSize:4});var k=m.CipherParams=i.extend({init:function(b){this.mixIn(b)},toString:function(b){return(b||this.formatter).stringify(this)}}),t=(j.format={}).OpenSSL={stringify:function(b){var d=b.ciphertext;b=b.salt;return(b?h.create([1398893684,1701076831]).concat(b).concat(d):d).toString(g)},parse:function(e){e=g.parse(e);var l=e.words;if(1398893684==l[0]&&1701076831==l[1]){var d=h.create(l.slice(2,4));l.splice(0,4);e.sigBytes-=16}return 
k.create({ciphertext:e,salt:d})}},y=m.SerializableCipher=i.extend({cfg:i.extend({format:t}),encrypt:function(p,r,e,q){q=this.cfg.extend(q);var n=p.createEncryptor(e,q);r=n.finalize(r);n=n.cfg;return k.create({ciphertext:r,key:e,iv:n.iv,algorithm:p,mode:n.mode,padding:n.padding,blockSize:p.blockSize,formatter:q.format})},decrypt:function(l,p,d,n){n=this.cfg.extend(n);p=this._parse(p,n.format);return l.createDecryptor(d,n).finalize(p.ciphertext)},_parse:function(b,d){return"string"==typeof b?d.parse(b,this):b}}),j=(j.kdf={}).OpenSSL={execute:function(l,p,e,n){n||(n=h.random(8));l=A.create({keySize:p+e}).compute(l,n);e=h.create(l.words.slice(p),4*e);l.sigBytes=4*p;return k.create({key:l,iv:e,salt:n})}},o=m.PasswordBasedCipher=y.extend({cfg:y.cfg.extend({kdf:j}),encrypt:function(p,a,l,n){n=this.cfg.extend(n);l=n.kdf.execute(l,p.keySize,p.ivSize);n.iv=l.iv;p=y.encrypt.call(this,p,a,l.key,n);p.mixIn(l);return p},decrypt:function(p,a,l,n){n=this.cfg.extend(n);a=this._parse(a,n.format);l=n.kdf.execute(l,p.keySize,p.ivSize,a.salt);n.iv=l.iv;return y.decrypt.call(this,p,a,l.key,n)}})
}();(function(){function o(d,l){var n=(this._lBlock>>>d^this._rBlock)&l;this._rBlock^=n;this._lBlock^=n<>>d^this._lBlock)&l;this._lBlock^=n;this._rBlock^=n<u;u++){var s=c[u]-1;q[u]=n[s>>>5]>>>31-s%32&1}n=this._subKeys=[];for(s=0;16>s;s++){for(var r=n[s]=[],p=m[s],u=0;24>u;u++){r[u/6|0]|=q[(k[u]-1+p)%28]<<31-u%6,r[4+(u/6|0)]|=q[28+(k[u+24]-1+p)%28]<<31-u%6}r[0]=r[0]<<1|r[0]>>>31;for(u=1;7>u;u++){r[u]>>>=4*(u-1)+3}r[7]=r[7]<<5|r[7]>>>27}q=this._invSubKeys=[];for(u=0;16>u;u++){q[u]=n[15-u]}},encryptBlock:function(d,l){this._doCryptBlock(d,l,this._subKeys)},decryptBlock:function(d,l){this._doCryptBlock(d,l,this._invSubKeys)},_doCryptBlock:function(w,z,y){this._lBlock=w[z];this._rBlock=w[z+1];o.call(this,4,252645135);o.call(this,16,65535);g.call(this,2,858993459);g.call(this,8,16711935);o.call(this,1,1431655765);for(var x=0;16>x;x++){for(var v=y[x],u=this._lBlock,s=this._rBlock,l=0,b=0;8>b;b++){l|=i[b][((s^v[b])&j[b])>>>0]}this._lBlock=s;this._rBlock=u^l}y=this._lBlock;this._lBlock=this._rBlock;this._rBlock=y;o.call(this,1,1431655765);g.call(this,8,16711935);g.call(this,2,858993459);o.call(this,16,65535);o.call(this,4,252645135);w[z]=this._lBlock;w[z+1]=this._rBlock},keySize:2,ivSize:2,blockSize:2});h.DES=f._createHelper(a);t=t.TripleDES=f.extend({_doReset:function(){var d=this._key.words;this._des1=a.createEncryptor(e.create(d.slice(0,2)));this._des2=a.createEncryptor(e.create(d.slice(2,4)));this._des3=a.createEncryptor(e.create(d.slice(4,6)))},encryptBlock:function(d,l){this._des1.encryptBlock(d,l);this._des2.decryptBlock(d,l);this._des3.encryptBlock(d,l)},decryptBlock:function(d,l){this._des3.decryptBlock(d,l);this._des2.encryptBlock(d,l);this._des1.decryptBlock(d,l)},keySize:6,ivSize:2,blockSize:2});h.TripleDES=f._createHelper(t)})();function myformatDate(v,format){if(!v){return""}var d=v;if(typeof v==="string"){if(v.indexOf("/Date(")>-1){d=new Date(parseInt(v.replace("/Date(","").replace(")/",""),10))}else{d=new Date(Date.parse(v.replace(/-/g,"/").replace("T"," 
").split(".")[0]))}}else{if(typeof v==="number"){d=new Date(v)}}var o={"M+":d.getMonth()+1,"d+":d.getDate(),"h+":d.getHours(),"m+":d.getMinutes(),"s+":d.getSeconds(),"q+":Math.floor((d.getMonth()+3)/3),"S":d.getMilliseconds()};format=format||"yyyy-MM-dd";if(/(y+)/.test(format)){format=format.replace(RegExp.$1,(d.getFullYear()+"").substr(4-RegExp.$1.length))}for(var k in o){if(new RegExp("("+k+")").test(format)){format=format.replace(RegExp.$1,RegExp.$1.length==1?o[k]:("00"+o[k]).substr((""+o[k]).length))}}return format}var DES3={iv:function(){return myformatDate(new Date(),"yyyyMMdd")},encrypt:function(b,c,a){if(c){return(CryptoJS.TripleDES.encrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""},decrypt:function(b,c,a){if(c){return CryptoJS.enc.Utf8.stringify(CryptoJS.TripleDES.decrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""}};

我们再随便找个网页,把上面的代码贴进去并调用DES3.decrypt("xxx","xxx")(第一个参数为result,第二个参数为secretKey),可以发现已经能解密成功了
反爬虫破解——裁判文书网_第9张图片
ok,到这里解密就完成了。现在我们回过头来看一下请求,再次在postman中执行刚刚的请求,可以发现已经不通了
反爬虫破解——裁判文书网_第10张图片

经过与成功的请求对比可以发现参数ciphertext、__RequestVerificationToken发生了改变,那么我们研究下这两个参数是怎么生成的。

经过全局搜索可以发现ciphertext生成的位置
反爬虫破解——裁判文书网_第11张图片
可以发现strToBinary.js就是用来生成ciphertext的。
全局搜索__RequestVerificationToken很容易发现生成位置
在这里插入图片描述
通过断点调试可以找到random函数
反爬虫破解——裁判文书网_第12张图片

到这里请求已经能发送成功并解密成功了,但这个返回的只是文书的简略信息以及docId,并没有文书全部内容,因此我们还需要再分析如何通过返回的docId来爬取文书全文。鉴于获取文书全文的加解密方法都一致,我这里不做赘述,需要的同学可以直接看代码

思路整理

1、登录网站获取cookie
2、通过上述函数计算出ciphertext与__RequestVerificationToken的值,并封装请求
3、拿到响应后通过上述解密函数进行数据解密拿到docId
4、通过docId获取文书全部内容

Coding

首先把上述的js整理在一起,完整js压缩后如下

var CryptoJS=CryptoJS||function(y,h){var j={},g=j.lib={},f=function(){},z=g.Base={extend:function(b){f.prototype=this;var d=new f;b&&d.mixIn(b);d.hasOwnProperty("init")||(d.init=function(){d.$super.init.apply(this,arguments)});d.init.prototype=d;d.$super=this;return d},create:function(){var b=this.extend();b.init.apply(b,arguments);return b},init:function(){},mixIn:function(b){for(var d in b){b.hasOwnProperty(d)&&(this[d]=b[d])}b.hasOwnProperty("toString")&&(this.toString=b.toString)},clone:function(){return this.init.prototype.extend(this)}},c=g.WordArray=z.extend({init:function(b,d){b=this.words=b||[];this.sigBytes=d!=h?d:4*b.length},toString:function(b){return(b||t).stringify(this)},concat:function(d){var n=this.words,b=d.words,l=this.sigBytes;d=d.sigBytes;this.clamp();if(l%4){for(var e=0;e>>2]|=(b[e>>>2]>>>24-8*(e%4)&255)<<24-8*((l+e)%4)}}else{if(65535>>2]=b[e>>>2]}}else{n.push.apply(n,b)}}this.sigBytes+=d;return this},clamp:function(){var b=this.words,d=this.sigBytes;b[d>>>2]&=4294967295<<32-8*(d%4);b.length=y.ceil(d/4)},clone:function(){var b=z.clone.call(this);b.words=this.words.slice(0);return b},random:function(d){for(var e=[],b=0;b>>2]>>>24-8*(l%4)&255;b.push((e>>>4).toString(16));b.push((e&15).toString(16))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>3]|=parseInt(d.substr(e,2),16)<<24-4*(e%8)}return new c.init(b,l/2)}},k=o.Latin1={stringify:function(d){var l=d.words;d=d.sigBytes;for(var b=[],e=0;e>>2]>>>24-8*(e%4)&255))}return b.join("")},parse:function(d){for(var l=d.length,b=[],e=0;e>>2]|=(d.charCodeAt(e)&255)<<24-8*(e%4)}return new c.init(b,l)}},m=o.Utf8={stringify:function(b){try{return decodeURIComponent(escape(k.stringify(b)))}catch(d){throw Error("Malformed UTF-8 data")}},parse:function(b){return k.parse(unescape(encodeURIComponent(b)))}},a=g.BufferedBlockAlgorithm=z.extend({reset:function(){this._data=new c.init;this._nDataBytes=0},_append:function(b){"string"==typeof 
b&&(b=m.parse(b));this._data.concat(b);this._nDataBytes+=b.sigBytes},_process:function(n){var s=this._data,l=s.words,q=s.sigBytes,p=this.blockSize,d=q/(4*p),d=n?y.ceil(d):y.max((d|0)-this._minBufferSize,0);n=d*p;q=y.min(4*n,q);if(n){for(var r=0;r>>2]>>>24-8*(h%4)&255)<<16|(j[h+1>>>2]>>>24-8*((h+1)%4)&255)<<8|j[h+2>>>2]>>>24-8*((h+2)%4)&255,f=0;4>f&&h+0.75*f>>6*(3-f)&63))}}if(j=g.charAt(64)){for(;i.length%4;){i.push(j)}}return i.join("")},parse:function(j){var k=j.length,i=this._map,g=i.charAt(64);g&&(g=j.indexOf(g),-1!=g&&(k=g));for(var g=[],h=0,e=0;e>>6-2*(e%4);g[h>>>2]|=(f|c)<<24-8*(h%4);h++}}return a.create(g,h)},_map:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="}})();(function(m){function f(l,r,n,s,d,q,p){l=l+(r&n|~r&s)+d+p;return(l<>>32-q)+r}function g(l,r,n,s,d,q,p){l=l+(r&s|n&~s)+d+p;return(l<>>32-q)+r}function e(l,r,n,s,d,q,p){l=l+(r^n^s)+d+p;return(l<>>32-q)+r}function c(l,r,n,s,d,q,p){l=l+(n^(r|~s))+d+p;return(l<>>32-q)+r}for(var o=CryptoJS,a=o.lib,j=a.WordArray,k=a.Hasher,a=o.algo,h=[],i=0;64>i;i++){h[i]=4294967296*m.abs(m.sin(i+1))|0}a=a.MD5=k.extend({_doReset:function(){this._hash=new j.init([1732584193,4023233417,2562383102,271733878])},_doProcessBlock:function(J,T){for(var V=0;16>V;V++){var U=T+V,N=J[U];J[U]=(N<<8|N>>>24)&16711935|(N<<24|N>>>8)&4278255360}var 
V=this._hash.words,U=J[T+0],N=J[T+1],S=J[T+2],F=J[T+3],d=J[T+4],L=J[T+5],H=J[T+6],n=J[T+7],p=J[T+8],E=J[T+9],l=J[T+10],b=J[T+11],M=J[T+12],K=J[T+13],I=J[T+14],G=J[T+15],R=V[0],Q=V[1],P=V[2],O=V[3],R=f(R,Q,P,O,U,7,h[0]),O=f(O,R,Q,P,N,12,h[1]),P=f(P,O,R,Q,S,17,h[2]),Q=f(Q,P,O,R,F,22,h[3]),R=f(R,Q,P,O,d,7,h[4]),O=f(O,R,Q,P,L,12,h[5]),P=f(P,O,R,Q,H,17,h[6]),Q=f(Q,P,O,R,n,22,h[7]),R=f(R,Q,P,O,p,7,h[8]),O=f(O,R,Q,P,E,12,h[9]),P=f(P,O,R,Q,l,17,h[10]),Q=f(Q,P,O,R,b,22,h[11]),R=f(R,Q,P,O,M,7,h[12]),O=f(O,R,Q,P,K,12,h[13]),P=f(P,O,R,Q,I,17,h[14]),Q=f(Q,P,O,R,G,22,h[15]),R=g(R,Q,P,O,N,5,h[16]),O=g(O,R,Q,P,H,9,h[17]),P=g(P,O,R,Q,b,14,h[18]),Q=g(Q,P,O,R,U,20,h[19]),R=g(R,Q,P,O,L,5,h[20]),O=g(O,R,Q,P,l,9,h[21]),P=g(P,O,R,Q,G,14,h[22]),Q=g(Q,P,O,R,d,20,h[23]),R=g(R,Q,P,O,E,5,h[24]),O=g(O,R,Q,P,I,9,h[25]),P=g(P,O,R,Q,F,14,h[26]),Q=g(Q,P,O,R,p,20,h[27]),R=g(R,Q,P,O,K,5,h[28]),O=g(O,R,Q,P,S,9,h[29]),P=g(P,O,R,Q,n,14,h[30]),Q=g(Q,P,O,R,M,20,h[31]),R=e(R,Q,P,O,L,4,h[32]),O=e(O,R,Q,P,p,11,h[33]),P=e(P,O,R,Q,b,16,h[34]),Q=e(Q,P,O,R,I,23,h[35]),R=e(R,Q,P,O,N,4,h[36]),O=e(O,R,Q,P,d,11,h[37]),P=e(P,O,R,Q,n,16,h[38]),Q=e(Q,P,O,R,l,23,h[39]),R=e(R,Q,P,O,K,4,h[40]),O=e(O,R,Q,P,U,11,h[41]),P=e(P,O,R,Q,F,16,h[42]),Q=e(Q,P,O,R,H,23,h[43]),R=e(R,Q,P,O,E,4,h[44]),O=e(O,R,Q,P,M,11,h[45]),P=e(P,O,R,Q,G,16,h[46]),Q=e(Q,P,O,R,S,23,h[47]),R=c(R,Q,P,O,U,6,h[48]),O=c(O,R,Q,P,n,10,h[49]),P=c(P,O,R,Q,I,15,h[50]),Q=c(Q,P,O,R,L,21,h[51]),R=c(R,Q,P,O,M,6,h[52]),O=c(O,R,Q,P,F,10,h[53]),P=c(P,O,R,Q,l,15,h[54]),Q=c(Q,P,O,R,N,21,h[55]),R=c(R,Q,P,O,p,6,h[56]),O=c(O,R,Q,P,G,10,h[57]),P=c(P,O,R,Q,H,15,h[58]),Q=c(Q,P,O,R,K,21,h[59]),R=c(R,Q,P,O,d,6,h[60]),O=c(O,R,Q,P,b,10,h[61]),P=c(P,O,R,Q,S,15,h[62]),Q=c(Q,P,O,R,E,21,h[63]);
V[0]=V[0]+R|0;V[1]=V[1]+Q|0;V[2]=V[2]+P|0;V[3]=V[3]+O|0},_doFinalize:function(){var l=this._data,p=l.words,n=8*this._nDataBytes,q=8*l.sigBytes;p[q>>>5]|=128<<24-q%32;var d=m.floor(n/4294967296);p[(q+64>>>9<<4)+15]=(d<<8|d>>>24)&16711935|(d<<24|d>>>8)&4278255360;p[(q+64>>>9<<4)+14]=(n<<8|n>>>24)&16711935|(n<<24|n>>>8)&4278255360;l.sigBytes=4*(p.length+1);this._process();l=this._hash;p=l.words;for(n=0;4>n;n++){q=p[n],p[n]=(q<<8|q>>>24)&16711935|(q<<24|q>>>8)&4278255360}return l},clone:function(){var d=k.clone.call(this);d._hash=this._hash.clone();return d}});o.MD5=k._createHelper(a);o.HmacMD5=k._createHmacHelper(a)})(Math);(function(){var b=CryptoJS,a=b.lib,e=a.Base,f=a.WordArray,a=b.algo,c=a.EvpKDF=e.extend({cfg:e.extend({keySize:4,hasher:a.MD5,iterations:1}),init:function(g){this.cfg=this.cfg.extend(g)},compute:function(k,i){for(var h=this.cfg,o=h.hasher.create(),m=f.create(),q=m.words,g=h.keySize,h=h.iterations;q.length>>2]&255}};m.BlockCipher=B.extend({cfg:B.cfg.extend({mode:t,padding:f}),reset:function(){B.reset.call(this);var e=this.cfg,l=e.iv,e=e.mode;if(this._xformMode==this._ENC_XFORM_MODE){var d=e.createEncryptor}else{d=e.createDecryptor,this._minBufferSize=1}this._mode=d.call(e,this,l&&l.words)},_doProcessBlock:function(b,d){this._mode.processBlock(b,d)},_doFinalize:function(){var b=this.cfg.padding;if(this._xformMode==this._ENC_XFORM_MODE){b.pad(this._data,this.blockSize);var d=this._process(!0)}else{d=this._process(!0),b.unpad(d)}return d},blockSize:4});var k=m.CipherParams=i.extend({init:function(b){this.mixIn(b)},toString:function(b){return(b||this.formatter).stringify(this)}}),t=(j.format={}).OpenSSL={stringify:function(b){var d=b.ciphertext;b=b.salt;return(b?h.create([1398893684,1701076831]).concat(b).concat(d):d).toString(g)},parse:function(e){e=g.parse(e);var l=e.words;if(1398893684==l[0]&&1701076831==l[1]){var d=h.create(l.slice(2,4));l.splice(0,4);e.sigBytes-=16}return 
k.create({ciphertext:e,salt:d})}},y=m.SerializableCipher=i.extend({cfg:i.extend({format:t}),encrypt:function(p,r,e,q){q=this.cfg.extend(q);var n=p.createEncryptor(e,q);r=n.finalize(r);n=n.cfg;return k.create({ciphertext:r,key:e,iv:n.iv,algorithm:p,mode:n.mode,padding:n.padding,blockSize:p.blockSize,formatter:q.format})},decrypt:function(l,p,d,n){n=this.cfg.extend(n);p=this._parse(p,n.format);return l.createDecryptor(d,n).finalize(p.ciphertext)},_parse:function(b,d){return"string"==typeof b?d.parse(b,this):b}}),j=(j.kdf={}).OpenSSL={execute:function(l,p,e,n){n||(n=h.random(8));l=A.create({keySize:p+e}).compute(l,n);e=h.create(l.words.slice(p),4*e);l.sigBytes=4*p;return k.create({key:l,iv:e,salt:n})}},o=m.PasswordBasedCipher=y.extend({cfg:y.cfg.extend({kdf:j}),encrypt:function(p,a,l,n){n=this.cfg.extend(n);l=n.kdf.execute(l,p.keySize,p.ivSize);n.iv=l.iv;p=y.encrypt.call(this,p,a,l.key,n);p.mixIn(l);return p},decrypt:function(p,a,l,n){n=this.cfg.extend(n);a=this._parse(a,n.format);l=n.kdf.execute(l,p.keySize,p.ivSize,a.salt);n.iv=l.iv;return y.decrypt.call(this,p,a,l.key,n)}})
}();(function(){function o(d,l){var n=(this._lBlock>>>d^this._rBlock)&l;this._rBlock^=n;this._lBlock^=n<>>d^this._lBlock)&l;this._lBlock^=n;this._rBlock^=n<u;u++){var s=c[u]-1;q[u]=n[s>>>5]>>>31-s%32&1}n=this._subKeys=[];for(s=0;16>s;s++){for(var r=n[s]=[],p=m[s],u=0;24>u;u++){r[u/6|0]|=q[(k[u]-1+p)%28]<<31-u%6,r[4+(u/6|0)]|=q[28+(k[u+24]-1+p)%28]<<31-u%6}r[0]=r[0]<<1|r[0]>>>31;for(u=1;7>u;u++){r[u]>>>=4*(u-1)+3}r[7]=r[7]<<5|r[7]>>>27}q=this._invSubKeys=[];for(u=0;16>u;u++){q[u]=n[15-u]}},encryptBlock:function(d,l){this._doCryptBlock(d,l,this._subKeys)},decryptBlock:function(d,l){this._doCryptBlock(d,l,this._invSubKeys)},_doCryptBlock:function(w,z,y){this._lBlock=w[z];this._rBlock=w[z+1];o.call(this,4,252645135);o.call(this,16,65535);g.call(this,2,858993459);g.call(this,8,16711935);o.call(this,1,1431655765);for(var x=0;16>x;x++){for(var v=y[x],u=this._lBlock,s=this._rBlock,l=0,b=0;8>b;b++){l|=i[b][((s^v[b])&j[b])>>>0]}this._lBlock=s;this._rBlock=u^l}y=this._lBlock;this._lBlock=this._rBlock;this._rBlock=y;o.call(this,1,1431655765);g.call(this,8,16711935);g.call(this,2,858993459);o.call(this,16,65535);o.call(this,4,252645135);w[z]=this._lBlock;w[z+1]=this._rBlock},keySize:2,ivSize:2,blockSize:2});h.DES=f._createHelper(a);t=t.TripleDES=f.extend({_doReset:function(){var d=this._key.words;this._des1=a.createEncryptor(e.create(d.slice(0,2)));this._des2=a.createEncryptor(e.create(d.slice(2,4)));this._des3=a.createEncryptor(e.create(d.slice(4,6)))},encryptBlock:function(d,l){this._des1.encryptBlock(d,l);this._des2.decryptBlock(d,l);this._des3.encryptBlock(d,l)},decryptBlock:function(d,l){this._des3.decryptBlock(d,l);this._des2.encryptBlock(d,l);this._des1.decryptBlock(d,l)},keySize:6,ivSize:2,blockSize:2});h.TripleDES=f._createHelper(t)})();function myformatDate(v,format){if(!v){return""}var d=v;if(typeof v==="string"){if(v.indexOf("/Date(")>-1){d=new Date(parseInt(v.replace("/Date(","").replace(")/",""),10))}else{d=new Date(Date.parse(v.replace(/-/g,"/").replace("T"," 
").split(".")[0]))}}else{if(typeof v==="number"){d=new Date(v)}}var o={"M+":d.getMonth()+1,"d+":d.getDate(),"h+":d.getHours(),"m+":d.getMinutes(),"s+":d.getSeconds(),"q+":Math.floor((d.getMonth()+3)/3),"S":d.getMilliseconds()};format=format||"yyyy-MM-dd";if(/(y+)/.test(format)){format=format.replace(RegExp.$1,(d.getFullYear()+"").substr(4-RegExp.$1.length))}for(var k in o){if(new RegExp("("+k+")").test(format)){format=format.replace(RegExp.$1,RegExp.$1.length==1?o[k]:("00"+o[k]).substr((""+o[k]).length))}}return format}var DES3={iv:function(){return myformatDate(new Date(),"yyyyMMdd")},encrypt:function(b,c,a){if(c){return(CryptoJS.TripleDES.encrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""},decrypt:function(b,c,a){if(c){return CryptoJS.enc.Utf8.stringify(CryptoJS.TripleDES.decrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""}};function decrypt(b,c,a){if(c){return CryptoJS.enc.Utf8.stringify(CryptoJS.TripleDES.decrypt(b,CryptoJS.enc.Utf8.parse(c),{iv:CryptoJS.enc.Utf8.parse(a||DES3.iv()),mode:CryptoJS.mode.CBC,padding:CryptoJS.pad.Pkcs7})).toString()}return""}function geneToken(size){var str="",arr=["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z"];for(var i=0;i

在应用启动时加载js

/** Handle used to invoke the functions defined by the evaluated entry.js script. */
private static Invocable inv;
    static {
        // Read and evaluate the script once, when the class is initialised.
        try {
            String jsScript = FileUtil.readString(new File("/Users/kevindai/Downloads/entry.js"), Charset.defaultCharset());
            ScriptEngineManager manager = new ScriptEngineManager();
            ScriptEngine engine = manager.getEngineByName("JavaScript");
            if (engine == null) {
                // getEngineByName returns null when no engine is registered under that name;
                // report it here instead of hitting a NullPointerException on eval.
                throw new IllegalStateException("No JavaScript ScriptEngine is available on this JVM");
            }
            engine.eval(jsScript);
            inv = (Invocable) engine;
        } catch (ScriptException e) {
            // Fail fast: leaving inv null would only surface later as a
            // NullPointerException in geneToken/geneCiphertext/decode.
            throw new IllegalStateException("Failed to evaluate entry.js", e);
        }
    }

编写生成参数以及解密代码

   /**
    * Invokes the script function {@code geneToken} with the string argument "24".
    *
    * @return the value returned by the script, cast to String
    * @throws Exception if the script invocation fails
    */
   private String geneToken() throws Exception{
        Object token = inv.invokeFunction("geneToken","24");
        return (String) token;
    }

    /**
     * Invokes the script function {@code cipher} without arguments.
     *
     * @return the value returned by the script, cast to String
     * @throws Exception if the script invocation fails
     */
    private String geneCiphertext() throws Exception{
        Object ciphertext = inv.invokeFunction("cipher");
        return (String) ciphertext;
    }


    /**
     * Invokes the script function {@code decrypt} with the given arguments.
     *
     * @param value first argument passed to the script function (callers pass the response's "result" field)
     * @param key   second argument passed to the script function (callers pass the response's "secretKey" field)
     * @return the value returned by the script, cast to String
     * @throws Exception if the script invocation fails
     */
    private String decode(String value,String key) throws Exception{
        Object plain = inv.invokeFunction("decrypt", value, key);
        return (String) plain;
    }

编写docId爬取代码

@Test
    public void getDocId() throws Exception{
        RestTemplate restTemplate = ApplicationContextUtil.getBean(RestTemplate.class);

        Set result = new HashSet<>();
        for (int i = 1; i < 2; i++) {
            try{
                HttpHeaders headers = new HttpHeaders();
                headers.setContentType(MediaType.APPLICATION_FORM_URLENCODED);
                headers.set("Host","wenshu.court.gov.cn");
                headers.set("Accept","application/json, text/javascript, */*; q=0.01");
                headers.set("X-Requested-With","XMLHttpRequest");
                headers.set("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36");
                headers.set("Origin","https://wenshu.court.gov.cn");
                headers.set("Sec-Fetch-Site","same-origin");
                headers.set("Sec-Fetch-Mode","cors");
                headers.set("Sec-Fetch-Dest","empty");
                headers.set("Referer","");//使用网页上的referer
                headers.set("Accept-Language","en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6");
                headers.set("Cookie","");//直接使用网页上的cookie



                MultiValueMap map= new LinkedMultiValueMap();
                map.add("pageId", "d91f41eae9a449a15ab899dde08cd0c5");
                map.add("s21", "经济犯罪");
                map.add("sortFields", "s50:desc");
                map.add("ciphertext", geneCiphertext());
                map.add("pageNum", String.valueOf(i));
                map.add("pageSize", "50");
                map.add("queryCondition", "[{\"key\":\"s21\",\"value\":\"经济犯罪\"}]");
                map.add("cfg", "com.lawyee.judge.dc.parse.dto.SearchDataDsoDTO@queryDoc");
                map.add("__RequestVerificationToken", geneToken());

                HttpEntity> entity = new HttpEntity>(map, headers);
                ResponseEntity response = restTemplate.exchange(URL, HttpMethod.POST, entity, String.class);
                if(response.getStatusCode() == HttpStatus.OK){
                    String body = response.getBody();
                    JSONObject resultJson = JSONUtil.parseObj(body);
                    String secretKey = resultJson.getStr("secretKey");
                    String value = resultJson.getStr("result");
                    String docBrief = decode(value, secretKey);
                    JSONObject relWenshu = JSONUtil.parseObj(docBrief).getJSONObject("relWenshu");
                    result.addAll(relWenshu.keySet());
                    TimeUnit.SECONDS.sleep(RandomUtils.nextInt(5,10));

                }
            }catch (Exception e){
                e.printStackTrace();
            }

        }
        CsvUtil.getWriter(new File("/Users/kevindai/Downloads/wenshuid1.csv"),Charset.defaultCharset(),false).write(result);
    }

我这里只是写了个demo,所以cookie是直接从网站上复制的,需要用在生产环境的同学可以先用selenium之类的框架模拟登录拿到cookie,这样可以实现全流程的自动化

编写爬取文书全文的代码

// Reads the docIds written by getDocId, requests the full document for each one,
// decrypts it and collects (s17, s7, plain text of qwContent) rows for the output CSV.
List<String> docIds = FileUtil.readLines("/Users/kevindai/Downloads/wenshuid1.csv", Charset.defaultCharset());
        ArrayList<ArrayList<String>> result = new ArrayList<>();
        // the bean lookup does not depend on the docId, so do it once
        RestTemplate restTemplate = ApplicationContextUtil.getBean(RestTemplate.class);
        for (String docId : docIds) {
            if (docId == null || docId.trim().isEmpty()) {
                // skip blank lines in the id file instead of sending an empty docId
                continue;
            }
            try{
                HttpHeaders headers = new HttpHeaders();
                headers.set("Accept","application/json, text/javascript, */*; q=0.01");
                headers.set("X-Requested-With","XMLHttpRequest");
                headers.set("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36");
                headers.setContentType(MediaType.APPLICATION_FORM_URLENCODED);
                headers.set("Origin","https://wenshu.court.gov.cn");
                headers.set("Sec-Fetch-Mode","cors");
                headers.set("Sec-Fetch-Dest","empty");
                headers.set("Sec-Fetch-Site","same-origin");
                headers.set("Referer","");// fill in the Referer shown in the browser
                // NOTE(review): this advertises gzip/deflate/br; confirm the configured
                // RestTemplate can decode whatever encoding the server answers with.
                headers.set("Accept-Encoding","gzip, deflate, br");
                headers.set("Accept-Language","en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6");
                headers.set("cookie", "");// fill in the cookie copied from the browser

                // Form body; ciphertext and __RequestVerificationToken are regenerated for every request.
                MultiValueMap<String, String> map = new LinkedMultiValueMap<>();
                map.add("docId", docId);
                map.add("ciphertext", geneCiphertext());
                map.add("cfg", "com.lawyee.judge.dc.parse.dto.SearchDataDsoDTO@docInfoSearch");
                map.add("__RequestVerificationToken", geneToken());

                HttpEntity<MultiValueMap<String, String>> entity = new HttpEntity<>(map, headers);
                ResponseEntity<String> response = restTemplate.exchange(URL, HttpMethod.POST, entity, String.class);
                if(response.getStatusCode() == HttpStatus.OK){
                    String body = response.getBody();
                    JSONObject resultJson = JSONUtil.parseObj(body);
                    String secretKey = resultJson.getStr("secretKey");
                    String value = resultJson.getStr("result");
                    String wenshuContent = decode(value, secretKey);

                    JSONObject wenshuContentJson = JSONUtil.parseObj(wenshuContent);
                    String name = wenshuContentJson.getStr("s17");
                    String wenshuName = wenshuContentJson.getStr("s7");
                    String qwContent = wenshuContentJson.getStr("qwContent");
                    // qwContent is HTML; keep only its text
                    qwContent = Jsoup.parse(qwContent).text();

//                    FileUtils.write(new File("/Users/kevindai/Downloads/wenshu1/"+name+"-"+wenshuName+ ".txt"),qwContent, Charsets.UTF_8);
                    ArrayList<String> list = Lists.newArrayList(name, wenshuName, qwContent);
                    result.add(list);
                    // random 5-9 second pause between requests
                    TimeUnit.SECONDS.sleep(RandomUtils.nextInt(5,10));

                }
            }catch (Exception e){
                // best effort: report the failed document and continue with the next one
                e.printStackTrace();
            }
        }

        // close the writer once the rows are written so the file handle is released
        CsvUtil.getWriter(new File("/Users/kevindai/Downloads/wenshu1.csv"),Charset.defaultCharset()).write(result).close();
    }

ok,到这里代码就全部编写完成了,经过测试爬取是没问题的。此数据源用于一些风控场景还是挺有效的,希望能帮到你们。如果大家觉得什么网站加密很有趣也可以发给我研究一下啊

你可能感兴趣的:(反爬虫破解,个人笔记)