python爬虫js逆向某恩数据-电影年度票房数据des解密

今天我们爬取某恩数据-电影年度票房数据:
python爬虫js逆向某恩数据-电影年度票房数据des解密_第1张图片


1.分析:

很明显,该网站返回的数据是经过加密的。接下来,我们到 Initiator 里看看是哪段代码处理了这个响应。
python爬虫js逆向某恩数据-电影年度票房数据des解密_第2张图片
python爬虫js逆向某恩数据-电影年度票房数据des解密_第3张图片
python爬虫js逆向某恩数据-电影年度票房数据des解密_第4张图片
进入该嫌疑函数:
python爬虫js逆向某恩数据-电影年度票房数据des解密_第5张图片


2.逆向:

先将代码还原:

// Restored form of the site's original (obfuscated) source.
// Takes the string `_0xa0c834`, reads an 8-character DES key that is embedded
// in it, strips the key material out, DES-decrypts the remaining hex text and
// returns the plaintext cut off right after its last '}'.
// The steps run in the order listed in b['VMmle']: 7, 1, 8, 9, 5, 2, 3, 6, 0, 4.
// NOTE(review): as written this is an anonymous function excerpt; it presumably
// has to be named or assigned before it can be called — confirm.
function (_0xa0c834) {
    // String table plus thin wrappers around operators and calls that the
    // flattened code below goes through. 'pKENi' and 'wnfPa' are not
    // referenced anywhere in this function.
    var b = {
        'pKENi': function _0x2f627(_0x5b6f5a, _0x440924) {
            return _0x5b6f5a === _0x440924;
        },
        'wnfPa': 'ZGz',
        'VMmle': '7|1|8|9|5|2|3|6|0|4',
        'GKWFf': function _0x1a4e13(_0x40cfde, _0x16f3c2) {
            return _0x40cfde == _0x16f3c2;
        },
        'MUPgQ': function _0x342f0d(_0x19038b, _0x4004d6) {
            return _0x19038b >= _0x4004d6;
        },
        'hLXma': function _0x55adaf(_0x45a871, _0x161bdf) {
            return _0x45a871 + _0x161bdf;
        },
        'JdOlO': function _0x13e00a(_0x5899a9, _0x4bb34d) {
            return _0x5899a9 + _0x4bb34d;
        },
        'qrTpg': function _0x1198fb(_0x55b317, _0x22e1db, _0x1b091a) {
            return _0x55b317(_0x22e1db, _0x1b091a);
        },
        'pdmMk': function _0xe2b022(_0x4af286, _0x4c2fd4) {
            return _0x4af286 - _0x4c2fd4;
        },
        'xVKWW': function _0x1094a3(_0x5f3627, _0x2a0ac5, _0x3ad2e5) {
            return _0x5f3627(_0x2a0ac5, _0x3ad2e5);
        }
    };

    // Returns the string _0x29d556 with _0x3d7020 characters removed,
    // starting at index _0xcc6df.
    var _0x9843d3 = function(_0x29d556, _0xcc6df, _0x3d7020) {
        if (0x0 == _0xcc6df)
            return _0x29d556['substr'](_0x3d7020);
        var _0x48914b;
        _0x48914b = '' + _0x29d556['substr'](0x0, _0xcc6df);
        return _0x48914b += _0x29d556['substr'](_0x4da59e['bUIIa'](_0xcc6df, _0x3d7020));
    }
    // Addition wrapper used by _0x9843d3. It is declared after that function,
    // but it is assigned before _0x9843d3 is first called in the loop below.
    var _0x4da59e = {
        'bUIIa': function _0x2a2af9(_0x779387, _0x4a4fec) {
            return _0x779387 + _0x4a4fec;
        }
    }


        // `a` holds the case labels in execution order; _0x356b01 is the cursor.
        var a = b['VMmle']['split']('|'), _0x356b01 = 0;

        // Dispatch loop: each pass runs the case named by the next entry of `a`.
        while (!![]) {
            switch (a[_0x356b01++]) {
                case '0':
                    // 9th step: DES-decrypt the hex-parsed input with the key
                    // in _0x2cf8ae (mode ECB, Pkcs7 padding) and turn the
                    // result into a UTF-8 string.
                    _0x554c90 = _grsa_JS['DES']['decrypt']({
                        'ciphertext': _grsa_JS['enc']['Hex']['parse'](_0xa0c834)
                    }, _0x2cf8ae, {
                        'iv': _0x554c90,
                        'mode': _grsa_JS['mode']['ECB'],
                        'padding': _grsa_JS['pad']['Pkcs7']
                    })['toString'](_grsa_JS['enc']['Utf8']);
                    continue;
                case '1':
                    // 2nd step: hand the input back untouched when it is
                    // null/undefined (loose ==) or at most 16 characters long.
                    if (b['GKWFf'](null, _0xa0c834) || b['MUPgQ'](0x10, _0xa0c834['length']))
                        return _0xa0c834;
                    continue;
                case '2':
                    // 6th step: cut the 8 key characters out of the input.
                    _0xa0c834 = _0x9843d3(_0xa0c834, _0x2cf8ae, 0x8);
                    continue;
                case '3':
                    // 7th step: parse the 8-character key text; used as the key argument.
                    _0x2cf8ae = _grsa_JS['enc']['Utf8']['parse'](_0x554c90);
                    continue;
                case '4':
                    // Last step: return everything up to and including the last '}'.
                    return _0x554c90['substring'](0x0, b['hLXma'](_0x554c90['lastIndexOf']('}'), 0x1));
                case '5':
                    // 5th step: read the 8 characters starting at index _0x2cf8ae (the key text).
                    _0x554c90 = _0xa0c834['substr'](_0x2cf8ae, 0x8);
                    continue;
                case '6':
                    // 8th step: parse the same key text again; passed as 'iv' in case '0'.
                    _0x554c90 = _grsa_JS['enc']['Utf8']['parse'](_0x554c90);
                    continue;
                case '7':
                    // 1st step: return '' when `navigator` or its userAgent is falsy.
                    if (!navigator || !navigator['userAgent'])
                        return '';
                    continue;
                case '8':
                    // 3rd step: _0x554c90 = (last character parsed with radix 16) + 9;
                    // _0x2cf8ae = the character at that index parsed with radix 16.
                    var _0x554c90 = b['JdOlO'](b['qrTpg'](parseInt, _0xa0c834[b['pdmMk'](_0xa0c834['length'], 0x1)], 0x10), 0x9)
                        , _0x2cf8ae = b['xVKWW'](parseInt, _0xa0c834[_0x554c90], 0x10);
                    continue;
                case '9':
                    // 4th step: drop the single character at index _0x554c90.
                    _0xa0c834 = _0x9843d3(_0xa0c834, _0x554c90, 0x1);
                    continue;
            }
            // Reached only when no case label matched; leaves the loop.
            break;
        }
}

之后整理逻辑,逆向得:

/**
 * Cuts a run of characters out of a string.
 *
 * @param {string} c_a - Source string.
 * @param {number} c_b - Index at which the removed run starts.
 * @param {number} c_c - Number of characters to remove.
 * @returns {string} `c_a` without the `c_c` characters starting at `c_b`.
 */
function fn(c_a, c_b, c_c) {
    // Loose equality is kept on purpose so that values such as "0" still
    // take the "remove from the very start" path.
    if (c_b == 0) {
        return c_a.substr(c_c);
    }
    // Everything before the cut, followed by everything after it.
    return c_a.substr(0, c_b) + c_a.substr(c_b + c_c);
}


/**
 * Decrypts a response string produced by the site's API.
 *
 * Layout of `data`, as established by the obfuscated original:
 *   - the last character is a hex digit; its value + 9 is the index of a
 *     one-character marker;
 *   - that marker is a hex digit giving the index (after the marker has been
 *     removed) of an 8-character DES key;
 *   - what remains once marker and key are removed is the hex ciphertext.
 *
 * @param {string} data - Raw response text.
 * @returns {string} Decrypted text truncated after its last '}'; `data`
 *     itself when it is null/undefined or at most 16 characters long.
 */
this.shell = function(data) {
    // Same guard as the obfuscated original: nothing to decrypt.
    if (data == null || data.length <= 16) {
        return data;
    }

    // Both markers are HEX digits (the original passes 0x10 as the radix).
    // Parsing them as decimal turns 'a'..'f' into NaN and breaks everything
    // that follows.
    var markerIndex = parseInt(data[data.length - 1], 16) + 9;
    var keyIndex = parseInt(data[markerIndex], 16);

    // Remove the marker, read the key, then remove the key.
    var stripped = fn(data, markerIndex, 1);
    var keyText = stripped.substr(keyIndex, 8);
    stripped = fn(stripped, keyIndex, 8);

    var key = _grsa_JS.enc.Utf8.parse(keyText);
    // The original also supplies the key text as `iv`; kept so the call
    // matches it. NOTE(review): the mode is ECB, so the iv is presumably
    // unused — confirm against the library.
    var iv = _grsa_JS.enc.Utf8.parse(keyText);

    var plain = _grsa_JS.DES.decrypt({
        'ciphertext': _grsa_JS.enc.Hex.parse(stripped)
    }, key, {
        'iv': iv,
        'mode': _grsa_JS.mode.ECB,
        'padding': _grsa_JS.pad.Pkcs7
    }).toString(_grsa_JS.enc.Utf8);

    // Keep only the text up to and including the last '}'.
    return plain.substring(0, plain.lastIndexOf('}') + 1);
}

3.python代码整合:

通过以上分析,我们已经弄清了浏览器端js代码的运行逻辑,现在用python复现该代码逻辑:

import binascii  

import requests
from Crypto.Cipher import DES

# Endpoint that returns the encrypted yearly box-office data.
url = "https://www.endata.com.cn/API/GetData.ashx"
# Form fields of the request, kept under their own name so the response text
# below does not overwrite them.
payload = {
    "year": "2020",
    "MethodName": "BoxOffice_GetYearInfoData"
}
# Without a timeout, requests waits indefinitely on an unresponsive server.
resp = requests.post(url, data=payload, timeout=10)
# Fail here on an HTTP error instead of trying to decrypt an error page.
resp.raise_for_status()
# Encrypted response body; this is what the decryption code below consumes.
data = resp.text



def fn(c1, c2, c3):
    """Return ``c1`` with the ``c3`` items starting at index ``c2`` cut out."""
    if c2 == 0:
        # Cutting from the very start: just drop the first c3 items.
        return c1[c3:]
    head, tail = c1[:c2], c1[c2 + c3:]
    return head + tail


# The last character is a hex digit; its value + 9 is the index of a marker
# character, which is itself a hex digit giving the index of the 8-char key.
a = int(data[-1], 16) + 9
b = int(data[a], 16)

# Remove the marker, read the key, then remove the key from the text.
data = fn(data, a, 1)
key_text = data[b: b + 8]
data = fn(data, b, 8)

key = key_text.encode("utf-8")

# What is left is the hex-encoded ciphertext.
ds = binascii.a2b_hex(data)

des = DES.new(key, mode=DES.MODE_ECB)
result = des.decrypt(ds)

# The raw plaintext still carries the PKCS7 padding bytes. Like the JS code,
# keep only the text up to and including the last "}" so the output is clean.
text = result.decode("utf-8")
print(text[: text.rfind("}") + 1])

补环境方案:

以上的代码逆向具有一定难度,这里尝试提供补js环境的方式来实现解密逻辑:
直接运行浏览器扣下来的代码:

// Restored form of the site's original (obfuscated) source, as lifted from the browser.
// Takes the string `_0xa0c834`, reads an 8-character DES key that is embedded
// in it, strips the key material out, DES-decrypts the remaining hex text and
// returns the plaintext cut off right after its last '}'.
// The steps run in the order listed in b['VMmle']: 7, 1, 8, 9, 5, 2, 3, 6, 0, 4.
// It reads the globals `navigator` and `_grsa_JS`, neither of which is defined here.
function (_0xa0c834) {
    // String table plus thin wrappers around operators and calls that the
    // flattened code below goes through. 'pKENi' and 'wnfPa' are not
    // referenced anywhere in this function.
    var b = {
        'pKENi': function _0x2f627(_0x5b6f5a, _0x440924) {
            return _0x5b6f5a === _0x440924;
        },
        'wnfPa': 'ZGz',
        'VMmle': '7|1|8|9|5|2|3|6|0|4',
        'GKWFf': function _0x1a4e13(_0x40cfde, _0x16f3c2) {
            return _0x40cfde == _0x16f3c2;
        },
        'MUPgQ': function _0x342f0d(_0x19038b, _0x4004d6) {
            return _0x19038b >= _0x4004d6;
        },
        'hLXma': function _0x55adaf(_0x45a871, _0x161bdf) {
            return _0x45a871 + _0x161bdf;
        },
        'JdOlO': function _0x13e00a(_0x5899a9, _0x4bb34d) {
            return _0x5899a9 + _0x4bb34d;
        },
        'qrTpg': function _0x1198fb(_0x55b317, _0x22e1db, _0x1b091a) {
            return _0x55b317(_0x22e1db, _0x1b091a);
        },
        'pdmMk': function _0xe2b022(_0x4af286, _0x4c2fd4) {
            return _0x4af286 - _0x4c2fd4;
        },
        'xVKWW': function _0x1094a3(_0x5f3627, _0x2a0ac5, _0x3ad2e5) {
            return _0x5f3627(_0x2a0ac5, _0x3ad2e5);
        }
    };

    // Returns the string _0x29d556 with _0x3d7020 characters removed,
    // starting at index _0xcc6df.
    var _0x9843d3 = function(_0x29d556, _0xcc6df, _0x3d7020) {
        if (0x0 == _0xcc6df)
            return _0x29d556['substr'](_0x3d7020);
        var _0x48914b;
        _0x48914b = '' + _0x29d556['substr'](0x0, _0xcc6df);
        return _0x48914b += _0x29d556['substr'](_0x4da59e['bUIIa'](_0xcc6df, _0x3d7020));
    }
    // Addition wrapper used by _0x9843d3. It is declared after that function,
    // but it is assigned before _0x9843d3 is first called in the loop below.
    var _0x4da59e = {
        'bUIIa': function _0x2a2af9(_0x779387, _0x4a4fec) {
            return _0x779387 + _0x4a4fec;
        }
    }


        // `a` holds the case labels in execution order; _0x356b01 is the cursor.
        var a = b['VMmle']['split']('|'), _0x356b01 = 0;

        // Dispatch loop: each pass runs the case named by the next entry of `a`.
        while (!![]) {
            switch (a[_0x356b01++]) {
                case '0':
                    // 9th step: DES-decrypt the hex-parsed input with the key
                    // in _0x2cf8ae (mode ECB, Pkcs7 padding) and turn the
                    // result into a UTF-8 string.
                    _0x554c90 = _grsa_JS['DES']['decrypt']({
                        'ciphertext': _grsa_JS['enc']['Hex']['parse'](_0xa0c834)
                    }, _0x2cf8ae, {
                        'iv': _0x554c90,
                        'mode': _grsa_JS['mode']['ECB'],
                        'padding': _grsa_JS['pad']['Pkcs7']
                    })['toString'](_grsa_JS['enc']['Utf8']);
                    continue;
                case '1':
                    // 2nd step: hand the input back untouched when it is
                    // null/undefined (loose ==) or at most 16 characters long.
                    if (b['GKWFf'](null, _0xa0c834) || b['MUPgQ'](0x10, _0xa0c834['length']))
                        return _0xa0c834;
                    continue;
                case '2':
                    // 6th step: cut the 8 key characters out of the input.
                    _0xa0c834 = _0x9843d3(_0xa0c834, _0x2cf8ae, 0x8);
                    continue;
                case '3':
                    // 7th step: parse the 8-character key text; used as the key argument.
                    _0x2cf8ae = _grsa_JS['enc']['Utf8']['parse'](_0x554c90);
                    continue;
                case '4':
                    // Last step: return everything up to and including the last '}'.
                    return _0x554c90['substring'](0x0, b['hLXma'](_0x554c90['lastIndexOf']('}'), 0x1));
                case '5':
                    // 5th step: read the 8 characters starting at index _0x2cf8ae (the key text).
                    _0x554c90 = _0xa0c834['substr'](_0x2cf8ae, 0x8);
                    continue;
                case '6':
                    // 8th step: parse the same key text again; passed as 'iv' in case '0'.
                    _0x554c90 = _grsa_JS['enc']['Utf8']['parse'](_0x554c90);
                    continue;
                case '7':
                    // 1st step: return '' when `navigator` or its userAgent is falsy.
                    // `navigator` is read as a bare global here.
                    if (!navigator || !navigator['userAgent'])
                        return '';
                    continue;
                case '8':
                    // 3rd step: _0x554c90 = (last character parsed with radix 16) + 9;
                    // _0x2cf8ae = the character at that index parsed with radix 16.
                    var _0x554c90 = b['JdOlO'](b['qrTpg'](parseInt, _0xa0c834[b['pdmMk'](_0xa0c834['length'], 0x1)], 0x10), 0x9)
                        , _0x2cf8ae = b['xVKWW'](parseInt, _0xa0c834[_0x554c90], 0x10);
                    continue;
                case '9':
                    // 4th step: drop the single character at index _0x554c90.
                    _0xa0c834 = _0x9843d3(_0xa0c834, _0x554c90, 0x1);
                    continue;
            }
            // Reached only when no case label matched; leaves the loop.
            break;
        }
}

报错:
在这里插入图片描述
在这里插入图片描述

报错的原因是node环境中并没有浏览器的navigator对象。我们先查看是哪里用到了navigator——在 case '7' 的位置:

// Excerpt of the dispatch loop: return '' when `navigator` or its
// `userAgent` is falsy. `navigator` is read as a bare global, so it must be
// defined before this check runs.
case '7':
          if (!navigator || !navigator['userAgent'])
          return '';
          continue;

分析代码得知,这里是判断navigator是否存在及navigator里是否有userAgent:
我们直接在代码前面补上这个navigator环境以及js第三方库:
python爬虫js逆向某恩数据-电影年度票房数据des解密_第6张图片
补上环境之后,发现代码就可以运行了,成功解密:
python爬虫js逆向某恩数据-电影年度票房数据des解密_第7张图片


你可能感兴趣的:(js逆向,python,爬虫,javascript)