状态码412,521,cookie包含__jsl_clearance参数

状态码412,521,cookie包含__jsl_clearance参数

最近做爬虫碰见状态码为412和521的网站,分享一下经验
样例网站: http://kjj.hefei.gov.cn/public/column/2971?sub=&catId=6718761&nav=3&action=list&type=4&pageIndex=1

1.headers参数

用postman检测了一下,发现只需要UA,Host,Cookie参数
状态码412,521,cookie包含__jsl_clearance参数_第1张图片

2.cookie解密

cookie里面有四个参数

__jsluid_h  # 必要参数, id,是固定值
__jsl_clearance  # 必要参数, 时间参数 过期时间一小时, 需要解密
hefei_govd_SHIROJSESSIONID # 要不要都行
hefei_gova_SHIROJSESSIONID # 要不要都行

3. 加密手段

当不携带cookie或者cookie失效,返回值是js代码

<script>document.cookie=('_')+('_')+('j')+('s')+('l')+('_')+('c')+('l')+('e')+('a')+('r')+('a')+('n')+('c')+('e')+('=')+(-~[]+'')+([2]*(3)+'')+(1+1+'')+(-~(8)+'')+((1+[0])/[2]+'')+((1<<1)+'')+(2+5+'')+(-~[6]+'')+(~~[]+'')+(1+7+'')+('.')+((2^1)+'')+(1+6+'')+(-~false+'')+('|')+('-')+(-~{}+'')+('|')+('K')+('x')+('%')+((1<<1)+'')+('B')+('g')+((1+[2])/[2]+'')+('j')+('R')+('c')+('C')+('k')+('X')+('h')+('f')+('U')+(-~(4)+'')+('i')+((1<<2)+'')+('R')+('g')+('T')+('t')+('I')+('L')+('t')+('N')+(6+'')+('I')+('%')+((1|2)+'')+('D')+(';')+('m')+('a')+('x')+('-')+('a')+('g')+('e')+('=')+(3+'')+([2]*(3)+'')+(~~''+'')+((+false)+'')+(';')+('p')+('a')+('t')+('h')+('=')+('/');location.href=location.pathname+location.search</script>

用 js2py 模块执行这段 js 可以得到一个 __jsl_clearance 值,但它和最终正确的参数并不一样(注意第二段是 -1 而不是 0):

__jsl_clearance=1629527914.622|-1|UpelWOBlBFk92c9jozkPo2IT9Wg%3D;max-age=3600;path=/  # 得到的
__jsl_clearance=1629527208.239|0|O1nnBGatvg%2FAZll65p%2F50TJ%2FcP8%3D;  # 正确的

但是我们还是要把它放入cookie继续请求

'Cookie': '__jsluid_h=2d08a6173999641305627ef610af78ad; __jsl_clearance=1629528058.336|-1|iHlrLmalv8EOFbuCwV%2FyuVuIb6g%3D;'

之后得到的返回值就会变成一堆混淆的js代码(解混淆的网站: http://tool.yuanrenxue.com/decode_obfuscator),其实js代码不解混淆也是可以执行的,不过建议解混淆看看js

# 解混淆后的js代码
/**
 * Computes the SHA-256 digest of a string and returns it as 64 lowercase
 * hex characters.
 *
 * The text is first expanded to UTF-8 (one JS character per byte; every
 * UTF-16 code unit >= 0x800 becomes three bytes), packed into big-endian
 * 32-bit words, padded, and run through the 64-round compression function.
 *
 * @param {string} _0x282b21 - Text to digest.
 * @returns {string} Hexadecimal digest.
 */
function hash(_0x282b21) {
  const BITS_PER_CHAR = 8;

  // 32-bit addition carried out in two 16-bit halves so the result wraps
  // like an unsigned 32-bit sum.
  const add32 = (x, y) => {
    const low = (x & 65535) + (y & 65535);
    const high = (x >> 16) + (y >> 16) + (low >> 16);
    return (high << 16) | (low & 65535);
  };

  const rotateRight = (word, bits) => (word >>> bits) | (word << (32 - bits));
  const shiftRight = (word, bits) => word >>> bits;

  // Round functions: choose, majority, and the four sigma mixers.
  const choose = (x, y, z) => (x & y) ^ (~x & z);
  const majority = (x, y, z) => (x & y) ^ (x & z) ^ (y & z);
  const bigSigma0 = (x) => rotateRight(x, 2) ^ rotateRight(x, 13) ^ rotateRight(x, 22);
  const bigSigma1 = (x) => rotateRight(x, 6) ^ rotateRight(x, 11) ^ rotateRight(x, 25);
  const smallSigma0 = (x) => rotateRight(x, 7) ^ rotateRight(x, 18) ^ shiftRight(x, 3);
  const smallSigma1 = (x) => rotateRight(x, 17) ^ rotateRight(x, 19) ^ shiftRight(x, 10);

  // Pads the word array in place and folds every 16-word chunk into the
  // eight-word state, which is returned.
  const compress = (words, bitLength) => {
    const roundConstants = [
      1116352408, 1899447441, 3049323471, 3921009573,
      961987163, 1508970993, 2453635748, 2870763221,
      3624381080, 310598401, 607225278, 1426881987,
      1925078388, 2162078206, 2614888103, 3248222580,
      3835390401, 4022224774, 264347078, 604807628,
      770255983, 1249150122, 1555081692, 1996064986,
      2554220882, 2821834349, 2952996808, 3210313671,
      3336571891, 3584528711, 113926993, 338241895,
      666307205, 773529912, 1294757372, 1396182291,
      1695183700, 1986661051, 2177026350, 2456956037,
      2730485921, 2820302411, 3259730800, 3345764771,
      3516065817, 3600352804, 4094571909, 275423344,
      430227734, 506948616, 659060556, 883997877,
      958139571, 1322822218, 1537002063, 1747873779,
      1955562222, 2024104815, 2227730452, 2361852424,
      2428436474, 2756734187, 3204031479, 3329325298,
    ];
    const state = [
      1779033703, 3144134277, 1013904242, 2773480762,
      1359893119, 2600822924, 528734635, 1541459225,
    ];
    const schedule = new Array(64);

    // Padding: a single 1 bit after the message, then the bit length in the
    // last word of the final chunk.
    words[bitLength >> 5] |= 128 << (24 - (bitLength % 32));
    words[(((bitLength + 64) >> 9) << 4) + 15] = bitLength;

    for (let offset = 0; offset < words["length"]; offset += 16) {
      let [a, b, c, d, e, f, g, h] = state;

      for (let round = 0; round < 64; round++) {
        schedule[round] =
          round < 16
            ? words[round + offset]
            : add32(
                add32(
                  add32(smallSigma1(schedule[round - 2]), schedule[round - 7]),
                  smallSigma0(schedule[round - 15])
                ),
                schedule[round - 16]
              );

        const t1 = add32(
          add32(add32(add32(h, bigSigma1(e)), choose(e, f, g)), roundConstants[round]),
          schedule[round]
        );
        const t2 = add32(bigSigma0(a), majority(a, b, c));

        h = g;
        g = f;
        f = e;
        e = add32(d, t1);
        d = c;
        c = b;
        b = a;
        a = add32(t1, t2);
      }

      [a, b, c, d, e, f, g, h].forEach((value, index) => {
        state[index] = add32(value, state[index]);
      });
    }

    return state;
  };

  // Packs a byte string (char codes masked to 8 bits) into big-endian words.
  const packWords = (bytes) => {
    const words = [];
    const byteMask = 255;
    const totalBits = bytes["length"] * BITS_PER_CHAR;

    for (let bit = 0; bit < totalBits; bit += BITS_PER_CHAR) {
      words[bit >> 5] |= (bytes["charCodeAt"](bit / BITS_PER_CHAR) & byteMask) << (24 - (bit % 32));
    }

    return words;
  };

  // Expands each UTF-16 code unit into one, two or three UTF-8 byte characters.
  const toUtf8 = (text) => {
    // As written this swaps "\n" for "\n", i.e. it leaves the text unchanged.
    const source = text["replace"](new RegExp("\n", "g"), "\n");
    let bytes = "";

    for (let index = 0; index < source["length"]; index++) {
      const code = source["charCodeAt"](index);

      if (code < 128) {
        bytes += String["fromCharCode"](code);
      } else if (code < 2048) {
        bytes += String["fromCharCode"]((code >> 6) | 192);
        bytes += String["fromCharCode"]((code & 63) | 128);
      } else {
        bytes += String["fromCharCode"]((code >> 12) | 224);
        bytes += String["fromCharCode"](((code >> 6) & 63) | 128);
        bytes += String["fromCharCode"]((code & 63) | 128);
      }
    }

    return bytes;
  };

  // Renders 32-bit words as lowercase hex, most significant byte first.
  const toHex = (words) => {
    const digits = "0123456789abcdef";
    let hex = "";

    for (let byteIndex = 0; byteIndex < words["length"] * 4; byteIndex++) {
      const word = words[byteIndex >> 2];
      const shift = (3 - (byteIndex % 4)) * 8;
      hex += digits["charAt"]((word >> (shift + 4)) & 15) + digits["charAt"]((word >> shift) & 15);
    }

    return hex;
  };

  const encoded = toUtf8(_0x282b21);
  return toHex(compress(packWords(encoded), encoded["length"] * BITS_PER_CHAR));
}

/**
 * Solves the second-stage challenge in the browser and installs the
 * resulting clearance cookie.
 *
 * Every two-character combination drawn from `chars` is inserted between
 * `bts[0]` and `bts[1]`; the candidate whose hash() equals `ct` becomes the
 * cookie value. After a delay the cookie is written and the current page is
 * reloaded.
 *
 * Does nothing when an automated browser (PhantomJS / WebDriver markers) is
 * detected; shows an alert when no candidate matches.
 *
 * @param {Object} _0x5f1112 - Challenge parameters.
 * @param {string[]} _0x5f1112.bts - Prefix and suffix of the cookie value.
 * @param {string} _0x5f1112.chars - Alphabet for the two missing characters.
 * @param {string} _0x5f1112.ct - Expected hash() digest of the full value.
 * @param {string} _0x5f1112.tn - Cookie name.
 * @param {string} _0x5f1112.vt - Cookie Max-age.
 * @param {string} [_0x5f1112.wt] - Total wait in milliseconds before the cookie is set.
 * @returns {undefined}
 */
function go(_0x5f1112) {
  // True when the user agent or window carries a known automation marker.
  function isAutomatedBrowser() {
    const userAgent = window["navigator"]["userAgent"];
    const userAgentMarkers = ["Phantom"];

    if (userAgentMarkers.some((marker) => userAgent["indexOf"](marker) !== -1)) {
      return true;
    }

    return Boolean(
      window["callPhantom"] ||
        window["_phantom"] ||
        window["Headless"] ||
        window["navigator"]["webdriver"] ||
        window["navigator"]["__driver_evaluate"] ||
        window["navigator"]["__webdriver_evaluate"]
    );
  }

  if (isAutomatedBrowser()) {
    return;
  }

  const startedAt = new Date();

  // Brute-forces the two missing characters; returns [value, elapsedMs],
  // or undefined when no combination hashes to the expected digest.
  function findClearance(expectedDigest, parts) {
    const alphabet = _0x5f1112["chars"];
    const size = alphabet["length"];

    for (let first = 0; first < size; first++) {
      for (let second = 0; second < size; second++) {
        const candidate = parts[0] + alphabet["charAt"](first) + alphabet["charAt"](second) + parts[1];

        if (hash(candidate) === expectedDigest) {
          return [candidate, new Date() - startedAt];
        }
      }
    }

    return undefined;
  }

  // Article note: look here - this step produces the __jsl_clearance value;
  // the final cookie string is assembled in the timer callback below.
  const solution = findClearance(_0x5f1112["ct"], _0x5f1112["bts"]);

  if (!solution) {
    // "Request verification failed". The original literal spelled the last
    // two characters as raw UTF-8 bytes, which rendered as mojibake.
    alert("\u8BF7\u6C42\u9A8C\u8BC1\u5931\u8D25");
    return;
  }

  const [clearance, elapsedMs] = solution;

  // Wait out the remainder of `wt`; fall back to 500 ms when the search took
  // at least that long, and to 1500 ms when `wt` is absent.
  let delay = 1500;
  if (_0x5f1112["wt"]) {
    const totalWait = parseInt(_0x5f1112["wt"], 10);
    delay = totalWait > elapsedMs ? totalWait - elapsedMs : 500;
  }

  setTimeout(function () {
    // Article note: THIS is the cookie we actually need, e.g.
    // '__jsl_clearance=1629099320.472|0|SPTpQ9Gg7vJaaFJ4bztrMJtj0dk%3D;Max-age=3600; path = /'
    document["cookie"] = _0x5f1112["tn"] + "=" + clearance + ";Max-age=" + _0x5f1112["vt"] + "; path = /";
    location["href"] = location["pathname"] + location["search"];
  }, delay);
}

// Challenge parameters as embedded by the server in the second-stage script.
go({
  // Prefix and suffix of the cookie value; two characters are missing between them.
  bts: ["1629099320.472|0|SPT", "9Gg7vJaaFJ4bztrMJtj0dk%3D"],
  // Alphabet from which the two missing characters are drawn.
  chars: "vtZFarnJNrxpdBnQAvwuSI",
  // Digest the completed value must hash to.
  ct: "3ffc40d3ffde4797d2dff8a2e8945b4371abd2797a01fc3c7c394dafa70fd866",
  // Not read by go(); presumably names the digest algorithm.
  ha: "sha256",
  // Cookie name.
  tn: "__jsl_clearance",
  // Cookie Max-age.
  vt: "3600",
  // Total wait in milliseconds before the cookie is written.
  wt: "1500",
});

这段js的作用是:根据传入的参数,穷举出哈希值与 ct 相等的字符串,从而得到正确的 __jsl_clearance 参数(建议看一下上面代码中我加的注释)。
执行混淆的js后得到的正确参数如下:

__jsl_clearance=1629529254.179|0|y0WI6GSgJwXWfK1xjKPwU8HsXFY%3D;Max-age=3600; path = /

4. UA校验

网站除了cookie校验还有UA校验。经过我多次测试,发现cookie和UA是一一对应的,也就是说,从第一次请求到拿到最终正确的cookie为止,整个过程必须使用同一个UA,中途不能更换。
状态码412,521,cookie包含__jsl_clearance参数_第2张图片

5. 代码展示

为了方便python调用,对上面的js做了一些修改(保存为 js.js):用 jsdom 提供 window 对象,并让 go 函数直接返回cookie字符串,而不是通过 setTimeout 写入 document.cookie。

const jsdom = require("jsdom");
const { JSDOM } = jsdom;
// Minimal jsdom page; it only exists to supply the browser-like `window`
// that go()'s automation check reads.
const dom = new JSDOM(`

Hello world

`
);

// Published as explicit globals (the original relied on implicit global
// assignment, which is an error in strict mode / ES modules).
globalThis.window = dom.window;
globalThis.document = dom.window.document;
globalThis.XMLHttpRequest = dom.window.XMLHttpRequest;

/**
 * Computes the SHA-256 digest of a string and returns it as 64 lowercase
 * hex characters.
 *
 * The text is first expanded to UTF-8 (one JS character per byte; every
 * UTF-16 code unit >= 0x800 becomes three bytes), packed into big-endian
 * 32-bit words, padded, and run through the 64-round compression function.
 *
 * @param {string} _0x282b21 - Text to digest.
 * @returns {string} Hexadecimal digest.
 */
function hash(_0x282b21) {
  const BITS_PER_CHAR = 8;

  // 32-bit addition carried out in two 16-bit halves so the result wraps
  // like an unsigned 32-bit sum.
  const add32 = (x, y) => {
    const low = (x & 65535) + (y & 65535);
    const high = (x >> 16) + (y >> 16) + (low >> 16);
    return (high << 16) | (low & 65535);
  };

  const rotateRight = (word, bits) => (word >>> bits) | (word << (32 - bits));
  const shiftRight = (word, bits) => word >>> bits;

  // Round functions: choose, majority, and the four sigma mixers.
  const choose = (x, y, z) => (x & y) ^ (~x & z);
  const majority = (x, y, z) => (x & y) ^ (x & z) ^ (y & z);
  const bigSigma0 = (x) => rotateRight(x, 2) ^ rotateRight(x, 13) ^ rotateRight(x, 22);
  const bigSigma1 = (x) => rotateRight(x, 6) ^ rotateRight(x, 11) ^ rotateRight(x, 25);
  const smallSigma0 = (x) => rotateRight(x, 7) ^ rotateRight(x, 18) ^ shiftRight(x, 3);
  const smallSigma1 = (x) => rotateRight(x, 17) ^ rotateRight(x, 19) ^ shiftRight(x, 10);

  // Pads the word array in place and folds every 16-word chunk into the
  // eight-word state, which is returned.
  const compress = (words, bitLength) => {
    const roundConstants = [
      1116352408, 1899447441, 3049323471, 3921009573,
      961987163, 1508970993, 2453635748, 2870763221,
      3624381080, 310598401, 607225278, 1426881987,
      1925078388, 2162078206, 2614888103, 3248222580,
      3835390401, 4022224774, 264347078, 604807628,
      770255983, 1249150122, 1555081692, 1996064986,
      2554220882, 2821834349, 2952996808, 3210313671,
      3336571891, 3584528711, 113926993, 338241895,
      666307205, 773529912, 1294757372, 1396182291,
      1695183700, 1986661051, 2177026350, 2456956037,
      2730485921, 2820302411, 3259730800, 3345764771,
      3516065817, 3600352804, 4094571909, 275423344,
      430227734, 506948616, 659060556, 883997877,
      958139571, 1322822218, 1537002063, 1747873779,
      1955562222, 2024104815, 2227730452, 2361852424,
      2428436474, 2756734187, 3204031479, 3329325298,
    ];
    const state = [
      1779033703, 3144134277, 1013904242, 2773480762,
      1359893119, 2600822924, 528734635, 1541459225,
    ];
    const schedule = new Array(64);

    // Padding: a single 1 bit after the message, then the bit length in the
    // last word of the final chunk.
    words[bitLength >> 5] |= 128 << (24 - (bitLength % 32));
    words[(((bitLength + 64) >> 9) << 4) + 15] = bitLength;

    for (let offset = 0; offset < words["length"]; offset += 16) {
      let [a, b, c, d, e, f, g, h] = state;

      for (let round = 0; round < 64; round++) {
        schedule[round] =
          round < 16
            ? words[round + offset]
            : add32(
                add32(
                  add32(smallSigma1(schedule[round - 2]), schedule[round - 7]),
                  smallSigma0(schedule[round - 15])
                ),
                schedule[round - 16]
              );

        const t1 = add32(
          add32(add32(add32(h, bigSigma1(e)), choose(e, f, g)), roundConstants[round]),
          schedule[round]
        );
        const t2 = add32(bigSigma0(a), majority(a, b, c));

        h = g;
        g = f;
        f = e;
        e = add32(d, t1);
        d = c;
        c = b;
        b = a;
        a = add32(t1, t2);
      }

      [a, b, c, d, e, f, g, h].forEach((value, index) => {
        state[index] = add32(value, state[index]);
      });
    }

    return state;
  };

  // Packs a byte string (char codes masked to 8 bits) into big-endian words.
  const packWords = (bytes) => {
    const words = [];
    const byteMask = 255;
    const totalBits = bytes["length"] * BITS_PER_CHAR;

    for (let bit = 0; bit < totalBits; bit += BITS_PER_CHAR) {
      words[bit >> 5] |= (bytes["charCodeAt"](bit / BITS_PER_CHAR) & byteMask) << (24 - (bit % 32));
    }

    return words;
  };

  // Expands each UTF-16 code unit into one, two or three UTF-8 byte characters.
  const toUtf8 = (text) => {
    // As written this swaps "\n" for "\n", i.e. it leaves the text unchanged.
    const source = text["replace"](new RegExp("\n", "g"), "\n");
    let bytes = "";

    for (let index = 0; index < source["length"]; index++) {
      const code = source["charCodeAt"](index);

      if (code < 128) {
        bytes += String["fromCharCode"](code);
      } else if (code < 2048) {
        bytes += String["fromCharCode"]((code >> 6) | 192);
        bytes += String["fromCharCode"]((code & 63) | 128);
      } else {
        bytes += String["fromCharCode"]((code >> 12) | 224);
        bytes += String["fromCharCode"](((code >> 6) & 63) | 128);
        bytes += String["fromCharCode"]((code & 63) | 128);
      }
    }

    return bytes;
  };

  // Renders 32-bit words as lowercase hex, most significant byte first.
  const toHex = (words) => {
    const digits = "0123456789abcdef";
    let hex = "";

    for (let byteIndex = 0; byteIndex < words["length"] * 4; byteIndex++) {
      const word = words[byteIndex >> 2];
      const shift = (3 - (byteIndex % 4)) * 8;
      hex += digits["charAt"]((word >> (shift + 4)) & 15) + digits["charAt"]((word >> shift) & 15);
    }

    return hex;
  };

  const encoded = toUtf8(_0x282b21);
  return toHex(compress(packWords(encoded), encoded["length"] * BITS_PER_CHAR));
}

/**
 * Solves the second-stage challenge and returns the cookie string instead of
 * writing it to document.cookie, so an external caller can use the value.
 *
 * Every two-character combination drawn from `chars` is inserted between
 * `bts[0]` and `bts[1]`; the candidate whose hash() equals `ct` becomes the
 * cookie value.
 *
 * @param {Object} _0x5f1112 - Challenge parameters.
 * @param {string[]} _0x5f1112.bts - Prefix and suffix of the cookie value.
 * @param {string} _0x5f1112.chars - Alphabet for the two missing characters.
 * @param {string} _0x5f1112.ct - Expected hash() digest of the full value.
 * @param {string} _0x5f1112.tn - Cookie name.
 * @param {string} _0x5f1112.vt - Cookie Max-age.
 * @returns {string} "<tn>=<value>;Max-age=<vt>; path = /", or "" when no
 *   candidate matches or an automation marker is detected.
 */
function go(_0x5f1112) {
  // True when the user agent or window carries a known automation marker.
  const isAutomatedBrowser = () => {
    const userAgent = window["navigator"]["userAgent"];
    const userAgentMarkers = ["Phantom"];

    if (userAgentMarkers.some((marker) => userAgent["indexOf"](marker) !== -1)) {
      return true;
    }

    return Boolean(
      window["callPhantom"] ||
        window["_phantom"] ||
        window["Headless"] ||
        window["navigator"]["webdriver"] ||
        window["navigator"]["__driver_evaluate"] ||
        window["navigator"]["__webdriver_evaluate"]
    );
  };

  if (isAutomatedBrowser()) {
    // Return a string here too: the caller splits the result, and the
    // original's bare `return` handed it undefined on this path.
    return "";
  }

  const alphabet = _0x5f1112["chars"];
  const [prefix, suffix] = _0x5f1112["bts"];
  const expectedDigest = _0x5f1112["ct"];

  for (let first = 0; first < alphabet["length"]; first++) {
    for (let second = 0; second < alphabet["length"]; second++) {
      const candidate = prefix + alphabet["charAt"](first) + alphabet["charAt"](second) + suffix;

      if (hash(candidate) === expectedDigest) {
        return _0x5f1112["tn"] + "=" + candidate + ";Max-age=" + _0x5f1112["vt"] + "; path = /";
      }
    }
  }

  // No combination matched (the browser version shows an alert here).
  return "";
}

python代码

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import re
import time
from faker import Factory
import execjs
import requests
import js2py
from lxml import etree
import json

# Sample listing page protected by the __jsl_clearance challenge.
url = (
    "http://kjj.hefei.gov.cn/public/column/2971"
    "?sub=&catId=6718761&nav=3&action=list&type=4&pageIndex=1"
)

def B(max_attempts=5):
    """Fetch the sample page behind the __jsl_clearance challenge and print item titles.

    Each attempt performs the two challenge stages:

    1. Request the page with a stale clearance cookie; the response is a small
       inline script that assigns ``document.cookie``. That expression is
       evaluated with js2py to obtain the first-stage cookie.
    2. Request the page again with the first-stage cookie; the response is the
       obfuscated script ending in ``go({...})``. The JSON argument is passed
       to ``go`` in ``js.js`` (run through execjs), which returns the final
       cookie string.

    With the final cookie the page is fetched once more and the ``title`` of
    the first link in every list item is printed.

    :param max_attempts: how many times the two stages are retried (one second
        apart) before giving up.
    :raises RuntimeError: if a challenge script cannot be located in a
        response, or no clearance cookie was obtained after ``max_attempts``.
    """
    jsluid = '__jsluid_h=2d08a6173999641305627ef610af78ad'
    stale_cookie = jsluid + '; __jsl_clearance=1629450531.317|0|T68aGoBUCe0AodUL59%2BiqR%2BmItM%3D;'
    headers = {
        'Cookie': stale_cookie,
        "Host": "kjj.hefei.gov.cn",
        # The same User-Agent must be used for every request of the exchange.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36",
    }

    # Compile the solver once instead of on every retry.
    with open(r'js.js', 'r', encoding='utf-8') as f:
        solver = execjs.compile(f.read(), cwd=r'C:\Users\Datong\AppData\Roaming\npm\node_modules')

    cookie = None
    for attempt in range(max_attempts):
        if attempt:
            time.sleep(1)

        # Stage 1: the inline script that builds the first clearance cookie.
        headers['Cookie'] = stale_cookie
        stage1_html = requests.get(url, headers=headers, timeout=3).content.decode()
        stage1 = re.search(r'<script>\s*document\.cookie=(.+?);\s*location\.href=', stage1_html, re.S)
        if stage1 is None:
            raise RuntimeError('first-stage challenge script not found in response')
        context = js2py.EvalJs()
        context.execute('cookies2 =' + stage1.group(1))
        name, _, value = context.cookies2.split(';')[0].partition('=')

        # Stage 2: the obfuscated script carrying the go({...}) parameters.
        headers['Cookie'] = '{}; {}={};'.format(jsluid, name, value)
        stage2_js = requests.get(url, headers=headers, timeout=3).text
        stage2 = re.search(r'go\(({.+?})\)', stage2_js)
        if stage2 is None:
            raise RuntimeError('second-stage go({...}) parameters not found in response')
        cookie_param = json.loads(stage2.group(1))

        solved = solver.call('go', cookie_param) or ''
        candidate = solved.split(';')[0] + ';'
        if '__jsl_clearance=' in candidate:
            cookie = candidate
            break

    if cookie is None:
        raise RuntimeError('no __jsl_clearance cookie after {} attempts'.format(max_attempts))

    headers['Cookie'] = '{}; {}'.format(jsluid, cookie)
    page = requests.get(url, headers=headers, timeout=3).text
    for item in etree.HTML(page).xpath('//ul[@class="clearfix xxgk_nav_list"]/li'):
        titles = item.xpath('.//a/@title')
        if titles:
            print(titles[0])


# Run the crawl only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    B()
参考链接:
	https://blog.csdn.net/qq_39138295/article/details/100705405

你可能感兴趣的:(爬虫,python)