爬取抖音视频(三)

下面我们来抓,信任信息界面数据

一、抓取接口

视频链接也是通过分享出来的点击
爬取抖音视频(三)_第1张图片
下面来分析参数
爬取抖音视频(三)_第2张图片

  1. user_id : 这个在分享出来的链接里面有,看参数名都知道是代表什么了。
  2. max_cursor :这个第一次是0,之后需要取剩余列表的时候应该就要用上一次请求得到的JSON数据中的“max_cursor”了。
  3. aid:不清楚用途,直接跟着用1128。
  4. _signature:签名
  5. dytk:上个帖子已经说明了

怎么解 _signature?点击
这里有详细的说明,我在它的基础上做了一些优化。

二、signature生成算法

/**
 * Compute a `_signature` value for the given user id.
 *
 * The function embeds a small stack-based bytecode interpreter (the IIFE
 * assigned to `r`), runs a bytecode string through it with `[e]` as the
 * argument list, and finally returns `e.sign(userId)`.
 *
 * NOTE(review): `e` starts out as an empty object, so `e.sign` is presumably
 * installed by the bytecode program - confirm against the original script.
 *
 * @param {*} userId - value forwarded unchanged to `e.sign`.
 * @returns {*} whatever `e.sign(userId)` returns.
 */
function generateSignature(userId) {
    // Provide a `navigator.userAgent` on `this`. NOTE(review): when called as a
    // plain function in sloppy mode `this` is the global object, so this would
    // define a global `navigator` - verify that is what the bytecode relies on.
    this.navigator = {
        userAgent: "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
    }
    // Object handed to the bytecode program; `sign` is read from it below.
    var e = {}

    // The IIFE returns the inner function `r` (the interpreter entry point).
    // Because there is no semicolon after `})()`, the parenthesised argument
    // list on the following line is applied to that returned function, i.e. the
    // bytecode string is executed immediately with `[e]` as its arguments.
    //
    // NOTE(review): as it appears here, the string literal passed at the end of
    // this statement has no closing quote - the only later `'` is escaped with a
    // backslash - so the snippet does not parse as-is. The bytecode was most
    // likely truncated when the article was published; restore the complete
    // literal from the original script before using this code. The literal also
    // contains non-ASCII characters, so the file must be saved and read with an
    // encoding that preserves them.
    var r = (function () {
        // Apply binary operator `e` (given as source text) to (r, a). The
        // generated `Function` for each operator is cached in `b`.
        function e(e, a, r) {
            return (b[e] || (b[e] = t("x,y", "return x " + e + " y")))(r, a)
        }

        // Build (and cache in `k`, keyed by argument count `r`) a function that
        // evaluates `new x[y](x[y+1], ..., x[y+r])`, then call it with (e, a).
        function a(e, a, r) {
            return (k[r] || (k[r] = t("x,y", "return new x[y](" + Array(r + 1).join(",x[++y]").substr(1) + ")")))(e, a)
        }

        // Create a new scope object for running bytecode `e` with arguments `a`
        // and optional parent scope `r`, then start the interpreter at offset 0.
        function r(e, a, r) {
            // `d` is the nesting depth: parent depth + 1, or 0 without a parent.
            var n, t, s = {}, b = s.d = r ? r.d + 1 : 0;
            // Register this scope as "$<depth>" and copy the parent's "$0".."$<depth-1>" links.
            for (s["$" + b] = s, t = 0; t < b; t++) s[n = "$" + t] = r[n];
            // Copy the arguments into numeric slots and record their count in `length`.
            for (t = 0, b = s.length = a.length; t < b; t++) s[t] = a[t];
            return c(e, 0, s)
        }

        // Interpreter loop: `t` is the bytecode string, `b` the current read
        // offset and `k` the current scope object.
        function c(t, b, k) {
            // Push a value onto the operand stack.
            function u(e) {
                v[x++] = e
            }

            // Read a length-prefixed string operand: one character encodes the
            // length (char code - 32), followed by that many characters.
            function f() {
                return g = t.charCodeAt(b++) - 32, t.substring(b, b += g)
            }

            // Run a nested interpreter from the current offset; on an exception
            // store it in `h` and mark the outcome by setting `y` to `l` itself.
            function l() {
                try {
                    y = c(t, b, k)
                } catch (e) {
                    h = e, y = l
                }
            }

            // `v` is the operand stack and `x` its stack pointer. Each opcode is
            // the char code at `b` minus 32.
            for (var h, y, d, g, v = [], x = 0; ;) switch (g = t.charCodeAt(b++) - 32) {
                case 1:
                    u(!v[--x]);
                    break;
                case 4:
                    v[x++] = f();
                    break;
                case 5:
                    u(function (e) {
                        var a = 0, r = e.length;
                        return function () {
                            var c = a < r;
                            return c && u(e[a++]), c
                        }
                    }(v[--x]));
                    break;
                case 6:
                    y = v[--x], u(v[--x](y));
                    break;
                case 8:
                    if (g = t.charCodeAt(b++) - 32, l(), b += g, g = t.charCodeAt(b++) - 32, y === c) b += g; else if (y !== l) return y;
                    break;
                case 9:
                    v[x++] = c;
                    break;
                case 10:
                    u(s(v[--x]));
                    break;
                case 11:
                    y = v[--x], u(v[--x] + y);
                    break;
                case 12:
                    // Decode an inline string: each char is XORed with (index + length).
                    for (y = f(), d = [], g = 0; g < y.length; g++) d[g] = y.charCodeAt(g) ^ g + y.length;
                    u(String.fromCharCode.apply(null, d));
                    break;
                case 13:
                    y = v[--x], h = delete v[--x][y];
                    break;
                case 14:
                    v[x++] = t.charCodeAt(b++) - 32;
                    break;
                case 59:
                    u((g = t.charCodeAt(b++) - 32) ? (y = x, v.slice(x -= g, y)) : []);
                    break;
                case 61:
                    u(v[--x][t.charCodeAt(b++) - 32]);
                    break;
                case 62:
                    // Fold one character of scope slot 1 into the unsigned 32-bit
                    // accumulator kept in scope slot 0 (multiplier 65599).
                    g = v[--x], k[0] = 65599 * k[0] + k[1].charCodeAt(g) >>> 0;
                    break;
                case 65:
                    h = v[--x], y = v[--x], v[--x][y] = h;
                    break;
                case 66:
                    u(e(t[b++], v[--x], v[--x]));
                    break;
                case 67:
                    // Call: functions tagged with `.x === c` are bytecode closures and
                    // are run through `r`; anything else is a native call via `apply`.
                    y = v[--x], d = v[--x], u((g = v[--x]).x === c ? r(g.y, y, k) : g.apply(d, y));
                    break;
                case 68:
                    u(e((g = t[b++]) < "<" ? (b--, f()) : g + g, v[--x], v[--x]));
                    break;
                case 70:
                    u(!1);
                    break;
                case 71:
                    v[x++] = n;
                    break;
                case 72:
                    v[x++] = +f();
                    break;
                case 73:
                    u(parseInt(f(), 36));
                    break;
                case 75:
                    // Conditional jump: if the popped value is truthy, skip the offset
                    // operand; otherwise fall through to the unconditional jump below.
                    if (v[--x]) {
                        b++;
                        break
                    }
                case 74:
                    // Relative jump by a signed 16-bit offset.
                    g = t.charCodeAt(b++) - 32 << 16 >> 16, b += g;
                    break;
                case 76:
                    u(k[t.charCodeAt(b++) - 32]);
                    break;
                case 77:
                    y = v[--x], u(v[--x][y]);
                    break;
                case 78:
                    g = t.charCodeAt(b++) - 32, u(a(v, x -= g + 1, g));
                    break;
                case 79:
                    g = t.charCodeAt(b++) - 32, u(k["$" + g]);
                    break;
                case 81:
                    h = v[--x], v[--x][f()] = h;
                    break;
                case 82:
                    u(v[--x][f()]);
                    break;
                case 83:
                    h = v[--x], k[t.charCodeAt(b++) - 32] = h;
                    break;
                case 84:
                    v[x++] = !0;
                    break;
                case 85:
                    v[x++] = void 0;
                    break;
                case 86:
                    u(v[x - 1]);
                    break;
                case 88:
                    h = v[--x], y = v[--x], v[x++] = h, v[x++] = y;
                    break;
                case 89:
                    // Create a bytecode closure: `y` holds its code, `x === c` tags it
                    // so that opcode 67 can recognise it.
                    u(function () {
                        function e() {
                            return r(e.y, arguments, k)
                        }

                        return e.y = f(), e.x = c, e
                    }());
                    break;
                case 90:
                    v[x++] = null;
                    break;
                case 91:
                    v[x++] = h;
                    break;
                case 93:
                    h = v[--x];
                    break;
                case 0:
                    return v[--x];
                default:
                    // Any other opcode pushes a small integer literal derived from it.
                    u((g << 16 >> 16) - 16)
            }
        }

        // `n` is the IIFE's `this`; `t` is its `Function` constructor, used to
        // compile operator helpers; `s` is `Object.keys` or a for-in fallback;
        // `b` and `k` are the caches used by `e` and `a` above.
        var n = this, t = n.Function, s = Object.keys || function (e) {
            var a = {}, r = 0;
            for (var c in e) a[r++] = c;
            return a.length = r, a
        }, b = {}, k = {};
        return r
    })()
    ('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d, [e])
    return e.sign(userId)
}

// Example invocation: print the value generateSignature returns for a sample numeric id.
console.log(generateSignature(75984155221))

算法出来了,怎么调用呢?
这里需要使用pyexecjs模块,具体怎么使用请参考:pyexecjs的使用
上面的js代码放入signature.js文件里
下面是实现的代码:

import execjs


def sign(whale_id):
    """Compute the ``_signature`` request parameter for a user id.

    Reads ``signature.js`` from the current working directory, compiles it
    with PyExecJS and calls its ``generateSignature`` function.

    Args:
        whale_id: User id forwarded to ``generateSignature``.

    Returns:
        Whatever ``generateSignature`` returns for ``whale_id``.
    """
    # The script embeds non-ASCII characters, so decode it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open('signature.js', 'r', encoding='utf-8') as f:
        source = f.read()
    # NOTE(review): arguments become JavaScript values, and JavaScript numbers
    # are IEEE-754 doubles; integer ids above 2**53 are rounded on the way in.
    # Prefer passing ids as strings.
    return execjs.compile(source).call('generateSignature', whale_id)


if __name__ == '__main__':
    # Pass the id as a string: this 19-digit value is larger than 2**53, so as
    # an int it would be rounded once it becomes a JavaScript number and the
    # signature would be computed for a different id.
    print(sign('6734207818456501516'))

运行结果:

7puVZgAAs.cRZGqZYLlnke6blX

三、抓取数据

剩下就不用我多说了,直接添加参数请求数据就行。下面就直接写代码了

import requests
import execjs


def sign(whale_id):
    """Compute the ``_signature`` request parameter for a user id.

    Reads ``signature.js`` (the file the JavaScript generator is saved to)
    from the current working directory, compiles it with PyExecJS and calls
    its ``generateSignature`` function.

    Args:
        whale_id: User id forwarded to ``generateSignature``; pass it as a
            string so that large ids are not rounded as JavaScript numbers.

    Returns:
        Whatever ``generateSignature`` returns for ``whale_id``.
    """
    # The script embeds non-ASCII characters, so decode it as UTF-8 explicitly
    # instead of relying on the platform's default encoding.
    with open('signature.js', 'r', encoding='utf-8') as f:
        source = f.read()
    return execjs.compile(source).call('generateSignature', whale_id)


if __name__ == '__main__':
    # Request the first page (max_cursor=0) of a user's posts and print the
    # raw response body.
    url = 'https://www.iesdouyin.com/web/api/v2/aweme/post'
    data = {
        'sec_uid': 'MS4wLjABAAAAOyyOOiqBYiPo3uIHuTrqE6OeAY2NJ3dIRBA63mn4fFA',
        'count': '21',
        'max_cursor': '0',
        'aid': '1128',
        # Computed per request instead of shipping a stale captured value.
        '_signature': sign('496625282450126'),
        'dytk': '5b68a25b4dac4940285aa00d8abec34e'
    }
    headers = {
        # The query separator before "timestamp" had been mangled into the
        # multiplication sign (HTML entity "&times"); restored to "&timestamp".
        'referer': 'https://www.iesdouyin.com/share/user/496625282450126?sec_uid=MS4wLjABAAAAOyyOOiqBYiPo3uIHuTrqE6OeAY2NJ3dIRBA63mn4fFA&timestamp=1568014650&utm_source=copy&utm_campaign=client_share&utm_medium=android&share_app_name=douyin',
        'Sec-Fetch-Mode': 'cors',
        'Accept': 'application/json',
        # NOTE(review): 'authority' and 'path' look like HTTP/2 pseudo-headers
        # copied from browser dev tools; they are sent here as ordinary headers
        # and 'path' carries an old signature - confirm whether they are needed.
        'authority': 'www.iesdouyin.com',
        'path': '/web/api/v2/aweme/post/?sec_uid=MS4wLjABAAAAOyyOOiqBYiPo3uIHuTrqE6OeAY2NJ3dIRBA63mn4fFA&count=21&max_cursor=0&aid=1128&_signature=vCSJvxAQ4WEz5y3OjH0lSLwkia&dytk=5b68a25b4dac4940285aa00d8abec34e',
        'cookie': 'tt_webid=6734472189158344203; _ba=BA0.2-20190909-5199e-UGNwPi7sl3644Ticb5yC; _ga=GA1.2.483273212.1567991500; _gid=GA1.2.591824214.1567991500',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Without a timeout requests can block indefinitely on an unresponsive server.
    r = requests.get(url, params=data, headers=headers, timeout=10)
    print(r.text)

下面是效果
爬取抖音视频(三)_第3张图片
爬取抖音(一)
爬取抖音(二)

你可能感兴趣的:(爬虫)