爬虫-------爬抖音

douyin.html


<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<script>
    // Minimal module registry/loader. Installs `define` and `require` on the
    // `__M` namespace of the object passed in (`this` at the call site). If
    // `__M.require` already exists the whole body is skipped.
    !function (t) {
        if (t.__M = t.__M || {},
            !t.__M.require) {
            // Shared state for everything below:
            //   e - the require function, n - the define function,
            //   r - the first <head> element; injected nodes are appended to it,
            //   i - pending callbacks per module id (arrays),
            //   o - registered factory (or value) per module id,
            //   a - instantiated module records ({exports}) per module id,
            //   u - URLs for which a <script> has already been created,
            //   c - entries copied from resourceMap(...).res,
            //   s - entries copied from resourceMap(...).pkg.
            var e, n, r = document.getElementsByTagName("head")[0], i = {}, o = {}, a = {}, u = {}, c = {}, s = {},
                // l(url, onFail): inject a <script> for `url` at most once and
                // return the element (returns undefined when the URL was
                // already requested). When `onFail` is given it is called on
                // a script error, or after e.timeout ms unless the script
                // loads first.
                l = function (t, n) {
                    if (!(t in u)) {
                        u[t] = !0;
                        var i = document.createElement("script");
                        if (n) {
                            var o = setTimeout(n, e.timeout);
                            i.onerror = function () {
                                clearTimeout(o),
                                    n()
                            }
                            ;
                            // Successful load only cancels the failure timer.
                            var a = function () {
                                clearTimeout(o)
                            };
                            // Elements without `onload` fall back to readyState polling.
                            "onload" in i ? i.onload = a : i.onreadystatechange = function () {
                                ("loaded" === this.readyState || "complete" === this.readyState) && a()
                            }
                        }
                        return i.type = "text/javascript",
                            i.src = t,
                            r.appendChild(i),
                            i
                    }
                // f(id, onDefined, onFail): queue `onDefined` to run when module
                // `id` gets defined, then start loading its script. The URL is
                // taken from the package entry (when the resource entry names a
                // `pkg`), else from the resource entry, else the id itself.
                }, f = function (t, e, n) {
                    var r = i[t] || (i[t] = []);
                    r.push(e);
                    var o, a = c[t] || c[t + ".js"] || {}, u = a.pkg;
                    o = u ? s[u].url || s[u].uri : a.url || a.uri || t,
                        l(o, n && function () {
                            n(t)
                        }
                        )
                };
            // define(id, factory): register `factory` under `id` (a trailing
            // ".js" is stripped). When the second argument is not a function the
            // third argument is used instead. Any callbacks queued for this id
            // are run and the queue entry is removed.
            n = function (t, e) {
                "function" != typeof e && (e = arguments[2]),
                    t = t.replace(/\.js$/i, ""),
                    o[t] = e;
                var n = i[t];
                if (n) {
                    for (var r = 0, a = n.length; a > r; r++)
                        n[r]();
                    delete i[t]
                }
            }
                ,
                // require(id): return the exports of a defined module,
                // instantiating it on first use. An array-like argument (anything
                // with `splice`) is forwarded to require.async. Throws a string
                // when the id has not been defined.
                e = function (t) {
                    if (t && t.splice)
                        return e.async.apply(this, arguments);
                    t = e.alias(t);
                    var n = a[t];
                    if (n)
                        return n.exports;
                    var r = o[t];
                    if (!r)
                        throw "[ModJS] Cannot find module `" + t + "`";
                    // The record is cached before the factory runs.
                    n = a[t] = {
                        exports: {}
                    };
                    // Factories are called with `this` = module record and the
                    // arguments (require, exports, module); non-function
                    // registrations are used as the value directly.
                    var i = "function" == typeof r ? r.apply(n, [e, n.exports, n]) : r;
                    // A truthy result replaces module.exports. If the exports have
                    // no truthy "default" and are extensible, a "default" property
                    // pointing back at the exports is defined.
                    return i && (n.exports = i),
                    n.exports && !n.exports["default"] && Object.defineProperty && Object.isExtensible(n.exports) && Object.defineProperty(n.exports, "default", {
                        value: n.exports
                    }),
                        n.exports
                }
                ,
                // require.async(ids, onReady, onFail): load every id in `ids`
                // that is not yet defined (plus the `deps` listed for it in the
                // resource map), then call `onReady` with the required modules.
                // `onFail` is handed to the script loader as the failure callback.
                e.async = function (n, r, i) {
                    // NOTE: `s` and `l` declared below shadow the outer package
                    // map and script loader inside this function: here `s` is the
                    // set of ids already visited and `l` counts outstanding
                    // loads. `f` was created outside, so it still uses the outer
                    // `l` and `s`.
                    function a(t) {
                        for (var n, r = 0, h = t.length; h > r; r++) {
                            var p = e.alias(t[r]);
                            // Already defined: only walk its declared deps.
                            // Otherwise, on first visit: count it, start the load
                            // and walk its declared deps.
                            p in o ? (n = c[p] || c[p + ".js"],
                            n && "deps" in n && a(n.deps)) : p in s || (s[p] = !0,
                                l++,
                                f(p, u, i),
                                n = c[p] || c[p + ".js"],
                            n && "deps" in n && a(n.deps))
                        }
                    }

                    // Runs once right after the walk and once per module that
                    // becomes defined; the post-decrement means the callback
                    // fires on the call that observes the counter at zero.
                    function u() {
                        if (0 === l--) {
                            for (var i = [], o = 0, a = n.length; a > o; o++)
                                i[o] = e(n[o]);
                            // `t` is the object the loader was installed on.
                            r && r.apply(t, i)
                        }
                    }

                    "string" == typeof n && (n = [n]);
                    var s = {}
                        , l = 0;
                    a(n),
                        u()
                }
                ,
                // require.resourceMap({res, pkg}): merge own properties of `res`
                // and `pkg` into the resource and package tables.
                e.resourceMap = function (t) {
                    var e, n;
                    n = t.res;
                    for (e in n)
                        n.hasOwnProperty(e) && (c[e] = n[e]);
                    n = t.pkg;
                    for (e in n)
                        n.hasOwnProperty(e) && (s[e] = n[e])
                }
                ,
                // require.loadJs(url): inject a script without a failure callback.
                e.loadJs = function (t) {
                    l(t)
                }
                ,
                // require.loadCss({content} | {url}): append an inline <style>
                // (when `content` is truthy) or a stylesheet <link> (when `url`
                // is truthy) to the head element.
                e.loadCss = function (t) {
                    if (t.content) {
                        var e = document.createElement("style");
                        e.type = "text/css",
                            e.styleSheet ? e.styleSheet.cssText = t.content : e.innerHTML = t.content,
                            r.appendChild(e)
                    } else if (t.url) {
                        var n = document.createElement("link");
                        n.href = t.url,
                            n.rel = "stylesheet",
                            n.type = "text/css",
                            r.appendChild(n)
                    }
                }
                ,
                // require.alias(id): normalise an id by dropping a trailing ".js".
                e.alias = function (t) {
                    return t.replace(/\.js$/i, "")
                }
                ,
                // Failure timer for script loads, in milliseconds; then publish
                // the two entry points.
                e.timeout = 5e3,
                t.__M.define = n,
                t.__M.require = e
        }
    }(this)

    __M.define("douyin_falcon:node_modules/byted-acrawler/dist/runtime", function (l, e) {
        Function(function (l) {
            return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g, function (e) {
                return l[15 & e.charCodeAt(0)]
            })
        }("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d, [Object.defineProperty(e, "__esModule", {value: !0})])
    });

    // Look up the runtime module by its registered id, sign a sample user id
    // and log the result. Both intermediate values are stored on the global
    // object, exactly as assignments to undeclared names do in a classic
    // (non-strict) page script.
    window._bytedAcrawler = window.__M.require("douyin_falcon:node_modules/byted-acrawler/dist/runtime");
    window.signature = window._bytedAcrawler.sign('58841646784');
    console.log(window.signature);
</script>
</body>
</html>



s1.js

// Provide a browser-like `navigator` global for the code that follows.
// NOTE(review): presumably the obfuscated runtime reads navigator.userAgent
// while computing the signature - confirm against the intact payload.
//
// Object.defineProperty is used instead of a bare `navigator = {...}`
// assignment: on Node.js releases that ship a built-in, getter-only
// `navigator` global the bare assignment is silently ignored in sloppy mode
// (and throws in strict mode / ES modules), leaving Node's own user agent in
// place. Redefining the configurable property works on every version.
Object.defineProperty(globalThis, "navigator", {
    value: {
        userAgent: "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
    },
    configurable: true,
    enumerable: true,
    writable: true
});

// Node-side copy of the module registry/loader. Installs `define` and
// `require` on the `__M` namespace of the object passed in (`this` at the call
// site). If `__M.require` already exists the whole body is skipped.
!function (t) {
    if (t.__M = t.__M || {},
        !t.__M.require) {
        // Shared state for everything below:
        //   e - the require function, n - the define function,
        //   r - in this copy a plain string literal; the `r.appendChild(...)`
        //       calls further down would therefore throw if they were reached,
        //   i - pending callbacks per module id (arrays),
        //   o - registered factory (or value) per module id,
        //   a - instantiated module records ({exports}) per module id,
        //   u - URLs for which a script element has already been created,
        //   c - entries copied from resourceMap(...).res,
        //   s - entries copied from resourceMap(...).pkg.
        var e, n, r = "  快来加入抖音短视频,让你发现最有趣的我!       \n" +
            "\n" +
            "\n" +
            "\n" +
            "", i = {}, o = {}, a = {}, u = {}, c = {}, s = {},
            // l(url, onFail): create a script element for `url` at most once.
            // References `document`, which this file does not define, so it is
            // only usable where such a global exists. When `onFail` is given it
            // is called on a script error, or after e.timeout ms unless the
            // script loads first.
            l = function (t, n) {
                if (!(t in u)) {
                    u[t] = !0;
                    var i = document.createElement("script");
                    if (n) {
                        var o = setTimeout(n, e.timeout);
                        i.onerror = function () {
                            clearTimeout(o),
                                n()
                        }
                        ;
                        // Successful load only cancels the failure timer.
                        var a = function () {
                            clearTimeout(o)
                        };
                        // Elements without `onload` fall back to readyState polling.
                        "onload" in i ? i.onload = a : i.onreadystatechange = function () {
                            ("loaded" === this.readyState || "complete" === this.readyState) && a()
                        }
                    }
                    return i.type = "text/javascript",
                        i.src = t,
                        r.appendChild(i),
                        i
                }
            // f(id, onDefined, onFail): queue `onDefined` to run when module `id`
            // gets defined, then start loading its script. The URL is taken from
            // the package entry (when the resource entry names a `pkg`), else
            // from the resource entry, else the id itself.
            }, f = function (t, e, n) {
                var r = i[t] || (i[t] = []);
                r.push(e);
                var o, a = c[t] || c[t + ".js"] || {}, u = a.pkg;
                o = u ? s[u].url || s[u].uri : a.url || a.uri || t,
                    l(o, n && function () {
                        n(t)
                    }
                    )
            };
        // define(id, factory): register `factory` under `id` (a trailing ".js"
        // is stripped). When the second argument is not a function the third
        // argument is used instead. Any callbacks queued for this id are run and
        // the queue entry is removed.
        n = function (t, e) {
            "function" != typeof e && (e = arguments[2]),
                t = t.replace(/\.js$/i, ""),
                o[t] = e;
            var n = i[t];
            if (n) {
                for (var r = 0, a = n.length; a > r; r++)
                    n[r]();
                delete i[t]
            }
        }
            ,
            // require(id): return the exports of a defined module, instantiating
            // it on first use. An array-like argument (anything with `splice`)
            // is forwarded to require.async. Throws a string when the id has not
            // been defined.
            e = function (t) {
                if (t && t.splice)
                    return e.async.apply(this, arguments);
                t = e.alias(t);
                var n = a[t];
                if (n)
                    return n.exports;
                var r = o[t];
                if (!r)
                    throw "[ModJS] Cannot find module `" + t + "`";
                // The record is cached before the factory runs.
                n = a[t] = {
                    exports: {}
                };
                // Factories are called with `this` = module record and the
                // arguments (require, exports, module); non-function
                // registrations are used as the value directly.
                var i = "function" == typeof r ? r.apply(n, [e, n.exports, n]) : r;
                // A truthy result replaces module.exports. If the exports have no
                // truthy "default" and are extensible, a "default" property
                // pointing back at the exports is defined.
                return i && (n.exports = i),
                n.exports && !n.exports["default"] && Object.defineProperty && Object.isExtensible(n.exports) && Object.defineProperty(n.exports, "default", {
                    value: n.exports
                }),
                    n.exports
            }
            ,
            // require.async(ids, onReady, onFail): load every id in `ids` that is
            // not yet defined (plus the `deps` listed for it in the resource
            // map), then call `onReady` with the required modules. `onFail` is
            // handed to the script loader as the failure callback.
            e.async = function (n, r, i) {
                // NOTE: `s` and `l` declared below shadow the outer package map
                // and script loader inside this function: here `s` is the set of
                // ids already visited and `l` counts outstanding loads. `f` was
                // created outside, so it still uses the outer `l` and `s`.
                function a(t) {
                    for (var n, r = 0, h = t.length; h > r; r++) {
                        var p = e.alias(t[r]);
                        // Already defined: only walk its declared deps.
                        // Otherwise, on first visit: count it, start the load and
                        // walk its declared deps.
                        p in o ? (n = c[p] || c[p + ".js"],
                        n && "deps" in n && a(n.deps)) : p in s || (s[p] = !0,
                            l++,
                            f(p, u, i),
                            n = c[p] || c[p + ".js"],
                        n && "deps" in n && a(n.deps))
                    }
                }

                // Runs once right after the walk and once per module that becomes
                // defined; the post-decrement means the callback fires on the
                // call that observes the counter at zero.
                function u() {
                    if (0 === l--) {
                        for (var i = [], o = 0, a = n.length; a > o; o++)
                            i[o] = e(n[o]);
                        // `t` is the object the loader was installed on.
                        r && r.apply(t, i)
                    }
                }

                "string" == typeof n && (n = [n]);
                var s = {}
                    , l = 0;
                a(n),
                    u()
            }
            ,
            // require.resourceMap({res, pkg}): merge own properties of `res` and
            // `pkg` into the resource and package tables.
            e.resourceMap = function (t) {
                var e, n;
                n = t.res;
                for (e in n)
                    n.hasOwnProperty(e) && (c[e] = n[e]);
                n = t.pkg;
                for (e in n)
                    n.hasOwnProperty(e) && (s[e] = n[e])
            }
            ,
            // require.loadJs(url): create a script element without a failure
            // callback.
            e.loadJs = function (t) {
                l(t)
            }
            ,
            // require.loadCss({content} | {url}): build an inline <style> (when
            // `content` is truthy) or a stylesheet <link> (when `url` is truthy)
            // and pass it to r.appendChild.
            e.loadCss = function (t) {
                if (t.content) {
                    var e = document.createElement("style");
                    e.type = "text/css",
                        e.styleSheet ? e.styleSheet.cssText = t.content : e.innerHTML = t.content,
                        r.appendChild(e)
                } else if (t.url) {
                    var n = document.createElement("link");
                    n.href = t.url,
                        n.rel = "stylesheet",
                        n.type = "text/css",
                        r.appendChild(n)
                }
            }
            ,
            // require.alias(id): normalise an id by dropping a trailing ".js".
            e.alias = function (t) {
                return t.replace(/\.js$/i, "")
            }
            ,
            // Failure timer for script loads, in milliseconds; then publish the
            // two entry points.
            e.timeout = 5e3,
            t.__M.define = n,
            t.__M.require = e
    }
}(this)


this.__M.define("douyin_falcon:node_modules/byted-acrawler/dist/runtime", function (l, e) {
    Function(function (l) {
        return 'e(e,a,r){(b[e]||(b[e]=t("x,y","x "+e+" y")(r,a)}a(e,a,r){(k[r]||(k[r]=t("x,y","new x[y]("+Array(r+1).join(",x[y]")(1)+")")(e,a)}r(e,a,r){n,t,s={},b=s.d=r?r.d+1:0;for(s["$"+b]=s,t=0;t>>065:h=,y=,[y]=h66:u(e(t[b],,67:y=,d=,u((g=).x===c?r(g.y,y,k):g.apply(d,y68:u(e((g=t[b])<"<"?(b--,f):g+g,,70:u(!1)71:n72:+f73:u(parseInt(f,3675:if(){bcase 74:g=<<16>>16g76:u(k[])77:y=,u([y])78:g=,u(a(v,x-=g+1,g79:g=,u(k["$"+g])81:h=,[f]=h82:u([f])83:h=,k[]=h84:!085:void 086:u(v[x-1])88:h=,y=,h,y89:u({e{r(e.y,arguments,k)}e.y=f,e.x=c,e})90:null91:h93:h=0:;default:u((g<<16>>16)-16)}}n=this,t=n.Function,s=Object.keys||(e){a={},r=0;for(c in e)a[r]=c;a=r,a},b={},k={};r'.replace(/[-]/g, function (e) {
            return l[15 & e.charCodeAt(0)]
        })
    }("v[x++]=v[--x]t.charCodeAt(b++)-32function return ))++.substrvar .length(),b+=;break;case ;break}".split("")))()('gr$Daten Иb/s!l y͒yĹg,(lfi~ah`{mv,-n|jqewVxp{rvmmx,&effkx[!cs"l".Pq%widthl"@q&heightl"vr*getContextx$"2d[!cs#l#,*;?|u.|uc{uq$fontl#vr(fillTextx$$龘ฑภ경2<[#c}l#2q*shadowBlurl#1q-shadowOffsetXl#$$limeq+shadowColorl#vr#arcx88802[%c}l#vr&strokex[ c}l"v,)}eOmyoZB]mx[ cs!0s$l$Pb>>s!0s%yA0s"l"l!r&lengthb&l!l Bd>&+l!l &+l!l 6d>&+l!l &+ s,y=o!o!]/q"13o!l q"10o!],l 2d>& s.{s-yMo!o!]0q"13o!]*Ld>>b|s!o!l q"10o!],l!& s/yIo!o!].q"13o!],o!]*Jd>>b|&o!]+l &+ s0l-l!&l-l!i\'1z141z4b/@d, [Object.defineProperty(e, "__esModule", {value: !0})])
});

// Command-line entry point: sign the id passed as the first CLI argument and
// print the signature on stdout.
//
// Usage: node s1.js <user_id>
var userIdArg = process.argv[2];
if (userIdArg === undefined || userIdArg === "") {
    // Without this guard `sign(undefined)` would run and a signature for a
    // bogus input would be printed as if it were valid.
    console.error("usage: node s1.js <user_id>");
    process.exit(1);
}

// The two names below are intentionally left as globals, as in the original.
_bytedAcrawler = this.__M.require("douyin_falcon:node_modules/byted-acrawler/dist/runtime");

signature = _bytedAcrawler.sign(userIdArg);
console.log(signature);

s1.js

爬抖音

import requests

user_id = '58841646784' # 6556303280


# Fetch all posts of this user.
# The request signature is computed by the site's own JavaScript:
#     signature = _bytedAcrawler.sign('<user id>')
#     module id: douyin_falcon:node_modules/byted-acrawler/dist/runtime
import subprocess

# Run the Node.js signer with an argument list instead of a shell string, so
# the id is never interpreted by a shell, and let a non-zero exit status raise
# CalledProcessError. The previous getoutput() call merged stderr into the
# result, so a failing `node` run silently produced an error message that was
# then sent as the signature.
signature = subprocess.check_output(
    ['node', 's1.js', str(user_id)],
    universal_newlines=True,
).strip()



# Accumulates every post entry returned by the "post" endpoint.
user_video_list = list()


# ############################# Fetch the user's own posts ##########################
# Query-string parameters for the "post" endpoint; 'max_cursor' is overwritten
# while paging.
user_video_params = dict(
    user_id=str(user_id),
    count='21',
    max_cursor='0',
    aid='1128',
    _signature=signature,
    dytk='b4dceed99803a04a1c4395ffc81f3dbc',  # alternative value: '114f1984d1917343ccfb14d94e7ce5f5'
)

def get_aweme_list(max_cursor=None):
    """Fetch every page of the user's posts into ``user_video_list``.

    Pages are requested one after another from the "post" endpoint using the
    module-level ``user_video_params``; its ``'max_cursor'`` entry is updated
    in place before each follow-up request.

    :param max_cursor: cursor to start from; a falsy value keeps whatever
        ``user_video_params['max_cursor']`` currently holds.
    :return: ``None``; results are appended to ``user_video_list``.
    :raises requests.HTTPError: if the endpoint answers with an error status.
    """
    cursor = max_cursor
    # Iterate instead of recursing: one stack frame per page could exhaust the
    # recursion limit on long listings.
    while True:
        if cursor:
            user_video_params['max_cursor'] = str(cursor)
        res = requests.get(
            url="https://www.douyin.com/aweme/v1/aweme/post/",
            params=user_video_params,
            headers={
                'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                'x-requested-with':'XMLHttpRequest',
                # Built from the requested id so it cannot go stale when the
                # target user changes.
                'referer':'https://www.douyin.com/share/user/%s' % user_video_params['user_id'],
            },
            # Without a timeout a stalled connection blocks forever.
            timeout=30,
        )
        res.raise_for_status()
        content_json = res.json()
        # `or []` also covers an explicit null in the payload.
        user_video_list.extend(content_json.get('aweme_list') or [])

        if content_json.get('has_more') != 1:
            return
        next_cursor = content_json.get('max_cursor')
        # A missing or unchanged cursor would repeat the identical request
        # forever, so stop instead.
        if not next_cursor or next_cursor == cursor:
            return
        cursor = next_cursor


get_aweme_list()


# ############################# Fetch the user's liked posts ##########################


# Accumulates every entry returned by the "favorite" endpoint.
favor_video_list = list()

# Query-string parameters for the "favorite" endpoint; 'max_cursor' is
# overwritten while paging.
favor_video_params = dict(
    user_id=str(user_id),
    count='21',
    max_cursor='0',
    aid='1128',
    _signature=signature,
    dytk='b4dceed99803a04a1c4395ffc81f3dbc',
)


def get_favor_list(max_cursor=None):
    """Fetch every page of the user's liked posts into ``favor_video_list``.

    Pages are requested one after another from the "favorite" endpoint using
    the module-level ``favor_video_params``; its ``'max_cursor'`` entry is
    updated in place before each follow-up request.

    :param max_cursor: cursor to start from; a falsy value keeps whatever
        ``favor_video_params['max_cursor']`` currently holds.
    :return: ``None``; results are appended to ``favor_video_list``.
    :raises requests.HTTPError: if the endpoint answers with an error status.
    """
    cursor = max_cursor
    # Iterate instead of recursing: one stack frame per page could exhaust the
    # recursion limit on long listings.
    while True:
        if cursor:
            favor_video_params['max_cursor'] = str(cursor)
        res = requests.get(
            url="https://www.douyin.com/aweme/v1/aweme/favorite/",
            params=favor_video_params,
            headers={
                'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                'x-requested-with':'XMLHttpRequest',
                # Built from the requested id so it cannot go stale when the
                # target user changes.
                'referer':'https://www.douyin.com/share/user/%s' % favor_video_params['user_id'],
            },
            # Without a timeout a stalled connection blocks forever.
            timeout=30,
        )
        res.raise_for_status()
        content_json = res.json()
        # `or []` also covers an explicit null in the payload.
        favor_video_list.extend(content_json.get('aweme_list') or [])

        if content_json.get('has_more') != 1:
            return
        next_cursor = content_json.get('max_cursor')
        # A missing or unchanged cursor would repeat the identical request
        # forever, so stop instead.
        if not next_cursor or next_cursor == cursor:
            return
        cursor = next_cursor


get_favor_list()


# ############################# Video download ##########################
def _download_videos(video_list):
    """Save every video in ``video_list`` as ``<uri>.mp4`` in the working directory.

    Each entry is expected to carry its id at ``item['video']['play_addr']['uri']``.
    Entries whose download request does not succeed are reported and skipped.
    """
    for item in video_list:
        video_id = item['video']['play_addr']['uri']

        video = requests.get(
            url='https://aweme.snssdk.com/aweme/v1/playwm/',
            params={
                'video_id':video_id
            },
            headers={
                'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
                'x-requested-with': 'XMLHttpRequest',
                'referer': 'https://www.douyin.com/share/user/%s' % user_id,
            },
            stream=True,
            # Without a timeout a stalled connection blocks forever.
            timeout=30,
        )
        try:
            if not video.ok:
                # Do not store an HTTP error body under a .mp4 name.
                print('skip %s: HTTP %s' % (video_id, video.status_code))
                continue
            file_name = video_id + '.mp4'
            with open(file_name, 'wb') as f:
                # iter_content() defaults to 1-byte chunks; use a real buffer.
                for chunk in video.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        finally:
            # Streamed responses hold their connection until closed.
            video.close()


_download_videos(user_video_list)
_download_videos(favor_video_list)


你可能感兴趣的:(爬虫)