今日头条-阳光宽频视频真实下载地址解析(Python 3 实现)

今日头条的好多视频都会跳转到“阳光宽频”这个网站。最近在做视频地址解析,所以顺手用 Python 写了个下载地址解析,还是蛮简单的。

import requests
import re
import execjs
import json
import base64



# JavaScript source that is compiled with execjs and called from Python.
#
# It defines checkUrl(vid), which:
#   1. builds the path "/video/urls/v/1/toutiao/mp4/<vid>";
#   2. appends a random "r" query parameter taken from Math.random();
#   3. computes a table-driven checksum over the UTF-8 bytes of
#      "<path>?r=<random>" (the constant -306674912 is 0xEDB88320 as a signed
#      32-bit integer, i.e. the reflected CRC-32 polynomial);
#   4. returns "http://ib.365yg.com<path>?r=<random>&s=<checksum>".
#
# NOTE(review): inside the JS, `pathname`, `host` and `url` are assigned
# without `var`, so they become implicit globals of the JS context, and `url`
# is never read. The text below is runtime data and is left untouched.
toutiao_js = '''

// 获取可以请求视频地址的接口地址
var checkUrl = function(vid) {
    pathname = "/video/urls/v/1/toutiao/mp4/"+vid
    host ="//ib.365yg.com"
    url =  host+pathname
    var n = function() {
        for (var t = 0, e = new Array(256), n = 0; 256 !== n; ++n)
            t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = 1 & (t = n) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1) ? -306674912 ^ t >>> 1 : t >>> 1,
            e[n] = t;
        return "undefined" != typeof Int32Array ? new Int32Array(e) : e
    }()
      , r = pathname + "?r=" + Math.random().toString(10).substring(2);
    "/" !== r[0] && (r = "/" + r);
    var i = function(t) {
        for (var e, r, i = -1, o = 0, a = t.length; o < a; )
            (e = t.charCodeAt(o++)) < 128 ? i = i >>> 8 ^ n[255 & (i ^ e)] : e < 2048 ? i = (i = i >>> 8 ^ n[255 & (i ^ (192 | e >> 6 & 31))]) >>> 8 ^ n[255 & (i ^ (128 | 63 & e))] : e >= 55296 && e < 57344 ? (e = 64 + (1023 & e),
            r = 1023 & t.charCodeAt(o++),
            i = (i = (i = (i = i >>> 8 ^ n[255 & (i ^ (240 | e >> 8 & 7))]) >>> 8 ^ n[255 & (i ^ (128 | e >> 2 & 63))]) >>> 8 ^ n[255 & (i ^ (128 | r >> 6 & 15 | (3 & e) << 4))]) >>> 8 ^ n[255 & (i ^ (128 | 63 & r))]) : i = (i = (i = i >>> 8 ^ n[255 & (i ^ (224 | e >> 12 & 15))]) >>> 8 ^ n[255 & (i ^ (128 | e >> 6 & 63))]) >>> 8 ^ n[255 & (i ^ (128 | 63 & e))];
        return -1 ^ i
    }(r) >>> 0;
    return  "http:"+host+r + "&s=" + i
}




'''
class KuanPing(object):
    """Resolve the real download address of a video hosted on 365yg.com.

    The resolver fetches the video page, extracts its ``videoId``, asks the
    embedded JavaScript (``toutiao_js``) to build a signed API URL, queries
    that API and decodes the base64-encoded ``main_url`` it returns.
    """

    # Seconds to wait for each HTTP request; without a timeout ``requests``
    # may block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # Raw string so that ``\s`` reaches the regex engine instead of being
    # treated as an (invalid) Python string escape.
    _VIDEO_ID_RE = re.compile(r"videoId:\s*'(.*?)'")

    def __init__(self):
        # Compile the JS once; ``checkUrl`` is called for every parsed page.
        self.js_ctx = execjs.compile(toutiao_js)

    @staticmethod
    def _extract_video_id(html: str) -> str:
        """Return the ``videoId`` value embedded in the page source.

        Raises:
            ValueError: if the page contains no ``videoId: '...'`` entry.
        """
        match = KuanPing._VIDEO_ID_RE.search(html)
        if match is None:
            raise ValueError("videoId not found in page source")
        return match.group(1)

    @staticmethod
    def _decode_main_url(main_url: str) -> str:
        """Decode the base64 ``main_url`` field into a plain URL string."""
        # Restore any stripped '=' padding; a correctly padded value gets
        # zero extra characters, so valid input is decoded unchanged.
        padded = main_url + "=" * (-len(main_url) % 4)
        return base64.b64decode(padded).decode("utf-8")

    def _fetch_text(self, url: str) -> str:
        """GET *url* and return the body decoded as UTF-8.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.content.decode("utf-8")

    def parse_video_url(self, url: str) -> str:
        """Return the direct video URL for the 365yg.com page at *url*.

        Args:
            url: Address of the video page.

        Returns:
            The decoded ``main_url`` of the ``video_1`` entry reported by
            the video API.

        Raises:
            ValueError: if no ``videoId`` can be found in the page.
            KeyError: if the API response lacks the expected fields.
            requests.RequestException: on network or HTTP errors.
        """
        page = self._fetch_text(url)
        video_id = self._extract_video_id(page)

        # The JS helper appends the random token and checksum the API expects.
        video_api = self.js_ctx.call("checkUrl", video_id)

        payload = json.loads(self._fetch_text(video_api))
        main_url = payload["data"]["video_list"]["video_1"]["main_url"]

        return self._decode_main_url(main_url)


if __name__ == "__main__":
    # Demo run: resolve one sample page and print its direct video URL.
    # Guarded so that importing this module does not trigger network access.
    url = KuanPing().parse_video_url(
        "https://www.365yg.com/a6646168469953839624/#mid=1618636632726542"
    )

    print(url)

你可能感兴趣的:(爬虫抓取)