爬取哔哩哔哩视频

目标

给定一个哔哩哔哩视频页面的网址，将其中的视频流和音频流下载到本地。

import re
import subprocess
import requests
from moviepy.editor import VideoFileClip
j = 0  # number of downloads started so far; also used to name the output files


def _download_to_file(url, headers, path, chunk_size=1024):
    """Stream ``url`` into the file at ``path``.

    Returns True when the server answered 200 and the body was written,
    False for any other status code. Network and file-system errors are
    left to propagate to the caller.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, headers=headers, stream=True) as response:
        if response.status_code != 200:
            return False

        # Content-Length is optional (e.g. chunked transfer), so the size
        # report is best-effort and must not abort the download.
        content_length = response.headers.get('content-length')
        if content_length is not None and content_length.isdigit():
            # bytes -> MiB
            print('[文件大小]:%0.2fMB' % (int(content_length) / 1024 / 1024))

        with open(path, 'wb') as f:
            for data in response.iter_content(chunk_size=chunk_size):
                f.write(data)
    return True


def get_json(url, referer):
    """Download the media stream at ``url`` to a numbered ``.flv`` file.

    Despite the name, no JSON is involved: the response body is streamed
    to ``G:\\app\\<j>.flv``, where ``j`` is the module-level counter that
    this function increments on every call.

    Args:
        url: Direct URL of the media stream.
        referer: Value sent in the ``Referer`` request header.

    Returns:
        None. The download is attempted at most twice; if both attempts
        fail (non-200 status, network error, or file error) the message
        "请求错误" is printed and the function returns normally.
    """
    global j

    headers = {"Referer": referer,
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'}
    j += 1
    path = "G:\\app\\" + str(j) + '.flv'

    # One initial attempt plus one retry. Each attempt reopens the file in
    # 'wb' mode, so a partial file from a failed attempt is overwritten.
    for _attempt in range(2):
        try:
            if _download_to_file(url, headers, path):
                return
        except (requests.RequestException, OSError):
            # Network or file-system failure: fall through to the retry.
            continue
    print("请求错误")
# Page to scrape; it is also sent as the Referer header on every download.
referer="https://www.bilibili.com/video/av97775498?spm_id_from=333.851.b_7265706f7274466972737431.12"
response = requests.get(referer)

# Patterns for the media URLs embedded in the page source. Each one captures
# the text between a known URL prefix and the "logo" query parameter.
reg = re.compile(r'http://upos-sz(.*?)logo')
reg2 = re.compile(r"http://cn-(.*?)logo")
reg3 = re.compile(r"https://cn-(.*?)logo")  # possible video URLs

txt1 = reg.findall(response.text)
txt2 = reg2.findall(response.text)
txt3 = reg3.findall(response.text)

# Try each URL family in order and use the first one that matched anything.
for prefix, matches in (("http://upos-sz", txt1),
                        ("http://cn-", txt2),
                        ("https://cn-", txt3)):
    if not matches:
        continue
    # Build the URLs without mutating the match list: with a single match,
    # index 0 and index -1 are the same element and must not be wrapped twice.
    urls = [prefix + matches[0] + "logo=40000000"]
    if len(matches) > 1:
        urls.append(prefix + matches[-1] + "logo=40000000")
    # The first match is the video stream, the last one the audio stream.
    for media_url in urls:
        get_json(media_url, referer)
    break
else:
    # None of the known URL patterns appeared in the page.
    print("未找到视频地址")

你可能感兴趣的:(python)