Python 爬取 Bilibili 视频

import requests
from lxml import html
import os
import pprint
import time
import random
from requests.packages.urllib3.exceptions import InsecureRequestWarning

# Suppress urllib3's InsecureRequestWarning, which urllib3 emits for HTTPS
# requests made without certificate verification.
# NOTE(review): no request visible in this file passes verify=False, so this
# call may be unnecessary -- confirm before removing.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Request headers passed as ``headers=`` to the HTTP calls in this script;
# the User-Agent string identifies the client as desktop Chrome 63.
header = dict([
    (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36",
    ),
])

# Directory that downloaded videos are written into.
save_url = 'D:\\Download\\bilibili'

def get_yeshu(pages=1):  # URL of every ranking page
    """Return the ranking-API URLs for the first *pages* result pages.

    Every URL asks for 10 entries (``page_size=10``) of the tag whose
    percent-encoded value decodes to "今日热门". The 0-based page ``i`` uses
    ``next_offset = i * 10 + 1``.

    Args:
        pages: How many page URLs to build. Defaults to 1, which yields the
            single URL with ``next_offset=1``.

    Returns:
        A list of URL strings, one per page; empty when ``pages <= 0``.
    """
    template = (
        "https://api.vc.bilibili.com/board/v1/ranking/top"
        "?page_size=10&next_offset={yeshu}"
        "&tag=%E4%BB%8A%E6%97%A5%E7%83%AD%E9%97%A8&platform=pc"
    )
    return [template.format(yeshu=i * 10 + 1) for i in range(pages)]


def get_url(pagelist):  # resolve and download the videos listed on each page
    """Fetch every ranking page in *pagelist* and download its videos.

    Each page is requested as JSON; every entry found under
    ``response["data"]["items"]`` is handed to ``save_movie`` together with a
    running number that is used in the output file name.

    Args:
        pagelist: Iterable of ranking-API URLs.

    Returns:
        None.

    Raises:
        requests.RequestException: If a listing request fails or times out.
        ValueError: If a listing response body is not valid JSON.
    """
    # Numbering runs across all pages so that files downloaded from a later
    # page do not overwrite those of an earlier one.
    index = 0
    with requests.session() as session:
        for url in pagelist:
            # A timeout keeps a stalled connection from hanging the script.
            payload = session.get(url, headers=header, timeout=10).json()
            # Tolerate a missing/null "data" or "items" field and pages that
            # hold fewer than the requested 10 entries.
            items = (payload.get("data") or {}).get("items") or []
            if not items:
                print('no items returned for', url)
                continue
            for entry in items:
                index += 1
                save_movie(entry, index)
                # Random pause between downloads to avoid hammering the host.
                time.sleep(random.randint(2, 6))


def save_movie(message, x):  # download one video and save it to disk
    """Download the video referenced by *message* into ``save_url``.

    Args:
        message: One entry of the ranking response; the stream URL is read
            from ``message["item"]["video_playurl"]``.
        x: Number inserted into the output file name ``视频标题{x}.mp4``.

    Returns:
        None. This is best effort: any failure (missing key, network error,
        HTTP error status, disk error) is printed and swallowed so the caller
        can carry on with the next video.
    """
    try:
        # Read the URL first so a malformed entry fails before any network
        # or file-system work is attempted.
        url = message["item"]["video_playurl"]
        target = os.path.join(save_url, "视频标题{x}.mp4".format(x=x))
        downsize = 0
        print('开始下载')
        startTime = time.time()
        with requests.session() as session:
            with session.get(url, headers=header, stream=True,
                             timeout=10) as response:
                # Do not store an HTTP error page as an .mp4 file.
                response.raise_for_status()
                # Advertised size of the body; 0 when the server does not
                # send a usable Content-Length.
                try:
                    total = int(response.headers.get("Content-Length") or 0)
                except ValueError:
                    total = 0
                with open(target, "wb") as f:
                    for chunk in response.iter_content(chunk_size=100000):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downsize += len(chunk)
                        # Guard against a zero interval on coarse clocks.
                        elapsed = max(time.time() - startTime, 1e-6)
                        line = 'downloading %d KB/s - %.2f MB, 共 %.2f MB'
                        # Fall back to the downloaded size when the total is
                        # unknown.
                        print(line % (
                            downsize / 1024 / elapsed,
                            downsize / 1024 / 1024,
                            (total or downsize) / 1024 / 1024,
                        ))
    except Exception as e:
        # Deliberate catch-all: one failed download must not stop the batch.
        print(e)



if __name__ == "__main__":
    # Create the download directory together with any missing parents;
    # exist_ok=True makes this a no-op when it already exists and avoids the
    # check-then-create race of isdir() followed by mkdir().
    os.makedirs(save_url, exist_ok=True)
    urllist = get_yeshu()
    get_url(urllist)
    print('Completed......')

 

你可能感兴趣的:(Python)