Python 快手视频去水印批量下载

import requests
from bs4 import BeautifulSoup
import re
import xlrd

class KSNoMark():
    """Batch-download Kuaishou videos via their no-watermark source URLs.

    Share links are read from the first column of the ``url`` sheet in
    ``分享链接.xlsx``. Each link is resolved to its share page, the
    ``srcNoMark`` address is extracted from that page, and the video is
    saved as ``./video/<index>.mp4``, where ``<index>`` is the link's
    position in the list.
    """

    # Silence urllib3's warnings; the page requests below use verify=False.
    requests.packages.urllib3.disable_warnings()

    # Seconds to wait on each HTTP request before giving up, so one stalled
    # connection cannot hang the whole batch.
    REQUEST_TIMEOUT = 30

    def GetShareUrl(self):
        """Return the non-empty share links listed in the workbook.

        Row 0 is skipped (treated as a header row); only column 0 is read.

        Returns:
            list: Cell values of column 0 with empty-string cells removed.
            An empty list is returned when the workbook file does not exist.
        """
        try:
            wb = xlrd.open_workbook('分享链接.xlsx')
        except FileNotFoundError:
            print("分享链接.xlsx文件不存在")
            # Return an empty list so callers can still iterate; previously
            # this path fell through to an unbound local and crashed.
            return []

        sheet = wb.sheet_by_name('url')
        cells = (sheet.cell_value(row, 0) for row in range(1, sheet.nrows))
        return [cell for cell in cells if cell != '']

    def _extract_video_url(self, page_html, pattern):
        """Return the ``srcNoMark`` URL embedded in a share page.

        Raises:
            ValueError: If the ``hide-pagedata`` element, its
                ``data-pagedata`` attribute, or the ``srcNoMark`` field
                cannot be found in the page.
        """
        soup = BeautifulSoup(page_html, 'lxml')
        node = soup.find(attrs={'id': 'hide-pagedata'})
        if node is None or 'data-pagedata' not in node.attrs:
            raise ValueError("页面中未找到 hide-pagedata 数据")

        match = pattern.search(node.attrs['data-pagedata'])
        if match is None:
            raise ValueError("页面数据中未找到 srcNoMark 地址")
        return match.group(1)

    def GetRealUrl(self):
        """Download every share link's no-watermark video into ``./video``.

        A failure on one link (network error, missing redirect, unexpected
        page layout, write error) is printed and skipped so the remaining
        links are still processed. File names keep the link's index even
        when earlier links fail.
        """
        # Local import keeps this block self-contained with respect to the
        # file's top-level import list.
        import os

        # Fixed cookie and an iPhone Safari User-Agent sent with every
        # page request.
        headers = {
            'Cookie': 'did=web_e4581e2dbe33aae5eac3d9fdf8b12566;',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
        }
        urls = self.GetShareUrl()
        if not urls:
            return

        # Create the output folder up front instead of requiring the user to
        # make it by hand.
        os.makedirs('./video', exist_ok=True)

        # Compiled once; the pattern does not change between links.
        pattern = re.compile(r'"srcNoMark":"(.*?)"},', re.S)
        timeout = self.REQUEST_TIMEOUT

        for num, url in enumerate(urls):
            try:
                # The short link answers with a redirect; read its target
                # ourselves rather than letting requests follow it.
                response = requests.get(url, headers=headers,
                                        allow_redirects=False, verify=False,
                                        timeout=timeout)
                share_url = response.headers.get('Location')
                if not share_url:
                    raise ValueError("分享链接未返回跳转地址")

                share_response = requests.get(share_url, headers=headers,
                                              verify=False,
                                              timeout=timeout).text
                real_url = self._extract_video_url(share_response, pattern)

                # The context manager releases the streamed connection even
                # if writing fails part-way through.
                with requests.get(real_url, stream=True,
                                  timeout=timeout) as r_video:
                    # Avoid saving an HTTP error page as an .mp4 file.
                    r_video.raise_for_status()
                    with open('./video/' + str(num) + '.mp4', "wb") as mp4:
                        for chunk in r_video.iter_content(chunk_size=1024 * 1024):
                            if chunk:
                                mp4.write(chunk)

                print(real_url)
            except (requests.RequestException, OSError, ValueError) as e:
                # Best-effort batch: report which link failed and move on.
                print(url, e)

if __name__ == '__main__':
    # Bind the instance to its own name; reusing "KSNoMark" would replace the
    # class with the instance and make the class unreachable afterwards.
    downloader = KSNoMark()
    downloader.GetRealUrl()
    print("快手视频采集完成")

分享链接.xlsx 的格式与上一篇文章相同;原文代码偷懒没有用 os 判断目录是否存在,因此运行前记得先在脚本所在目录新建一个 video 文件夹。
python 快手视频去水印批量下载_第1张图片

 

你可能感兴趣的:(python)