Python 爬取快手视频并批量下载

import re
import requests
import time
import json
import os
from pprint import pprint


def get(url: str) -> dict:
    """Fetch a Kuaishou page and extract its title, author and media URLs.

    Desktop links of the form ``live.kuaishou.com/u/<user>/<photo_id>`` are
    rewritten to ``https://c.kuaishou.com/fw/photo/<photo_id>`` before the
    request is sent with an iPhone User-Agent and a fixed ``did`` cookie.

    Args:
        url: Page URL. Surrounding whitespace (for example the trailing
            newline left by ``readline``) is ignored.

    Returns:
        On success, a dict containing ``title`` (the caption up to its first
        space) and ``user``; additionally ``videos``, ``videoName`` and
        ``msg`` when the page data has ``srcNoMark``, and ``imgs`` when it
        has both ``images`` and ``imageCDN``.
        On any failure, ``{'msg': 'failed...'}``.
    """
    data = {}
    failed = {'msg': 'failed...'}
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25",
        "Cookie": "did=web_68e0268146694843a92700d2de49a0a6;"
    }

    # Lines read from a text file still carry their line terminator.
    url = url.strip()

    # Rewrite a desktop URL to the mobile share page.
    temp = re.findall(r'live\.kuaishou\.com/u/\w+/(\w+)', url)
    if temp:
        url = 'https://c.kuaishou.com/fw/photo/{}'.format(temp[0])

    try:
        rep = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Timeouts / connection errors are reported like any other failure.
        print('kuaishou request failed')
        return failed
    if rep.status_code != 200:
        return failed

    # NOTE(review): this pattern is empty, so every match is the empty string
    # and the json.loads() below always fails. The original expression
    # (presumably one capturing the JSON embedded in the page) seems to have
    # been lost -- restore it before relying on this function.
    page_data = re.findall(r'', rep.text)
    if not page_data:
        return failed

    try:
        page_data = json.loads(page_data[0])
    except (ValueError, TypeError):
        print('kuaishou loads json failed')
        return failed

    # Any missing or mis-shaped field means the page is not what we expect.
    try:
        video_info = page_data['video']
        title = video_info['caption'].split(" ")[0]
        user = page_data['user']['name']
    except (KeyError, TypeError, AttributeError):
        print('kuaishou page data has unexpected structure')
        return failed
    data['title'] = title
    data['user'] = user

    # Video: the key is absent when the post is (possibly) an image slideshow.
    if 'srcNoMark' in video_info:
        data['videos'] = [video_info['srcNoMark']]
        data['videoName'] = data['title']
        data['msg'] = '如果快手视频下载出错请尝试更换网络'

    # Images: both fields must be present and non-null, otherwise the post is
    # treated as a plain video.
    images = video_info.get('images')
    image_cdn = video_info.get('imageCDN')
    if images is not None and image_cdn is not None:
        if not image_cdn.startswith('http'):
            image_cdn = 'http://' + image_cdn
        data['imgs'] = [image_cdn + i['path'] for i in images]
    return data


def download(name: str, filename: str, url: str,
             base_dir: str = "E:\\视频\\kuaishou\\"):
    """Stream ``url`` to ``<base_dir>/<name>/<filename>.mp4``.

    Args:
        name: Sub-directory name (the caller passes the uploader's name).
        filename: File name without extension (the caller passes the title).
        url: Direct URL of the video file.
        base_dir: Root output directory; defaults to the original
            hard-coded location.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status (so an error page is never saved as a video).
        OSError: if the directory or file cannot be created.
    """
    # Captions and user names are free text; replace characters that are not
    # allowed in Windows file names (and line breaks/tabs) so open() succeeds.
    unsafe = r'[\\/:*?"<>|\r\n\t]'
    folder = os.path.join(base_dir, re.sub(unsafe, "_", name))
    # Creates missing parents and tolerates an already existing directory,
    # while still surfacing real errors such as permission problems.
    os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, re.sub(unsafe, "_", filename) + ".mp4")

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=10) as r:
        r.raise_for_status()
        with open(target, "wb") as mp4:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    mp4.write(chunk)

if __name__ == "__main__":
    # Batch driver: read one page URL per line and download each video.
    with open("E:\\视频\\video.txt", 'r') as file_to_read:
        for raw_line in file_to_read:
            link = raw_line.strip()  # drop the trailing newline
            if not link:
                continue  # tolerate blank lines in the list

            getVideo = get(link)
            pprint(getVideo)

            # get() returns only {'msg': ...} on failure, and image-only
            # posts carry no 'videos' entry; skip those instead of crashing.
            videos = getVideo.get('videos')
            if 'user' in getVideo and 'title' in getVideo and videos:
                try:
                    download(getVideo['user'], getVideo['title'], videos[0])
                except (requests.RequestException, OSError) as err:
                    # One bad link must not abort the rest of the batch.
                    print("Download failed for {}: {}".format(link, err))
            else:
                print("Skipped (no downloadable video): " + link)

            time.sleep(1)  # pause between requests
    print("Finished!!!")

运行前需要把所有视频的网址保存到一个 txt 文件中(脚本读取的是 E:\视频\video.txt),每行一个网址,比如这样:

https://live.kuaishou.com/u/zx34567studio/3xy2z95nw992rru?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/yuege33333/3xwapaztjzqfry9?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/yuege33333/3xk7h7k6ziam66g?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/Xiaxia977/3xa9c6z4irtvyx9?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xbyb7qjchwgeza/3x4f5xrztpqcgyy?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xbyb7qjchwgeza/3x4j6dtd3w5wj7a?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xi7ts3hndvw83g/3xex4u6bn2tkebw?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xi7ts3hndvw83g/3xhkdfxpc9h96su?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xi7ts3hndvw83g/3xcipugf9ycj65c?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xhvdt6nn3a5trq/3xtsqszw74pmgv2?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xhvdt6nn3a5trq/3xf6kkvhtq99wte?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xhvdt6nn3a5trq/3xz8fpa74te3irq?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xj82vnsnetdnbs?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xfdpt4we8kz4ki?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xyc52kqg3c4ejg?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xtrp9ud4vw2dzs?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xnensa7atv9nb9?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xk84tibykw7mba?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xcccj4g6erdzn2?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3x2vb44pyaysyd2?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3x4j2gt8b8p823w?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xkzsgx9ckdwn6k?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xm85ejcsfp6n44?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/qingge702/3xw8uj4zsp7swje?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3xkj8u8h8up9zia?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3xzv9cfj65z7mdw?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3xn8yrg7kasnec6?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3xnxhbxiw3fniby?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3x4vmqveeccbmty?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xdzbs7wti2eqc6/3xg9d2rrpw573ck?did=web_6949766fd3fc869e06adf4ad459aef38
https://live.kuaishou.com/u/3xbzgk99vs7g846/3x7hzqc8xtbe9r4?did=web_6949766fd3fc869e06adf4ad459aef38

你可能感兴趣的:(网络爬虫,python,爬虫,后端)