B站(bilibili)爬虫,反爬虫机制严重

import os
import re
import requests
import json
import subprocess
from lxml import etree
from time import sleep

# HTTP request headers sent with every request in this script.
# Both a bilibili referer and a desktop-browser user-agent are supplied.
header = {
    "referer": "https://www.bilibili.com",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
    ),
}

# 提取视频和音频的播放地址
def get_play_url(url):
    try:
        response = requests.get(url, headers=header)
        response.raise_for_status()
        # 使用正则表达式匹配播放信息
        info = re.findall(r'window.__playinfo__=(.*?)', response.text)
        if not info:
            raise ValueError("无法找到播放信息")

        info = info[0]
        play_info = json.loads(info)

        # 提取视频和音频的基础URL
        video_url = play_info["data"

你可能感兴趣的:(爬虫)