# Source code for this article: see the author's contact card at the end of the post.
# Environment: Python 3.8, PyCharm.
import os
import pprint
import re

import requests
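# Note: because of platform review, some content could not be published here;
# contact the author via the card at the end of the post if you need it.
# Source code for this article: see the author's contact card at the end of the post.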
# Video scraper: walks listing pages 2-5, pulls video ids out of each page's
# HTML with a regex, asks a JSON endpoint for each video's metadata, and saves
# the first listed definition to video/<title><video_id>.mp4.
#
# NOTE(review): in the published source the two URLs were empty f-strings and
# the id regex was an unterminated string literal (a syntax error), so the
# real values cannot be recovered from this file. They are exposed below as
# settings that must be filled in before running.
LIST_URL_TEMPLATE = ''    # listing page URL; presumably takes {page} - confirm
DETAIL_URL_TEMPLATE = ''  # metadata URL; presumably takes {video_id} - confirm
VIDEO_ID_PATTERN = r''    # regex whose matches are the video ids - TODO supply

HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
}
REQUEST_TIMEOUT = 30      # seconds; without it a stalled server hangs the run
CHUNK_SIZE = 1 << 20      # bytes written per chunk while streaming a video
OUTPUT_DIR = 'video'

# Characters removed from a title before it becomes part of a file name.
# The backslash is escaped explicitly: the previous class `[\/:...]` only
# matched '/', so a backslash in a title survived into the path.
_UNSAFE_TITLE_CHARS = re.compile(r'[\\/:*?"<>|\n\r\t]')


def sanitize_title(title):
    """Return *title* with characters that are unsafe in a file name removed.

    Strips both path separators, the punctuation ``: * ? " < > |`` and
    newline, carriage-return and tab characters. Other characters are kept.
    """
    return _UNSAFE_TITLE_CHARS.sub('', title)


def fetch_video_ids(page):
    """Download listing page *page* and return the regex matches found in it.

    Raises ``requests.RequestException`` on network errors or a non-2xx
    response.
    """
    # str.format is used for substitution, so a literal brace in the
    # template must be doubled.
    link = LIST_URL_TEMPLATE.format(page=page)
    response = requests.get(url=link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return re.findall(VIDEO_ID_PATTERN, response.text)


def download_video(video_id):
    """Look up *video_id*'s metadata and save its video under ``OUTPUT_DIR``.

    The file is named ``<sanitized title><video_id>.mp4``. Raises
    ``requests.RequestException`` on HTTP failures and ``KeyError`` /
    ``IndexError`` / ``TypeError`` / ``ValueError`` when the JSON response
    does not have the expected ``data.moment`` layout.
    """
    url = DETAIL_URL_TEMPLATE.format(video_id=video_id)
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Parse the body once and reuse it for both fields.
    moment = response.json()['data']['moment']
    title = sanitize_title(moment['title'])
    video_url = moment['videoInfo']['definitions'][0]['url']
    print(title)
    print(video_url)

    path = os.path.join(OUTPUT_DIR, title + video_id + '.mp4')
    # Stream to disk so a large video is never held fully in memory.
    with requests.get(url=video_url, headers=HEADERS,
                      timeout=REQUEST_TIMEOUT, stream=True) as video:
        video.raise_for_status()
        with open(path, mode='wb') as f:
            for chunk in video.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)


def main():
    """Scrape listing pages 2 through 5 and download every video found."""
    if not (LIST_URL_TEMPLATE and DETAIL_URL_TEMPLATE and VIDEO_ID_PATTERN):
        # Fail with a clear message instead of an obscure requests/regex error.
        raise SystemExit(
            'Set LIST_URL_TEMPLATE, DETAIL_URL_TEMPLATE and VIDEO_ID_PATTERN '
            'before running this script.'
        )

    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for page in range(2, 6):
        print(f'正在采集第{page}页的数据内容')
        video_id_list = fetch_video_ids(page)
        print(video_id_list)
        for video_id in video_id_list:
            try:
                download_video(video_id)
            except (requests.RequestException, KeyError, IndexError,
                    TypeError, ValueError) as err:
                # One bad video should not abort the remaining downloads.
                print(f'Skipping video {video_id}: {err!r}')


if __name__ == '__main__':
    main()