import json
import os

import requests
# Haokan (haokan.baidu.com) video-search scraper.
#
# Flow: query the search API once, parse the JSON reply, then download every
# video in the result list into OUTPUT_DIR as "<title>.mp4".

# Search endpoint captured from a browser session (page 1, 10 results per
# page). `query` is a percent-encoded UTF-8 keyword.
# NOTE(review): `sign` was presumably generated together with `timestamp`;
# confirm whether the server validates the pair before changing either one.
base_url = (
    "https://haokan.baidu.com/haokan/ui-search/pc/search/video"
    "?pn=1&rn=10&type=video"
    "&query=%E9%93%81%E8%BD%A8%E4%B8%8B%E9%9B%A8"
    "&sign=d0fa20768e3d07642df1ae7734d1793c"
    # The original literal read "version=1×tamp=..." because "&timestamp" had
    # been mangled into the "&times" entity; restored here.
    "&version=1&timestamp=1688716281831"
)

# NOTE(security): this cookie embeds a logged-in session token (BDUSS).
# Credentials should not live in source control -- rotate it and supply the
# value through the HAOKAN_COOKIE environment variable instead. The literal is
# kept only as a backward-compatible fallback.
# The value must be a single line: a raw newline is illegal inside an HTTP
# header (and inside a single-quoted Python string).
DEFAULT_COOKIE = (
    'BIDUPSID=08022F8AB33EACAE864544FEEB0F219D; '
    'PSTM=1688363384; '
    'BAIDUID=08022F8AB33EACAE29191151407F22B6:FG=1; '
    'newlogin=1; '
    'BDUSS=0pBYW5od3l6fkcxTXN2UWxoS1UzTkYwN2pKekVqc2Q2YVFyflNYSTlsNW02TTFrSVFBQUFBJCQAAAAAAAAAAAEAAAAkvexmy6~Ep7ChAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGZbpmRmW6ZkT; '
    'BDUSS_BFESS=0pBYW5od3l6fkcxTXN2UWxoS1UzTkYwN2pKekVqc2Q2YVFyflNYSTlsNW02TTFrSVFBQUFBJCQAAAAAAAAAAAEAAAAkvexmy6~Ep7ChAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGZbpmRmW6ZkT; '
    'hkpcSearch=%u94C1%u8F68%u52A8%u7269%u4FB5%u5165%24%24%24%u94C1%u8F68%u4E0B%u96E8%24%24%24%u94C1%u8F68%u8FDB%u7AD9%24%24%24%u5730%u94C1%u4E0B%u96E8%u8FDB%u7AD9%24%24%24%u5730%u94C1%u8F66%u5934%u89C6%u89D2%24%24%24%u5730%u94C1%20%u96E8%u5929%u8FDB%u7AD9%24%24%24%u591A%u9879%u7ECF%u6D4E%u6307%u6807%u56DE%u5347%24%24%24%u8F68%u9053%u7EF4%u4FEE%u4EBA%u5458%24%24%24%u8F68%u9053%20%u969C%u788D%u7269; '
    'BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; '
    'Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1688452293,1688628984,1688706633,1688950892; '
    'COMMON_LID=5dc72c878b9fbe4539c89a93e3c42bd2; '
    'BAIDUID_BFESS=08022F8AB33EACAE29191151407F22B6:FG=1; '
    'delPer=0; '
    'PSINO=5; '
    'BA_HECTOR=a1210021250ka10g20ala1261ian5kq1p; '
    'ZFY=Vif:B3WXj5t4id7pN73bJgePd7rAhWQFhTAJO5Vl:AykY:C; '
    'PC_TAB_LOG=Personal_center_page; '
    'H_PS_PSSID=36554_38642_38831_39026_39024_38943_38879_38957_38955_38982_38917_38820_38988_38639_26350_39042_38949; '
    'ab_sr=1.0.1_MWNjZWM0MjQyNWJkYjdmN2Q0MjRiN2Y0Yzc0NjQ2OWIyNjY2YTllMDU5MTllMWZmNGUxNjYwYjMzYjlhNzZiMTg5YzdmYTc5ZjZhYTg2NGI5MTA0Yzk3ZjQ1YmJmMWZhNDcxNGU5ZWRjNjEzMWYzYTAyNmU3OTgyNjEwNWU1ODBkZjQ4MWM2YmFhZjAwMzQ5NGEzYzk0ZWFmMzBjODJhMw==; '
    'ariaDefaultTheme=undefined; '
    'reptileData=%7B%22data%22%3A%2245efcf3602e427f72900def606300acd473425573f177678b298c6a9f7ca2354196ce494becdaa872aae77639dda3be7d2ef6823600cfb9669bbb276261ee59ac60fd57dcc5303ef73439530bab2514392d569f01d75e1340ab225bae4d428b6%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%2271b7def9%22%7D; '
    'RT="z=1&dm=baidu.com&si=dc998e9a-58ac-4023-8292-a895aa742b9d&ss=ljwerpou&sl=g&tt=mc8&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=1ejtj&nu=mvmbv6mo&cl=1elci&ul=1elgi&hd=1elj5"; '
    'Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1688968397'
)

# Headers sent with both the search request and every video download.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    # Page the request claims to originate from.
    'referer': 'https://haokan.baidu.com/web/search/page?query=%E8%BD%A8%E9%81%93%E7%BB%B4%E4%BF%AE%E4%BA%BA%E5%91%98&sfrom=recommend',
    'cookie': os.environ.get('HAOKAN_COOKIE', DEFAULT_COOKIE),
}

# Directory the videos are written to (created on demand).
OUTPUT_DIR = r'./好看视频-轨道/'
# Seconds to wait for the server to connect / send data before giving up;
# without a timeout `requests` can block forever on a stalled connection.
REQUEST_TIMEOUT = 30
# Size of each chunk read from the network while streaming a video to disk.
CHUNK_SIZE = 1024 * 1024

# Maps characters that are illegal in file names on common file systems
# (path separators, Windows-reserved punctuation, ASCII control characters)
# to an underscore.
_UNSAFE_FILENAME_CHARS = str.maketrans(
    {ch: '_' for ch in '\\/:*?"<>|' + ''.join(map(chr, range(32)))}
)


def safe_filename(title):
    """Return *title* turned into a string that is safe to use as a file name.

    Unsafe characters become ``_``; leading/trailing spaces and dots are
    dropped. An empty result falls back to ``'untitled'``.
    """
    cleaned = str(title).translate(_UNSAFE_FILENAME_CHARS).strip(' .')
    return cleaned or 'untitled'


def extract_video_list(payload):
    """Return ``payload['data']['list']``, or ``[]`` when that path is absent.

    The search API reply is expected to nest the results under
    ``data -> list``; any other shape (error payload, ``null`` fields) yields
    an empty list instead of raising.
    """
    if not isinstance(payload, dict):
        return []
    data_section = payload.get('data')
    if not isinstance(data_section, dict):
        return []
    items = data_section.get('list')
    return items if isinstance(items, list) else []


def download_video(video_url, target_path):
    """Stream *video_url* to *target_path*.

    The body is written to ``target_path + '.part'`` first and renamed only
    after the transfer finished, so an interrupted download never leaves a
    truncated file under the final name.

    Raises:
        requests.RequestException: network failure or HTTP error status.
        OSError: the file could not be written or renamed.
    """
    partial_path = target_path + '.part'
    # Second request: fetch the binary video data, streamed in chunks rather
    # than loaded into memory in one piece.
    with requests.get(video_url, headers=headers, stream=True,
                      timeout=REQUEST_TIMEOUT) as video_response:
        # Do not save an HTML/JSON error page as an .mp4 file.
        video_response.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in video_response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)
    os.replace(partial_path, target_path)


def main():
    """Query the search API and download every video it lists."""
    # First request: the search API. Redirects are deliberately not followed.
    response = requests.get(base_url, headers=headers, allow_redirects=False,
                            timeout=REQUEST_TIMEOUT)
    print(response.status_code)  # 200 means the endpoint is reachable
    raw_text = response.text
    print(raw_text)  # raw JSON text before parsing

    # Parse the JSON text into Python objects.
    json_data = json.loads(raw_text)
    print(json_data)  # parsed structure

    # Pull the nested result list out of the reply.
    json_list = extract_video_list(json_data)
    print(json_list)

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Handle the results one by one.
    for item in json_list:
        if not isinstance(item, dict) or not item.get('url'):
            # Nothing to download for this entry.
            continue
        video_title = safe_filename(item.get('title') or '') + '.mp4'
        video_url = item['url']
        print(video_title, video_url)  # title + url
        print('正在下载:', video_title)
        try:
            download_video(video_url, os.path.join(OUTPUT_DIR, video_title))
        except (requests.RequestException, OSError) as exc:
            # One broken video must not abort the remaining downloads.
            print('下载失败:', video_title, exc)
            continue
        print('下载完成\n')


if __name__ == '__main__':
    main()