包图网素材视频爬取

import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree

# Crawl the paginated video listing on ibaotu.com, pull every preview video's
# source URL and title out of each listing page, and save each video as
# "<title>.mp4" inside a local folder.

# Listing-page URL template; "{}" is replaced by the 1-based page number.
start_url = 'https://ibaotu.com/shipin/7-0-0-0-0-{}.html'

# The header name must be spelled "User-Agent" (with the hyphen); any other
# spelling is sent as an unrelated custom header and the server still sees the
# default python-requests agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0'}

# Folder that receives the downloaded .mp4 files (created on demand).
output_dir = '包图网视频素材'

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the crawl forever.
request_timeout = 30

# Characters that are path separators or are rejected in file names on common
# file systems, plus ASCII control characters (e.g. newlines in scraped text).
_UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_filename(title):
    """Return *title* turned into a string that is safe to use as a file name.

    Surrounding whitespace is dropped, every unsafe character is replaced by
    an underscore, and trailing dots/spaces are removed. If nothing is left,
    the placeholder ``'untitled'`` is returned so a file can still be created.
    """
    cleaned = _UNSAFE_CHARS.sub('_', title.strip()).strip(' .')
    return cleaned or 'untitled'


def scrape_page(page):
    """Return a list of ``(video_url, title)`` pairs found on listing *page*.

    Raises ``requests.RequestException`` when the page cannot be fetched or
    the server answers with an HTTP error status.
    """
    url = start_url.format(page)
    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()

    tree = etree.HTML(response.text)
    if tree is None:
        # Guard against a parse that produced no document tree.
        return []

    sources = tree.xpath('//div[@class="video-play"]/video/@src')
    titles = tree.xpath('//span[@class="video-title"]/text()')
    # NOTE(review): sources and titles are paired purely by position; zip()
    # silently drops the surplus if the two lists differ in length.
    # urljoin() resolves protocol-relative ("//host/x.mp4"), absolute and
    # relative src values against the page URL.
    return [(urljoin(url, src), title) for src, title in zip(sources, titles)]


def download_video(video_url, title):
    """Download *video_url* and store it as ``<output_dir>/<title>.mp4``.

    The body is streamed to a temporary ``.part`` file and renamed only after
    the transfer succeeded, so an interrupted download never leaves a
    truncated ``.mp4`` behind.

    Raises ``requests.RequestException`` on network/HTTP errors and
    ``OSError`` when the file cannot be written.
    """
    target = os.path.join(output_dir, '{}.mp4'.format(sanitize_filename(title)))
    partial = target + '.part'

    print("正在下载" + title)
    try:
        with requests.get(video_url, headers=headers, stream=True,
                          timeout=request_timeout) as response:
            # Refuse to save an HTML error page under a .mp4 name.
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        os.replace(partial, target)
    except (requests.RequestException, OSError):
        if os.path.exists(partial):
            os.remove(partial)
        raise
    print("已完成" + title + "下载")


def main(first_page=1, last_page=222):
    """Download all videos from listing pages *first_page*..*last_page* (inclusive).

    A failure on one page or one video is reported and skipped so that the
    rest of the crawl still runs.
    """
    os.makedirs(output_dir, exist_ok=True)

    for page in range(first_page, last_page + 1):
        try:
            videos = scrape_page(page)
        except requests.RequestException as error:
            print(error)
            continue

        for video_url, title in videos:
            try:
                download_video(video_url, title)
            except (requests.RequestException, OSError) as error:
                print(error)


if __name__ == '__main__':
    # Top-level boundary: report anything unexpected instead of a traceback,
    # as the original script did.
    try:
        main()
    except Exception as error:
        print(error)

转载于:https://www.cnblogs.com/LQ970811/p/10715096.html

你可能感兴趣的:(包图网素材视频爬取)