import requests
import os
import re
import threading
# Directory that downloaded files are written into.
root = "E://movie//"
# Request headers sent with every download so the request looks like a browser.
kv = {"user-agent": "Mozilla/5.0"}
def download(url):
    """Download ``url`` into the ``root`` directory unless it is already there.

    The local file name is the last ``/``-separated component of ``url``.
    Network and file-system errors are reported on stdout and otherwise
    ignored, so one failed segment does not stop the calling worker.

    Args:
        url: Address of the file to fetch.
    """
    path = root + url.split('/')[-1]  # target path of the saved file
    try:
        # Several worker threads call this function at the same time.
        # exist_ok avoids the check-then-create race in which the losing
        # thread would fail with FileExistsError and skip its segment.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("File already exists")
            return
        # A timeout keeps a stalled connection from blocking the worker
        # (and therefore the final join) forever.
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()  # turn HTTP error statuses into exceptions
        with open(path, 'wb') as f:  # the with-block closes the file
            f.write(r.content)  # raw bytes of the response body
    except (requests.RequestException, OSError):
        # Only expected failures are swallowed; KeyboardInterrupt and
        # programming errors propagate.
        print("\nVideo crawl failed")
def segment_url(base, index):
    """Return the URL of segment number ``index`` built from prefix ``base``.

    The index is zero-padded to at least three digits and followed by
    ``.ts``.  From index 1000 on, the last character of ``base`` is dropped
    before the (now four-digit) index is appended.
    NOTE(review): presumably that character is a padding digit of a
    fixed-width counter -- confirm against the real segment names.  Indexes
    of five or more digits still drop only one character.

    ``base`` itself is never modified, so every index is derived from the
    same prefix regardless of which indexes were built before.
    """
    prefix = base[:-1] if index >= 1000 else base
    return prefix + '%03d' % index + '.ts'


def _download_range(start, stop, base, show_progress=False):
    """Download segments ``start`` .. ``stop - 1`` one after another.

    Uses only its arguments (no module-level state is written), so several
    threads can run it concurrently without corrupting each other's URLs.
    With ``show_progress`` a percentage relative to ``stop`` is printed on a
    single, continuously overwritten line.
    """
    for i in range(start, stop):
        if show_progress:
            # '\r' returns to the start of the line and end='' suppresses
            # the newline, so the progress text is overwritten in place.
            print('\rdownloaded percent%f' % (((i + 1) / stop) * 100), end='')
        download(segment_url(base, i))


def job1(data1, url):
    """Download segments 0 .. data1 - 1 from prefix ``url``, printing progress."""
    _download_range(0, data1, url, show_progress=True)


def job2(data2, url):
    """Download segments num1 .. data2 - 1; ``num1`` is a module-level global."""
    _download_range(num1, data2, url)


def job3(data3, url):
    """Download segments num2 .. data3 - 1; ``num2`` is a module-level global."""
    _download_range(num2, data3, url)


def job4(data4, url):
    """Download segments num3 .. data4 - 1; ``num3`` is a module-level global."""
    _download_range(num3, data4, url)


def job5(data5, url):
    """Download segments num4 .. data5 - 1; ``num4`` is a module-level global."""
    _download_range(num4, data5, url)


def job6(data6, url):
    """Download segments num5 .. data6 - 1; ``num5`` is a module-level global."""
    _download_range(num5, data6, url)


def job7(data7, url):
    """Download segments num6 .. data7 - 1; ``num6`` is a module-level global."""
    _download_range(num6, data7, url)
if __name__=='__main__':
    url = input('Please enter the url: ')
    # Remove the last six characters of the entered address; the workers
    # append the segment index and '.ts' to the remaining prefix.
    url1 = re.sub(r'......$', '', url)
    num = int(input('Please enter the number of video: '))
    # Split the index range into seven slices.  job2..job7 read num1..num6
    # as module-level globals for their start index, so these names must
    # keep existing under exactly these names.
    num1 = int(num/7)
    num2 = num1*2
    num3 = num1*3
    num4 = num1*4
    num5 = num1*5
    num6 = num1*6
    # The last slice ends at num itself, so the num % 7 segments left over
    # by the integer division are downloaded too instead of being skipped.
    num7 = num
    # One thread per slice; args carries (end index, URL prefix).
    workers = [
        threading.Thread(target=job1, args=(num1, url1)),
        threading.Thread(target=job2, args=(num2, url1)),
        threading.Thread(target=job3, args=(num3, url1)),
        threading.Thread(target=job4, args=(num4, url1)),
        threading.Thread(target=job5, args=(num5, url1)),
        threading.Thread(target=job6, args=(num6, url1)),
        threading.Thread(target=job7, args=(num7, url1)),
    ]
    for worker in workers:
        worker.start()
    # Wait for every worker before reporting completion.
    for worker in workers:
        worker.join()
    print('\nThe download is complete')
    # NOTE: nothing here waits for a key press; the script ends right after
    # printing this message.
    print('press any key to exit')
# 可以通过解析真实视频地址,来下载 ts 流文件。在这里,代码已被封装成一个简单的 exe。
# 以下是百度网盘链接:
# 链接:https://pan.baidu.com/s/1NcwksIpKuVcrKjOEWAKhSQ
# 提取码:4vu3