一时心血来潮,突然想着既然前面学习了爬虫的知识,已经成功爬取了百度图片,那么接下来是不是应该试着爬取视频?话不多说,有了想法就是干。首先实现本地视频的播放,主要功能有:打开文件|暂停|播放|拖动进度条调整进度|全屏播放;在这个基础上,我准备爬取网络视频,爬取过程比较繁琐;最后准备将爬取的视频通过视频流的方式输入播放器中进行自动播放。代码如下:(有点乱,不想整理了,写的已经有点凌乱,有大佬想继续编下去可以随便拿走)
import sys
import os
import requests as req
import re
from bs4 import BeautifulSoup
from Crypto.Cipher import AES
import threading
import time
from PyQt5.QtWidgets import *
from PyQt5.QtMultimedia import *
from PyQt5.QtMultimediaWidgets import QVideoWidget
from PyQt5.QtCore import *
from PyQt5 import QtCore, QtWidgets
# ________________________________________________
# 函数里面的变量一定要用self.xxx,如果调试正确,运行不正确
# ________________________________________________
# python2:super(子类,self).父类方法名
# python3:super().fun(arg),父类直接作为子类参数,super省略了括号中的参数(可理解为自动识别),记得加参数,可以是__init__方法,直接继承整个父类方法
class myVideoWidget(QVideoWidget):
    """Video widget that reports mouse double-clicks through a Qt signal."""

    # Signal carrying a text payload; emitted on every double-click.
    doubleClickedItem = pyqtSignal(str)

    def __init__(self, parent=None):
        """Create the widget, forwarding *parent* to ``QVideoWidget``."""
        super().__init__(parent)

    def mouseDoubleClickEvent(self, QMouseEvent):
        """Handle a double-click by emitting ``doubleClickedItem``.

        The event object itself is not inspected and the base-class handler
        is not invoked.
        """
        payload = "double clicked"
        self.doubleClickedItem.emit(payload)
class Ui_MainWindow(object):
    """Creates and labels the widgets of the player main window."""

    def setupUi(self, MainWindow):
        """Create every child widget of *MainWindow* and place it in a grid.

        Widgets are created in a fixed order (label, pause button, slider,
        video widget, play button, open button) and stored as attributes on
        ``self`` so the window class can connect their signals.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)

        central = QtWidgets.QWidget(MainWindow)
        central.setObjectName("centralwidget")
        self.centralwidget = central

        grid = QtWidgets.QGridLayout(central)
        grid.setObjectName("gridLayout")
        self.gridLayout = grid

        # Progress label: row 1, column 3.
        progress_label = QtWidgets.QLabel(central)
        progress_label.setObjectName("lab_video")
        self.lab_video = progress_label
        grid.addWidget(progress_label, 1, 3, 1, 1)

        # Pause button: row 2, column 3.
        pause_button = QtWidgets.QPushButton(central)
        pause_button.setObjectName("btn_stop")
        self.btn_stop = pause_button
        grid.addWidget(pause_button, 2, 3, 1, 1)

        # Horizontal progress slider: row 1, columns 0-2.
        slider = QtWidgets.QSlider(central)
        slider.setOrientation(QtCore.Qt.Horizontal)
        slider.setObjectName("sld_video")
        self.sld_video = slider
        grid.addWidget(slider, 1, 0, 1, 3)

        # Video surface: row 0, spanning all four columns.
        video = myVideoWidget(central)
        video.setObjectName("wgt_video")
        self.wgt_video = video
        grid.addWidget(video, 0, 0, 1, 4)

        # Play button: row 2, column 2.
        play_button = QtWidgets.QPushButton(central)
        play_button.setObjectName("btn_play")
        self.btn_play = play_button
        grid.addWidget(play_button, 2, 2, 1, 1)

        # Open button: row 2, columns 0-1.
        open_button = QtWidgets.QPushButton(central)
        open_button.setObjectName("btn_open")
        self.btn_open = open_button
        grid.addWidget(open_button, 2, 0, 1, 2)

        MainWindow.setCentralWidget(central)

        menu_bar = QtWidgets.QMenuBar(MainWindow)
        menu_bar.setGeometry(QtCore.QRect(0, 0, 800, 30))
        menu_bar.setObjectName("menubar")
        self.menubar = menu_bar
        MainWindow.setMenuBar(menu_bar)

        status_bar = QtWidgets.QStatusBar(MainWindow)
        status_bar.setObjectName("statusbar")
        self.statusbar = status_bar
        MainWindow.setStatusBar(status_bar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and the visible text of label and buttons."""
        tr = QtCore.QCoreApplication.translate
        context = "MainWindow"
        MainWindow.setWindowTitle(tr(context, "自制视频浏览器"))
        captions = (
            (self.lab_video, "0%"),
            (self.btn_stop, "暂停"),
            (self.btn_play, "播放"),
            (self.btn_open, "打开"),
        )
        for widget, caption in captions:
            widget.setText(tr(context, caption))
def func(u, cryptor):
    """Download one encrypted segment, decrypt it and save it to disk.

    Args:
        u: Segment descriptor of the form ``"<url>-<name>"``. The text after
            the LAST ``-`` is used as the output file stem, everything
            before it is the URL to fetch.
        cryptor: Object whose ``decrypt(bytes)`` method returns the plain
            bytes of the segment.

    Raises:
        ValueError: If *u* contains no ``-`` separator.

    The decrypted data is written to ``movie1/<name>.m3u8``; when that path
    cannot be opened the file is written to ``<name>.m3u8`` in the current
    working directory instead. The function then sleeps for five seconds
    before returning, which throttles the request rate per worker thread.
    """
    target = "movie1"
    # Split at the last "-" so hyphens inside the URL itself do not end up
    # in the file name (splitting at the first "-" would do exactly that).
    url, separator, name = u.rpartition("-")
    if not separator:
        raise ValueError("segment descriptor must look like '<url>-<name>': %r" % (u,))

    res = req.get(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'}).content

    # Decrypt before opening the output file so that a failure here neither
    # leaks a file handle nor leaves an empty file behind.
    # NOTE(review): the same cryptor object is shared by every worker thread;
    # if it is a chaining-mode cipher (the caller in this file builds it with
    # AES.MODE_CBC) successive decrypt() calls are presumably not independent
    # of each other - confirm and consider one cipher object per segment.
    plain_text = cryptor.decrypt(res)

    try:
        f = open(target + "/" + str(name) + ".m3u8", 'wb')
    except OSError:
        # Target directory is not reachable from the current directory.
        f = open(str(name) + ".m3u8", 'wb')
    with f:
        f.write(plain_text)

    print("还在子线程")
    time.sleep(5)
def _segment_sort_key(file_name):
    """Order purely numeric file stems by value, everything else by name."""
    stem = os.path.splitext(file_name)[0]
    if stem.isdigit():
        return (0, int(stem), "")
    return (1, 0, file_name)


def _merge_segments(work_dir, output_name):
    """Concatenate all ``*.m3u8`` files in *work_dir* into *output_name* and delete them."""
    parts = sorted(
        (entry for entry in os.listdir(work_dir) if entry.lower().endswith(".m3u8")),
        key=_segment_sort_key,
    )
    if not parts:
        # Nothing was downloaded; do not create an empty output file.
        return
    with open(os.path.join(work_dir, output_name), "wb") as merged:
        for part in parts:
            with open(os.path.join(work_dir, part), "rb") as segment:
                merged.write(segment.read())
    # Remove the sources only after the merged file has been fully written.
    for part in parts:
        os.remove(os.path.join(work_dir, part))


def duoxiancheng(func, b, cryptor, name):
    """Run *func* for every item of *b* in its own thread, then merge the output.

    Args:
        func: Callable invoked as ``func(u, cryptor)`` for each item; it is
            expected to leave one ``*.m3u8`` file per item either in
            ``movie1/`` or in the current working directory.
        b: Iterable of items handed to *func*.
        cryptor: Passed through unchanged to every *func* call.
        name: Stem of the merged output file (``<name>.mp4``).

    All threads are started first and then joined, so the call blocks until
    every worker has finished. Afterwards every ``*.m3u8`` file in
    ``movie1/`` (or in the current directory when ``movie1/`` does not
    exist there) is concatenated in numeric file-name order into
    ``<name>.mp4`` and the segment files are deleted.

    The merge is done with plain file I/O instead of the Windows-only
    ``copy /b`` / ``del`` shell commands, and the process working directory
    is left untouched.
    """
    workers = [threading.Thread(target=func, args=(u, cryptor)) for u in b]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    work_dir = "movie1" if os.path.isdir("movie1") else os.curdir
    _merge_segments(work_dir, name + ".mp4")
    print("完成合并并删除源文件!")
class myMainWindow(Ui_MainWindow, QMainWindow):
    """Player window that downloads encrypted segments and plays the merged files.

    On construction the window resolves the segment list of a hard-coded
    video, downloads and merges the first three batches of ten segments and
    queues the resulting files in a playlist. Every time the current media
    changes, one further batch is downloaded and appended.
    """

    def __init__(self):
        """Build the UI, pre-download three batches and wire up all signals."""
        # Starts the MRO lookup after Ui_MainWindow, i.e. at the Qt base class.
        super(Ui_MainWindow, self).__init__()
        self.setupUi(self)
        self.videoFullScreen = False  # whether the video widget is full screen
        self.dir = os.getcwd()
        self.num = 3  # index of the next ten-segment batch to download
        self.medialist = QMediaPlaylist()
        self.list_before = []  # file names that are already in the playlist
        self.target = "movie1"
        # Initialised here so sliderchage() works before the first
        # positionChanged signal; 0.1 matches the offset used in changeSlide().
        self.vidoeLength = 0.1
        # The download directory must exist before it is listed below.
        os.makedirs(os.path.join(self.dir, self.target), exist_ok=True)

        self._prepare_stream()
        # Pre-buffer the first three batches; later ones are fetched on demand.
        duoxiancheng(func, self.movie_pix_html[0:10], self.cryptor, "new")
        duoxiancheng(func, self.movie_pix_html[10:20], self.cryptor, "new_1")
        duoxiancheng(func, self.movie_pix_html[20:30], self.cryptor, "new_2")
        os.chdir(self.dir)
        self._enqueue_new_files()

        self.videoFullScreenWidget = myVideoWidget()  # separate full-screen widget
        self.videoFullScreenWidget.setFullScreen(1)
        self.videoFullScreenWidget.hide()  # kept hidden while unused
        self.player = QMediaPlayer()
        self.player.setVideoOutput(self.wgt_video)  # render into the embedded widget

        self.btn_open.clicked.connect(self.openVideoFile)
        self.btn_play.clicked.connect(self.playVideo)
        self.btn_play.clicked.connect(self.sliderchage)
        self.btn_stop.clicked.connect(self.pauseVideo)
        self.player.positionChanged.connect(self.changeSlide)
        self.videoFullScreenWidget.doubleClickedItem.connect(self.videoDoubleClicked)
        self.wgt_video.doubleClickedItem.connect(self.videoDoubleClicked)
        self.sld_video.sliderReleased.connect(self.sliderchage)
        self.player.currentMediaChanged.connect(self.change_movie_name)

    def _prepare_stream(self):
        """Resolve segment URLs and the decryption key of the hard-coded video.

        Sets ``self.cryptor`` (AES-CBC object built from the downloaded key)
        and ``self.movie_pix_html`` (list of ``"<segment url>-<index>"``).
        """
        vip_movie_url = "https://v.qq.com/x/cover/sdh5hiwzkterio0.html"
        jiexi_url = "https://jx.618g.com/?url="
        url = jiexi_url + vip_movie_url
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'}
        response = req.get(url, headers=header)
        soup = BeautifulSoup(response.content, "lxml")
        src = re.findall("url=(.+)", soup.iframe['src'])[0]
        content_url = req.get(src, headers=header).content
        # The third line of the first response is joined onto the scheme+host
        # of src; presumably it is the path of the real playlist - confirm.
        playlist_path = str(content_url, encoding='utf-8').split('\n')[2]
        site_root = re.findall("(https://.+?)/", src)[0]
        movie_pix_link = (site_root + playlist_path).replace('\n', "")
        movie_pix_and_key = str(req.get(movie_pix_link).content, encoding='utf-8')
        key_url = re.findall("URI=\"(.*?)\"", movie_pix_and_key)[0]
        key = req.get(key_url, headers=header).content
        # NOTE(review): the key is also used as the IV - confirm this matches
        # what the playlist specifies.
        self.cryptor = AES.new(key, AES.MODE_CBC, key)
        segment_urls = re.findall("https://.+ts", movie_pix_and_key)
        # The "-<index>" suffix is what func() later uses as the file name.
        self.movie_pix_html = [
            segment_url + "-" + str(index)
            for index, segment_url in enumerate(segment_urls)
        ]

    def _enqueue_new_files(self):
        """Append every not-yet-queued file of the target directory to the playlist."""
        target_dir = os.path.join(self.dir, self.target)
        for file_name in sorted(os.listdir(target_dir)):
            if file_name in self.list_before:
                continue
            self.list_before.append(file_name)
            # Local files need a file:// URL built from an absolute path.
            media_url = QUrl.fromLocalFile(os.path.join(target_dir, file_name))
            self.medialist.addMedia(QMediaContent(media_url))

    def insert_playlist(self):
        """Download the next batch of ten segments and queue the merged file."""
        start = self.num * 10
        batch = self.movie_pix_html[start:start + 10]
        duoxiancheng(func, batch, self.cryptor, "new" + str(self.num))
        self.num = self.num + 1
        os.chdir(self.dir)
        self._enqueue_new_files()

    def change_movie_name(self):
        """Slot for ``currentMediaChanged``: keep playing and fetch one more batch.

        NOTE(review): insert_playlist() runs the download synchronously, so
        this slot does not return until all worker threads have been joined.
        """
        self.player.play()
        self.insert_playlist()

    def sliderchage(self):
        """Seek the player to the position selected on the slider (0-100 %)."""
        self.player.pause()
        slider_value = self.sld_video.value()
        pos = int(slider_value / 100 * self.vidoeLength)
        self.player.setPosition(pos)
        self.player.play()

    def openVideoFile(self):
        """Attach the prepared playlist to the player and start playback."""
        self.player.setPlaylist(self.medialist)
        self.player.play()

    def playVideo(self):
        """Resume playback."""
        self.player.play()

    def pauseVideo(self):
        """Pause playback."""
        self.player.pause()

    def changeSlide(self, position):
        """Slot for ``positionChanged``: update slider and percentage label.

        Args:
            position: Current playback position as delivered by the player.
        """
        # The 0.1 offset avoids a division by zero while duration() is 0.
        self.vidoeLength = self.player.duration() + 0.1
        percent = (position / self.vidoeLength) * 100
        self.sld_video.setValue(round(percent))
        # Label shows the percentage rounded to two decimals.
        self.lab_video.setText(str(round(percent, 2)) + '%')

    def videoDoubleClicked(self, text):
        """Toggle full-screen display of the embedded video widget.

        Args:
            text: Payload of the double-click signal (unused).
        """
        # Full screen is only allowed once a media duration is known.
        if self.player.duration() > 0:
            if self.videoFullScreen:
                self.wgt_video.setFullScreen(0)
                self.videoFullScreen = False
            else:
                self.wgt_video.setFullScreen(1)
                self.videoFullScreen = True
if __name__ == '__main__':
    # Create the Qt application, show the player window and hand control to
    # the event loop; its return code becomes the process exit status.
    application = QApplication(sys.argv)
    main_window = myMainWindow()
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
首先搭建一个本地视频播放器,这个不多说,可以参考这位博主的文章,主要就是几个类的应用,其次要懂得python的类的继承,如何对相关的类的方法重写。
# python2:super(子类,self).父类方法名 # python3:super().fun(arg),父类直接作为子类参数,super省略了括号中的参数(可理解为自动识别), 记得加参数,可以是__init__方法,直接继承整个父类方法,一般写在子类的这个__init__中
class myVideoWidget(QVideoWidget):
    """Video widget that reports mouse double-clicks through a Qt signal."""

    # Signal carrying a text payload; emitted on every double-click.
    doubleClickedItem = pyqtSignal(str)

    def __init__(self, parent=None):
        """Create the widget, forwarding *parent* to ``QVideoWidget``."""
        super().__init__(parent)

    def mouseDoubleClickEvent(self, QMouseEvent):
        """Handle a double-click by emitting ``doubleClickedItem``.

        The event object itself is not inspected and the base-class handler
        is not invoked.
        """
        payload = "double clicked"
        self.doubleClickedItem.emit(payload)
接下来就是视频的爬取。首先进入网站,很容易找到.m3u8的链接,重点是爬取速度和文件解码。
使用requests挨个爬取,速度是急死个人。在这用了多线程爬取(不是多进程)。一开始用的多进程,然并卵,速度没有明显改善,反而更慢;后来了解了多线程和多进程的区别后,发现涉及写入读取以及一些爬虫请求之类的操作应该用多线程,因为线程之间共享数据,多进程一般用来做计算。但是使用多线程之后,又有一个问题:可能请求太快,服务器判断我为恶意攻击,所以就不给我返回包了。此时换一个用户代理,并且每次请求完设置一个等待时间,一般2-3秒即可。
文件解码用到了from Crypto.Cipher import AES。这个Crypto模块是比较老的一个模块,在python3以后,一般直接安装pycryptodome模块就行了。注意这两个模块是冲突的,如果之前装了老模块,请先卸载!!!新模块有自己的官网,可以从官网直接下载,pip可能会出问题。
最后是把爬取的视频通过视频流的方式输入播放器中自动播放。这个问题是鄙人一直头大的事,因为这个功能也比较鸡肋,编了半天都没编出来,头大。。。转念一想,编出来了也有时延,还不如用浏览器直接看呢,爬虫直接爬视频就好了。