基于python Moviepy的视频字幕识别和合成!

前言:前几天看到一篇《用Python解决女朋友看电影没字幕的需求》,觉得很有意思。但是有道的识别没有做过,找了下其他的接口,发现讯飞的识别比较准确。就做了这个小程序。

讯飞接口:https://console.xfyun.cn/services/lfasr
申请 APPID 和 SecretKey(SK)。
免费时长:新用户可识别的视频时长能达到5个小时。用多少算多少。一个月的有效期。
基于python Moviepy的视频字幕识别和合成!_第1张图片
下载调用示例demo:https://www.xfyun.cn/doc/asr/lfasr/API.html#%E8%B0%83%E7%94%A8%E7%A4%BA%E4%BE%8B
基于python Moviepy的视频字幕识别和合成!_第2张图片
下载demo就可以直接利用其接口生成识别文件,不用自己去写识别程序了。
主要程序
1、窗体文件

class PyWinDesign:
    """Main window of the video subtitle recognition and merging tool.

    The constructor configures the window it is given, centres it on the
    screen and creates every widget with absolute ``place`` coordinates.
    The button callbacks (``按钮N_被鼠标左键单击``) are looked up on the
    instance, so they have to exist as methods of this class.
    """

    def __init__(self, 启动窗口):
        """Build the whole user interface inside *启动窗口*.

        Args:
            启动窗口: window object used as the parent of every widget; the
                ``title``, ``resizable``, ``geometry`` and
                ``winfo_screenwidth`` / ``winfo_screenheight`` methods are
                called on it.
        """
        self.启动窗口 = 启动窗口

        # Working state; every field starts out as an empty string.
        self.appid = ""
        self.secret_key = ""
        self.video_path = ""
        self.finish_video_path = ""
        self.start_time = ""
        self.end_time = ""
        self.videoclip = ""
        self.out_video_path = ""
        self.srt_files_path = ""
        self.sum_video_path = ""

        # Fixed-size window, centred on the screen.
        window_width, window_height = 644, 400
        self.启动窗口.title('视频字幕识别和合成')
        self.启动窗口.resizable(width=False, height=False)
        offset_x = int((self.启动窗口.winfo_screenwidth() - window_width) / 2)
        offset_y = int((self.启动窗口.winfo_screenheight() - window_height) / 2)
        self.启动窗口.geometry(
            f'{window_width}x{window_height}+{offset_x}+{offset_y}')

        # --- API credentials -------------------------------------------
        self.标签1_标题 = tk.StringVar(value='讯飞接口:')
        self.标签1 = tk.Label(
            self.启动窗口, textvariable=self.标签1_标题, anchor=tk.W)
        self.标签1.place(x=19, y=22, width=68, height=31)

        self.标签2_标题 = tk.StringVar(value='Appid:')
        self.标签2 = tk.Label(self.启动窗口, textvariable=self.标签2_标题)
        self.标签2.place(x=20, y=71, width=64, height=28)

        self.编辑框1_内容 = tk.StringVar(value='')
        self.编辑框1 = ttk.Entry(
            self.启动窗口, textvariable=self.编辑框1_内容, justify=tk.LEFT)
        self.编辑框1.place(x=98, y=71, width=141, height=32)

        self.标签3_标题 = tk.StringVar(value='Secret_key:')
        self.标签3 = tk.Label(self.启动窗口, textvariable=self.标签3_标题)
        self.标签3.place(x=21, y=106, width=67, height=39)

        self.编辑框2_内容 = tk.StringVar(value='')
        self.编辑框2 = tk.Entry(
            self.启动窗口, textvariable=self.编辑框2_内容, justify=tk.LEFT)
        self.编辑框2.place(x=99, y=113, width=504, height=35)

        # --- Source video selection ------------------------------------
        self.按钮1_标题 = tk.StringVar(value='选择待识别视频地址')
        self.按钮1 = tk.Button(
            self.启动窗口, textvariable=self.按钮1_标题,
            command=self.按钮1_被鼠标左键单击)
        self.按钮1.place(x=22, y=186, width=120, height=39)

        self.编辑框3_内容 = tk.StringVar(value='')
        self.编辑框3 = tk.Entry(
            self.启动窗口, textvariable=self.编辑框3_内容, justify=tk.LEFT)
        self.编辑框3.place(x=158, y=192, width=443, height=31)

        # --- Start / end time of the part to recognise (seconds) --------
        self.标签4_标题 = tk.StringVar(value='视频识别开始时间:')
        self.标签4 = tk.Label(
            self.启动窗口, textvariable=self.标签4_标题, anchor=tk.W)
        self.标签4.place(x=156, y=226, width=106, height=35)

        self.编辑框4_内容 = tk.StringVar(value='')
        self.编辑框4 = tk.Entry(
            self.启动窗口, textvariable=self.编辑框4_内容, justify=tk.LEFT)
        self.编辑框4.place(x=268, y=225, width=43, height=31)

        self.标签5_标题 = tk.StringVar(value='秒')
        self.标签5 = tk.Label(
            self.启动窗口, textvariable=self.标签5_标题, anchor=tk.W)
        self.标签5.place(x=321, y=231, width=27, height=24)

        self.标签6_标题 = tk.StringVar(value='视频识别结束时间:')
        self.标签6 = tk.Label(
            self.启动窗口, textvariable=self.标签6_标题, anchor=tk.W)
        self.标签6.place(x=157, y=266, width=103, height=28)

        self.编辑框5_内容 = tk.StringVar(value='')
        self.编辑框5 = tk.Entry(
            self.启动窗口, textvariable=self.编辑框5_内容, justify=tk.LEFT)
        self.编辑框5.place(x=267, y=264, width=43, height=28)

        self.标签7_标题 = tk.StringVar(value='秒')
        self.标签7 = tk.Label(
            self.启动窗口, textvariable=self.标签7_标题, anchor=tk.W)
        self.标签7.place(x=321, y=265, width=30, height=25)

        # --- Action buttons --------------------------------------------
        self.按钮2_标题 = tk.StringVar(value='开始截取视频')
        self.按钮2 = tk.Button(
            self.启动窗口, textvariable=self.按钮2_标题,
            command=self.按钮2_被鼠标左键单击)
        self.按钮2.place(x=356, y=236, width=97, height=47)

        self.按钮4_标题 = tk.StringVar(value='对截取的视频进行字幕识别')
        self.按钮4 = tk.Button(
            self.启动窗口, textvariable=self.按钮4_标题,
            command=self.按钮4_被鼠标左键单击)
        self.按钮4.place(x=470, y=239, width=150, height=42)

        self.按钮5_标题 = tk.StringVar(value='合成视频和字幕')
        self.按钮5 = tk.Button(
            self.启动窗口, textvariable=self.按钮5_标题,
            command=self.按钮5_被鼠标左键单击)
        self.按钮5.place(x=164, y=308, width=232, height=40)

        # --- Indeterminate progress bar started/stopped by the callbacks -
        self.进度条 = ttk.Progressbar(
            self.启动窗口, length=200, mode="indeterminate",
            maximum=200, orient=tk.HORIZONTAL)
        self.进度条.place(x=164, y=360, width=232, height=40)

生产示例:
基于python Moviepy的视频字幕识别和合成!_第3张图片
视频地址选择:

 def 按钮1_被鼠标左键单击(self):
        """Let the user pick the source video and show its path in the entry.

        Opens a file-open dialog and stores the selected path in
        ``self.编辑框3_内容``. When the dialog is cancelled (empty result)
        the entry is left untouched instead of being overwritten.
        """
        file_path = filedialog.askopenfilename(
            title=u'选择文件',
            initialdir=os.path.expanduser('F:/迅雷下载/'))
        if not file_path:
            # Dialog dismissed without a selection: keep the current path.
            return
        print(file_path)
        self.编辑框3_内容.set(file_path)

视频截取:

    def 按钮2_被鼠标左键单击(self):
        """Cut the requested time range out of the source video and save it.

        Reads the credentials, the video path and the start/end times (in
        seconds) from the entry widgets, asks for an output name and writes
        the sub-clip to ``<output name>.mp4``. The sub-clip is kept in
        ``self.videoclip`` and the chosen name in ``self.out_video_path``
        for the later recognition and merge steps.

        The method returns early, after a warning dialog where appropriate,
        when a time field is not an integer, no video path was entered, or
        the save dialog is cancelled. Errors raised while opening or writing
        the video propagate to the caller; the progress bar is stopped first.
        """
        self.appid = self.编辑框1.get()
        self.secret_key = self.编辑框2.get()
        self.video_path = str(self.编辑框3.get()).strip()
        try:
            self.start_time = int(self.编辑框4.get())
            self.end_time = int(self.编辑框5.get())
        except ValueError:
            # Empty or non-numeric time fields used to raise out of the callback.
            tkinter.messagebox.showwarning('提示', '开始时间和结束时间必须填写整数秒')
            return
        if not self.video_path:
            tkinter.messagebox.showwarning('提示', '请先选择待识别的视频')
            return
        print(len(self.video_path))

        # Ask for the destination first so that cancelling costs nothing and
        # never produces a stray ".mp4" file in the working directory.
        chosen_path = filedialog.asksaveasfilename(
            title=u'保存文件,请输入文件名',
            initialdir=os.path.expanduser('F:/迅雷下载/'))
        if not chosen_path:
            return
        self.out_video_path = chosen_path

        # NOTE(review): the calls below run inside the Tk callback, so the
        # event loop is presumably blocked and the bar may not animate until
        # they return; a worker thread would be needed for that - confirm.
        self.进度条.start()
        try:
            self.videoclip = VideoFileClip(self.video_path).subclip(
                self.start_time, self.end_time)
            video = CompositeVideoClip([self.videoclip])
            tkinter.messagebox.showwarning('提示','请等待视频截取,直到弹出成功截取的对话框')
            # Export the resulting clip to the chosen file.
            video.write_videofile(self.out_video_path + ".mp4")
        finally:
            # Always stop the bar, even when opening or encoding fails.
            self.进度条.stop()

        tkinter.messagebox.showwarning('提示','视频截取成功')

截取视频的字幕识别:

    def 按钮4_被鼠标左键单击(self):
        """Extract the clip's audio, send it to the iFlytek API and write subtitles.

        The audio of ``self.videoclip`` is written to
        ``<self.out_video_path>.wav``, uploaded through ``RequestApi`` and
        the ``'data'`` field of the response is parsed and handed to
        ``self.get_srt``. The success dialog is shown only after the
        subtitle file has been written.

        Returns early with a warning when no clip has been cut yet or the
        clip has no audio. Errors from the audio export, the API call or the
        parsing propagate to the caller; the progress bar is stopped first.
        """
        import json  # local import: the file's import block is not visible here

        if isinstance(self.videoclip, str):
            # Still the '' placeholder from __init__: nothing has been cut yet.
            tkinter.messagebox.showwarning('提示', '请先截取视频')
            return
        audioclip = self.videoclip.audio
        if audioclip is None:
            tkinter.messagebox.showwarning('提示', '截取的视频没有音频,无法识别字幕')
            return

        wav_path = self.out_video_path + ".wav"
        appid = self.编辑框1_内容.get()
        sk = self.编辑框2_内容.get()

        self.进度条.start()
        try:
            tkinter.messagebox.showwarning('提示','请等待字幕提取与识别,直到弹出识别成功对话框')
            audioclip.write_audiofile(wav_path)
            # Call the iFlytek interface.
            api = RequestApi(appid=appid, secret_key=sk, upload_file_path=wav_path)
            data = api.all_api_request()
            # SECURITY: this string comes from a remote service. The original
            # code ran eval() on it, which executes arbitrary expressions.
            # It is presumably a JSON array (confirm against the iFlytek
            # response format), so it is parsed as JSON instead.
            result = json.loads(data['data'])
        finally:
            # Always stop the bar, even when the export or request fails.
            self.进度条.stop()

        print(result)
        # Build the subtitle file from the recognised segments.
        self.get_srt(result)
        tkinter.messagebox.showwarning('提示','字幕提取和识别成功!')

使用ffmpeg合并字幕和视频:

    def 按钮5_被鼠标左键单击(self):
        """Burn the generated subtitles into the cut video with ffmpeg.

        Runs ``ffmpeg -i <clip>.mp4 -vf subtitles=... -y <clip>sum.mp4`` where
        ``<clip>`` is ``self.out_video_path``. The command is passed as an
        argument list (no shell), so paths containing spaces reach ffmpeg
        intact. A failure dialog is shown when ffmpeg cannot be started or
        exits with a non-zero code; the success dialog is shown otherwise.
        """
        import subprocess  # local import: the file's import block is not visible here

        if not self.out_video_path or not self.srt_files_path:
            # Nothing to merge until a clip and its subtitle file exist.
            tkinter.messagebox.showwarning('提示', '请先截取视频并完成字幕识别')
            return

        self.sum_video_path = self.out_video_path + "sum.mp4"
        mp4_path = self.out_video_path + ".mp4"
        # Same filter text the original command line contained: the path is
        # wrapped in backslash-escaped single quotes for ffmpeg's own
        # filtergraph parser.
        subtitle_filter = "subtitles=\\'" + self.srt_files_path + "\\'"
        command = ["ffmpeg", "-i", mp4_path, "-vf", subtitle_filter,
                   "-y", self.sum_video_path]

        self.进度条.start()
        try:
            tkinter.messagebox.showwarning('提示','请等待视频与字幕合并,直到弹出合并成功对话框')
            try:
                exit_code = subprocess.run(command).returncode
            except OSError:
                # e.g. the ffmpeg executable cannot be found or started.
                exit_code = -1
        finally:
            self.进度条.stop()

        if exit_code != 0:
            tkinter.messagebox.showwarning('提示', '合并失败,请检查ffmpeg是否已安装以及视频和字幕文件是否存在')
            return
        tkinter.messagebox.showwarning('提示','合并成功,请观看视频!视频保持在截取的视频同一目录下!')

字幕的生成:

    def get_srt(self, result):
        """Write the recognition result as an SRT file next to the cut clip.

        The file is written to ``<self.out_video_path>.srt`` (UTF-8) and its
        path is stored in ``self.srt_files_path``.

        Args:
            result: iterable of segments; each segment is a mapping with
                ``'bg'`` (start, milliseconds), ``'ed'`` (end, milliseconds)
                and ``'onebest'`` (subtitle text). The time values may be
                numbers or numeric strings.

        Each cue is written as ``index``, ``HH:MM:SS,mmm --> HH:MM:SS,mmm``,
        the text and a blank line. The end time is pulled back by 10 ms
        (never before the start time) so that consecutive cues do not
        overlap.
        """
        gap_ms = 10  # keeps one cue from still being shown when the next starts

        def to_timestamp(total_ms):
            # SRT timestamps carry a three-digit millisecond field.
            hours, rest = divmod(total_ms, 3600000)
            minutes, rest = divmod(rest, 60000)
            seconds, millis = divmod(rest, 1000)
            return '{:02d}:{:02d}:{:02d},{:03d}'.format(
                hours, minutes, seconds, millis)

        self.srt_files_path = self.out_video_path + '.srt'
        cues = []
        for index, each in enumerate(result, start=1):
            print(each)
            start_ms = int(float(each['bg']))  # start time
            # End time, shortened by the gap but never earlier than the start.
            stop_ms = max(start_ms, int(float(each['ed'])) - gap_ms)
            cues.append('{}\n{} --> {}\n{}\n\n'.format(
                index, to_timestamp(start_ms), to_timestamp(stop_ms),
                each['onebest']))
        with open(self.srt_files_path, 'w', encoding='utf-8') as f:
            f.write(''.join(cues))  # write all cues in one go

结语:
1、学习使用了MoviePy模块
2、字符串转表达式的eval函数使用
3、讯飞接口使用。
4、进度条还是不会用。。。

成品下载地址:https://download.csdn.net/download/zxl7725103/12966151

你可能感兴趣的:(Python,影视,python,tkinter)