前言:前几天看到一篇《用Python解决女朋友看电影没字幕的需求》,觉得很有意思。但是有道的识别没有做过,找了下其他的接口,发现讯飞的识别比较准确。就做了这个小程序。
讯飞接口:https://console.xfyun.cn/services/lfasr
申请 APPID 和 SecretKey(SK)。
免费时长:新用户可识别的视频时长能达到5个小时。用多少算多少。一个月的有效期。
下载调用示例demo:https://www.xfyun.cn/doc/asr/lfasr/API.html#%E8%B0%83%E7%94%A8%E7%A4%BA%E4%BE%8B
下载demo后就可以直接利用其接口生成识别文件,不用自己去写识别程序了。
主要程序:
1、窗体文件
class PyWinDesign:
    """Main window of the "video subtitle recognition and merging" tool.

    The constructor lays out every widget with absolute coordinates on the
    window it is given and initialises the state attributes that the button
    handlers share (paths, clip range, the cut clip, ...).
    """

    def __init__(self, 启动窗口):
        """Create all widgets on *启动窗口* and centre the window on screen.

        Args:
            启动窗口: The Tk window that hosts the user interface.
        """
        self.启动窗口 = 启动窗口

        # State shared between the button handlers; filled in as the user
        # works through the cut -> recognise -> merge steps.
        self.appid = ""
        self.secret_key = ""
        self.video_path = ""
        self.finish_video_path = ""
        self.start_time = ""
        self.end_time = ""
        self.videoclip = ""
        self.out_video_path = ""
        self.srt_files_path = ""
        self.sum_video_path = ""

        # Fixed-size window, centred on the screen.
        window_width, window_height = 644, 400
        self.启动窗口.title('视频字幕识别和合成')
        self.启动窗口.resizable(width=False, height=False)
        screenwidth = self.启动窗口.winfo_screenwidth()
        screenheight = self.启动窗口.winfo_screenheight()
        size = '%dx%d+%d+%d' % (
            window_width,
            window_height,
            (screenwidth - window_width) // 2,
            (screenheight - window_height) // 2,
        )
        self.启动窗口.geometry(size)

        # --- iFlytek credentials -------------------------------------------
        self.标签1_标题 = tk.StringVar()
        self.标签1_标题.set('讯飞接口:')
        self.标签1 = tk.Label(self.启动窗口, textvariable=self.标签1_标题, anchor=tk.W)
        self.标签1.place(x=19, y=22, width=68, height=31)

        self.标签2_标题 = tk.StringVar()
        self.标签2_标题.set('Appid:')
        self.标签2 = tk.Label(self.启动窗口, textvariable=self.标签2_标题)
        self.标签2.place(x=20, y=71, width=64, height=28)

        self.编辑框1_内容 = tk.StringVar()
        self.编辑框1_内容.set('')
        self.编辑框1 = ttk.Entry(self.启动窗口, textvariable=self.编辑框1_内容, justify=tk.LEFT)
        self.编辑框1.place(x=98, y=71, width=141, height=32)

        self.标签3_标题 = tk.StringVar()
        self.标签3_标题.set('Secret_key:')
        self.标签3 = tk.Label(self.启动窗口, textvariable=self.标签3_标题)
        self.标签3.place(x=21, y=106, width=67, height=39)

        self.编辑框2_内容 = tk.StringVar()
        self.编辑框2_内容.set('')
        self.编辑框2 = tk.Entry(self.启动窗口, textvariable=self.编辑框2_内容, justify=tk.LEFT)
        self.编辑框2.place(x=99, y=113, width=504, height=35)

        # --- Source video selection ----------------------------------------
        self.按钮1_标题 = tk.StringVar()
        self.按钮1_标题.set('选择待识别视频地址')
        self.按钮1 = tk.Button(self.启动窗口, textvariable=self.按钮1_标题, command=self.按钮1_被鼠标左键单击)
        self.按钮1.place(x=22, y=186, width=120, height=39)

        self.编辑框3_内容 = tk.StringVar()
        self.编辑框3_内容.set('')
        self.编辑框3 = tk.Entry(self.启动窗口, textvariable=self.编辑框3_内容, justify=tk.LEFT)
        self.编辑框3.place(x=158, y=192, width=443, height=31)

        # --- Clip range, in seconds ----------------------------------------
        self.标签4_标题 = tk.StringVar()
        self.标签4_标题.set('视频识别开始时间:')
        self.标签4 = tk.Label(self.启动窗口, textvariable=self.标签4_标题, anchor=tk.W)
        self.标签4.place(x=156, y=226, width=106, height=35)

        self.编辑框4_内容 = tk.StringVar()
        self.编辑框4_内容.set('')
        self.编辑框4 = tk.Entry(self.启动窗口, textvariable=self.编辑框4_内容, justify=tk.LEFT)
        self.编辑框4.place(x=268, y=225, width=43, height=31)

        self.标签5_标题 = tk.StringVar()
        self.标签5_标题.set('秒')
        self.标签5 = tk.Label(self.启动窗口, textvariable=self.标签5_标题, anchor=tk.W)
        self.标签5.place(x=321, y=231, width=27, height=24)

        self.标签6_标题 = tk.StringVar()
        self.标签6_标题.set('视频识别结束时间:')
        self.标签6 = tk.Label(self.启动窗口, textvariable=self.标签6_标题, anchor=tk.W)
        self.标签6.place(x=157, y=266, width=103, height=28)

        self.编辑框5_内容 = tk.StringVar()
        self.编辑框5_内容.set('')
        self.编辑框5 = tk.Entry(self.启动窗口, textvariable=self.编辑框5_内容, justify=tk.LEFT)
        self.编辑框5.place(x=267, y=264, width=43, height=28)

        self.标签7_标题 = tk.StringVar()
        self.标签7_标题.set('秒')
        self.标签7 = tk.Label(self.启动窗口, textvariable=self.标签7_标题, anchor=tk.W)
        self.标签7.place(x=321, y=265, width=30, height=25)

        # --- Action buttons: cut, recognise, merge -------------------------
        self.按钮2_标题 = tk.StringVar()
        self.按钮2_标题.set('开始截取视频')
        self.按钮2 = tk.Button(self.启动窗口, textvariable=self.按钮2_标题, command=self.按钮2_被鼠标左键单击)
        self.按钮2.place(x=356, y=236, width=97, height=47)

        self.按钮4_标题 = tk.StringVar()
        self.按钮4_标题.set('对截取的视频进行字幕识别')
        self.按钮4 = tk.Button(self.启动窗口, textvariable=self.按钮4_标题, command=self.按钮4_被鼠标左键单击)
        self.按钮4.place(x=470, y=239, width=150, height=42)

        self.按钮5_标题 = tk.StringVar()
        self.按钮5_标题.set('合成视频和字幕')
        self.按钮5 = tk.Button(self.启动窗口, textvariable=self.按钮5_标题, command=self.按钮5_被鼠标左键单击)
        self.按钮5.place(x=164, y=308, width=232, height=40)

        # Indeterminate progress bar that the handlers start and stop around
        # their long-running work.
        self.进度条 = ttk.Progressbar(
            self.启动窗口,
            length=200,
            mode="indeterminate",
            maximum=200,
            orient=tk.HORIZONTAL,
        )
        self.进度条.place(x=164, y=360, width=232, height=40)

    def 按钮1_被鼠标左键单击(self):
        """Ask the user for the source video and show its path in entry 3.

        If the dialog is cancelled (empty result) the entry is left as it
        was, so a previously selected path is not wiped.
        """
        file_path = filedialog.askopenfilename(
            title=u'选择文件',
            initialdir=(os.path.expanduser('F:/迅雷下载/')),
        )
        if not file_path:
            return
        self.编辑框3_内容.set(file_path)
视频截取:
def 按钮2_被鼠标左键单击(self):
    """Cut the requested time range out of the source video and save it as MP4.

    Reads the credentials, the source path and the start/end seconds from the
    entry widgets, asks where to save the result, and writes
    ``self.out_video_path + ".mp4"``. The cut clip is kept in
    ``self.videoclip`` for the later recognition step.

    Invalid input (no source path, non-integer times, an end time that is not
    after the start time) or a cancelled save dialog shows a warning or simply
    returns, instead of raising or writing a file named ``.mp4``.
    """
    self.appid = self.编辑框1.get()
    self.secret_key = self.编辑框2.get()
    self.video_path = str(self.编辑框3.get()).strip()
    if not self.video_path:
        tkinter.messagebox.showwarning('提示', '请先选择待识别视频')
        return

    try:
        self.start_time = int(self.编辑框4.get())
        self.end_time = int(self.编辑框5.get())
    except ValueError:
        tkinter.messagebox.showwarning('提示', '开始时间和结束时间必须是整数秒')
        return
    # Only reject the clearly invalid non-negative case; negative values are
    # passed through unchanged as before (presumably interpreted by MoviePy
    # relative to the end of the clip - TODO confirm).
    if 0 <= self.end_time <= self.start_time:
        tkinter.messagebox.showwarning('提示', '结束时间必须大于开始时间')
        return

    save_path = filedialog.asksaveasfilename(
        title=u'保存文件,请输入文件名',
        initialdir=(os.path.expanduser('F:/迅雷下载/')),
    )
    if not save_path:
        # Save dialog cancelled: nothing to do.
        return
    # out_video_path is an extension-less base name; the other handlers
    # append ".mp4" / ".wav" / ".srt" to it.
    if save_path.lower().endswith('.mp4'):
        save_path = save_path[:-len('.mp4')]
    self.out_video_path = save_path

    # NOTE: the export below runs synchronously inside this handler.
    self.进度条.start()
    try:
        self.videoclip = VideoFileClip(self.video_path).subclip(self.start_time, self.end_time)
        video = CompositeVideoClip([self.videoclip])
        tkinter.messagebox.showwarning('提示', '请等待视频截取,直到弹出成功截取的对话框')
        # Export the resulting clip to disk.
        video.write_videofile(self.out_video_path + ".mp4")
    finally:
        # Always stop the bar, even if loading or exporting failed.
        self.进度条.stop()
    tkinter.messagebox.showwarning('提示', '视频截取成功')
截取视频的字幕识别:
def 按钮4_被鼠标左键单击(self):
    """Extract the audio of the cut clip, send it to iFlytek and write the SRT.

    The audio track of ``self.videoclip`` is written to
    ``self.out_video_path + ".wav"``, uploaded through ``RequestApi``, and the
    returned recognition result is handed to ``self.get_srt``. The success
    dialog is shown only after the subtitle file has been generated.
    """
    # Local import so this block brings its own dependency into scope.
    import json

    # Before any clip has been cut, self.videoclip has no usable audio track.
    audioclip = getattr(self.videoclip, "audio", None)
    if audioclip is None:
        tkinter.messagebox.showwarning('提示', '请先截取视频(且视频需要包含音轨)')
        return

    wav_path = self.out_video_path + ".wav"
    appid = self.编辑框1_内容.get()
    sk = self.编辑框2_内容.get()

    self.进度条.start()
    try:
        tkinter.messagebox.showwarning('提示', '请等待字幕提取与识别,直到弹出识别成功对话框')
        audioclip.write_audiofile(wav_path)
        api = RequestApi(appid=appid, secret_key=sk, upload_file_path=wav_path)  # call the iFlytek API
        data = api.all_api_request()
        try:
            # The payload comes from the network, so it is parsed as data
            # rather than executed with eval().
            # NOTE(review): assumes data['data'] is a JSON-encoded list of
            # segments - confirm against the iFlytek demo's return value.
            result = json.loads(data['data'])
        except (KeyError, TypeError, ValueError):
            tkinter.messagebox.showwarning('提示', '字幕识别失败,接口未返回有效结果')
            return
        self.get_srt(result)  # build the .srt file from the segments
    finally:
        # Always stop the bar, also on early return or error.
        self.进度条.stop()
    tkinter.messagebox.showwarning('提示', '字幕提取和识别成功!')
使用ffmpeg字幕和视频的合并:
def 按钮5_被鼠标左键单击(self):
    """Burn the generated subtitles into the cut video using ffmpeg.

    Reads ``self.out_video_path + ".mp4"`` and ``self.srt_files_path`` and
    writes ``self.out_video_path + "sum.mp4"`` (stored in
    ``self.sum_video_path``). ffmpeg is started with an argument list rather
    than a shell string, so paths containing spaces are passed intact, and its
    exit status decides whether success or failure is reported.
    """
    # Local import so this block brings its own dependency into scope.
    import subprocess

    if not self.srt_files_path:
        tkinter.messagebox.showwarning('提示', '请先完成字幕识别')
        return

    self.sum_video_path = self.out_video_path + "sum.mp4"
    mp4_path = self.out_video_path + ".mp4"
    # The subtitle path is wrapped in \' ... \' exactly as in the original
    # command line; presumably this protects the drive-letter colon from
    # ffmpeg's filter option parser - see the ffmpeg filter escaping rules.
    command = [
        "ffmpeg",
        "-i", mp4_path,
        "-vf", "subtitles=\\'" + self.srt_files_path + "\\'",
        "-y", self.sum_video_path,
    ]

    self.进度条.start()
    try:
        tkinter.messagebox.showwarning('提示', '请等待视频与字幕合并,直到弹出合并成功对话框')
        try:
            return_code = subprocess.run(command, shell=False).returncode
        except OSError:
            # ffmpeg could not be started (e.g. not installed / not on PATH).
            return_code = None
    finally:
        self.进度条.stop()

    if return_code == 0:
        tkinter.messagebox.showwarning('提示', '合并成功,请观看视频!视频保持在截取的视频同一目录下!')
    elif return_code is None:
        tkinter.messagebox.showwarning('提示', '合并失败:无法启动 ffmpeg,请确认已安装并加入 PATH')
    else:
        tkinter.messagebox.showwarning('提示', '合并失败:ffmpeg 返回错误码 {}'.format(return_code))
字幕的生成:
def get_srt(self, result):
    """Write the recognition result as a SubRip (.srt) subtitle file.

    The file is written to ``self.out_video_path + '.srt'`` (UTF-8) and that
    path is stored in ``self.srt_files_path``.

    Args:
        result: Iterable of segments, each a mapping with ``'bg'`` (start) and
            ``'ed'`` (end) given in milliseconds and ``'onebest'`` (the
            recognised text).

    Timestamps are emitted as ``HH:MM:SS,mmm`` with a three-digit millisecond
    field. Each cue ends 10 ms before the reported end so that consecutive
    subtitles do not overlap; the end never moves before the cue's own start.
    """
    end_gap_ms = 10  # same gap as the original "minus one centisecond"

    def srt_timestamp(total_ms):
        """Format a millisecond count as an SRT ``HH:MM:SS,mmm`` timestamp."""
        hours, remainder = divmod(total_ms, 3600 * 1000)
        minutes, remainder = divmod(remainder, 60 * 1000)
        seconds, millis = divmod(remainder, 1000)
        return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, minutes, seconds, millis)

    self.srt_files_path = self.out_video_path + '.srt'

    cues = []
    for index, each in enumerate(result, start=1):
        # Work in integer milliseconds to avoid float rounding artefacts.
        start_ms = int(float(each['bg']))
        stop_ms = max(start_ms, int(float(each['ed'])) - end_gap_ms)
        cues.append('{}\n{} --> {}\n{}\n\n'.format(
            index,
            srt_timestamp(start_ms),
            srt_timestamp(stop_ms),
            each['onebest'],
        ))

    with open(self.srt_files_path, 'w', encoding='utf-8') as f:
        f.write(''.join(cues))
结语:
1、学习使用了 MoviePy 模块
2、字符串转表达式的eval函数使用
3、讯飞接口使用。
4、进度条还是不会用。。。
成品下载地址:https://download.csdn.net/download/zxl7725103/12966151