import tkinter as tk
from tkinter import ttk
from tkinter.filedialog import *
from tkinter.messagebox import showinfo
from aip import AipSpeech
from pydub import AudioSegment
from pydub.silence import split_on_silence
import os
import shutil
import subprocess
# Pick the input file
def choice_file():
    """Ask the user for the audio file to convert and remember the choice.

    The path returned by the file dialog is stored in the module-level
    ``filepath`` and mirrored into the ``filename`` variable.
    """
    global filepath
    selected = askopenfilename()  # returns the chosen file's path
    filepath = selected
    filename.set(selected)
# Pick the output file type
def choice_file_type(event):
    """Combobox callback: remember the selected output extension.

    Stores the combobox value in the module-level ``newfiletype`` and echoes
    both the widget value and the ``newtype`` variable to stdout.

    Args:
        event: the Tk event that triggered the callback (unused).
    """
    global newfiletype
    chosen = com.get()
    newfiletype = chosen
    print(chosen)
    print(newtype.get())
# Pick the output folder
def choice_file_folder():
    """Ask the user for the destination directory and remember the choice.

    The directory returned by the dialog is stored in the module-level
    ``filefolderPath`` and mirrored into the ``outputpath`` variable.
    """
    global filefolderPath
    chosen_dir = askdirectory()  # returns the chosen directory's path
    filefolderPath = chosen_dir
    outputpath.set(chosen_dir)
# Convert the selected audio file into a timestamped transcript
def filehandle():
    """Transcribe the selected audio file into a timestamped text file.

    Steps:
      1. Convert the input to MP3 with ffmpeg inside a private scratch
         directory created next to the input file.
      2. Split the audio on silence and export every segment whose length
         is between 1.5 s and 10 s.
      3. Run each exported segment through ``mp3_characters`` and append
         ``mm:ss`` + recognized text to the output file.
      4. Reset the form once the conversion succeeded.

    Reads the module-level ``filepath``, ``filefolderPath``, ``newname`` and
    ``newtype``; resets them (and ``newfiletype``) after a successful run.
    The scratch directory and the temporary "busy" widgets are always
    removed, even when a step raises.

    Raises:
        subprocess.CalledProcessError: if ffmpeg cannot convert the input.
    """
    global filepath
    global filefolderPath
    global newfiletype
    # Local import: only this function needs a scratch directory.
    import tempfile

    # Nothing sensible can be done without an input file and a destination.
    if not filepath or not filefolderPath:
        showinfo(message='请先选择文件和保存位置!')
        return

    # Cover the start button with a passive "busy" button while working.
    busy_button = tk.Button(win, text='转换中。。。', bd=1, bg='dodgerblue', fg='white', width=38)
    busy_button.place(x=60, y=196)
    # Progress bar, scaled 0..100.
    progressbarOne = ttk.Progressbar(win, length=274)
    progressbarOne.place(x=60, y=228)
    progressbarOne['maximum'] = 100
    progress_bar_num = 0
    # Paint the busy state before the (blocking) ffmpeg conversion starts.
    win.update_idletasks()

    newfilename = newname.get()  # base name of the output file
    # Read the combobox variable directly so a typed-in or not-yet-committed
    # selection is honoured; fall back to the last value seen by the callback.
    output_type = newtype.get() or newfiletype
    audiotype = 'mp3'  # for wav/mp4/... see the pydub.AudioSegment API

    try:
        # A unique scratch directory avoids overwriting/deleting the user's
        # own files (an .mp3 input, or an existing "chunks" folder).
        source_dir = os.path.dirname(os.path.abspath(filepath))
        work_dir = tempfile.mkdtemp(prefix='chunks_', dir=source_dir)
        try:
            # Convert the input to MP3; argument list => no shell quoting issues.
            mp3_path = os.path.join(work_dir, 'source.' + audiotype)
            subprocess.check_call(
                ['ffmpeg', '-loglevel', 'quiet', '-y', '-i', filepath, '-f', 'mp3', mp3_path],
                stdin=subprocess.DEVNULL,
            )

            print('读入音频')
            sound = AudioSegment.from_mp3(mp3_path)

            print('开始分割')
            # min_silence_len: split once silence lasts 300 ms.
            # silence_thresh: anything quieter than -40 dBFS counts as silence.
            # keep_silence=True: keep the silent parts attached to the chunks.
            chunks = split_on_silence(sound, min_silence_len=300, silence_thresh=-40,
                                      keep_silence=True)

            print('开始保存')
            # Each of the two passes below fills half of the progress bar.
            progress_bar_block = 50 / len(chunks) if chunks else 0

            # Pass 1: export every segment of 1.5 s .. 10 s to the scratch dir.
            exported = {}
            for index, chunk in enumerate(chunks):
                if 1500 <= len(chunk) <= 10000:
                    save_name = os.path.join(work_dir, '%04d.%s' % (index, audiotype))
                    chunk.export(save_name, format=audiotype)
                    exported[index] = save_name
                progress_bar_num += progress_bar_block
                progressbarOne['value'] = progress_bar_num
                win.update()

            # Pass 2: transcribe the exported segments and write the result.
            nowlen = 0  # milliseconds of audio preceding the current chunk
            output_file = os.path.join(filefolderPath, newfilename + output_type)
            with open(output_file, "a") as f:
                f.write(newfilename)
                for index, chunk in enumerate(chunks):
                    save_name = exported.get(index)
                    if save_name is not None:
                        total_minutes, second = divmod(nowlen // 1000, 60)
                        f.write('\n\n' + '%02d:%02d' % (total_minutes % 60, second) + '\n')
                        f.write(mp3_characters(save_name))
                    # Count skipped chunks too, so the timestamps keep
                    # matching the position in the original audio.
                    nowlen += len(chunk)
                    progress_bar_num += progress_bar_block
                    progressbarOne['value'] = progress_bar_num
                    win.update()
        finally:
            # Remove the converted MP3 and all exported segments.
            shutil.rmtree(work_dir, ignore_errors=True)

        print('保存完毕')
        showinfo(message='转换成功!')

        # Reset the form for the next conversion.
        filepath = ''
        filefolderPath = ''
        newfiletype = '.doc'  # keep the global in sync with the combobox
        filename.set(filepath)
        outputpath.set(filefolderPath)
        newname.set("")
        newtype.set('.doc')
    finally:
        # Drop the temporary widgets; this reveals the start button again
        # instead of stacking new widgets on top on every run.
        progressbarOne.destroy()
        busy_button.destroy()
# Convert one audio segment to PCM and run speech recognition on it
def mp3_characters(filepath):
    """Return the text recognized in the audio file at *filepath*.

    The file is converted with ffmpeg to raw 16 kHz, mono, signed 16-bit
    little-endian PCM (written next to the input with a ``.pcm`` extension)
    and the PCM bytes are sent to the Baidu speech recognition service.

    Args:
        filepath: path of the audio segment to transcribe.

    Returns:
        The recognized text, or ``''`` when the service returns no result
        (the reported error is printed so one bad segment does not abort
        the whole conversion).
    """
    pcm_path = os.path.splitext(filepath)[0] + '.pcm'
    # Argument list instead of a shell string: paths with spaces stay intact.
    subprocess.call(
        ['ffmpeg', '-loglevel', 'quiet', '-y', '-i', filepath,
         '-acodec', 'pcm_s16le', '-f', 's16le', '-ac', '1', '-ar', '16000', pcm_path],
        stdin=subprocess.DEVNULL,
    )
    # Baidu cloud credentials; these must be requested and filled in.
    APP_ID = ''
    API_KEY = ''
    SECRET_KEY = ''
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    with open(pcm_path, 'rb') as fp:
        au = fp.read()
    # NOTE(review): dev_pid 1537 presumably selects Baidu's Mandarin model;
    # confirm against the Baidu ASR documentation.
    res = client.asr(au, 'pcm', 16000, {'dev_pid': 1537, })
    recognized = res.get('result')
    if not recognized:
        # Error responses carry no 'result' key; report and keep going.
        print(res.get('err_no'), res.get('err_msg'))
        return ''
    return "".join(recognized)
# Create the main window
win = tk.Tk()
# Window title
win.title("语音转文本")
# Background: a canvas carrying the background image
canvas = tk.Canvas(win, bg="white", height=400, width=700, borderwidth=-3)  # create the canvas
canvas.pack(side='top')  # dock the canvas at the top
image_file = tk.PhotoImage(file="背景.png")  # load the image file
canvas.create_image(0, 0, anchor='nw', image=image_file)  # draw the image on the canvas
# Window size and position
win.geometry("400x300+400+100")
# Fixed window size
win.resizable(False, False)
# Application icon
win.iconbitmap("程序图标.ico")
# Variables bound to the entry widgets
filename = tk.StringVar()
outputpath = tk.StringVar()
newname = tk.StringVar()
newtype = tk.StringVar()
# Module-level state shared with the callbacks
filepath = ''  # file to convert
filefolderPath = ''  # where the new file is saved
newfiletype = '.doc'  # extension of the new file
newfilename = ''  # name of the new file
progress_bar_num = 0  # progress bar position
# "Choose file" row: label, entry and button
tk.Label(win, text='选择文件', bg='white').place(x=60, y=65)
tk.Entry(win, textvariable=filename, bg='ghostwhite').place(height=26, x=115, y=65)
tk.Button(win, text='打开文件', command=choice_file, bd=1, bg='white', padx=7, pady=0).place(x=265, y=65)
# "New file name" row: label, entry and the file-type drop-down
tk.Label(win, text='新文件名', bg='white').place(x=60, y=150)
tk.Entry(win, textvariable=newname, bg='ghostwhite').place(height=26, x=115, y=150)
com = ttk.Combobox(win, textvariable=newtype)  # create the drop-down
com.place(height=26, width=70, x=265, y=150)  # position
com["value"] = (".doc", ".docx", ".txt")  # selectable values
com.current(0)  # default to the first entry, i.e. .doc
# "<<ComboboxSelected>>" is the virtual event ttk fires when the user picks
# an entry; the previous "<>" is not a valid event sequence.
com.bind("<<ComboboxSelected>>", choice_file_type)
# "Save location" row: label, entry and button
tk.Label(win, text='保存位置', bg='white').place(x=60, y=110)
tk.Entry(win, textvariable=outputpath, bg='ghostwhite').place(height=26, x=115, y=110)
tk.Button(win, text='选择文件夹', command=choice_file_folder, bd=1, bg='white', pady=0).place(x=265, y=110)
# "Convert" row: the start button
tk.Button(win, text='开始转换', command=filehandle, bd=1, bg='dodgerblue', fg='white', width=38).place(x=60, y=196)
win.mainloop()