主要逻辑:
使用 PyQt5 绘制界面。
点击“上传图片”按钮上传图片(最多 16 张);点击图片获取图片内容;点击“播放”按钮获取合成音频并播放。
# Logging helper
class Logger:
    """Build a logger that writes both to a file next to this module and to the console."""

    def __init__(self, filname):
        """Resolve the log file path.

        Args:
            filname: File name of the log file; it is created in the directory
                that contains this source file.
        """
        self.log_file_path = path.join(path.dirname(path.abspath(__file__)), filname)
        self.filename = filname

    def get_logger(self, verbosity=1, name=None):
        """Return a logger with a file handler and a stream handler attached.

        Args:
            verbosity: 0 -> DEBUG, 1 -> INFO, 2 -> WARNING.
            name: Name handed to ``logging.getLogger``; ``None`` selects the
                root logger.

        Returns:
            The configured ``logging.Logger``.

        Raises:
            KeyError: If ``verbosity`` is not 0, 1 or 2.
        """
        level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
        formatter = logging.Formatter(
            "[%(asctime)s][%(funcName)s][line:%(lineno)d][%(levelname)s] %(message)s"
        )
        logger = logging.getLogger(name)
        logger.setLevel(level_dict[verbosity])

        # logging.getLogger returns the same object for the same name, so
        # attaching handlers unconditionally would make every later call
        # duplicate each record. Only attach them once per log file.
        target = path.abspath(self.log_file_path)
        already_configured = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not already_configured:
            # Explicit UTF-8: the application logs non-ASCII (Chinese) messages.
            fh = logging.FileHandler(self.log_file_path, "w", encoding="utf-8")
            fh.setFormatter(formatter)
            logger.addHandler(fh)
            sh = logging.StreamHandler()
            sh.setFormatter(formatter)
            logger.addHandler(sh)
        return logger
# Module-wide logger: records go to runlog.log (next to this file) and to the
# console, using get_logger's default verbosity (INFO) on the root logger.
log = Logger('runlog.log').get_logger()
# MP3 -> WAV conversion
def mp3Towav():
    """Convert the file at ``audioFilemp3`` into a WAV file at ``audioFilewav``.

    Both paths are module-level names. When the MP3 file does not exist
    nothing is converted and only a log entry is written.
    """
    log.info(" mp3Towav strat")
    if not path.exists(audioFilemp3):
        log.info(" mp3Towav nofile")
        return
    # Decode the MP3 and immediately re-export it as WAV.
    AudioSegment.from_mp3(audioFilemp3).export(audioFilewav, format="wav")
    log.info(" mp3Towav finished")
# Audio playback built on pyaudio
class audioPlay():
    """Play the WAV file at ``audioFilewav`` through a PyAudio stream with a callback.

    ``wf``, ``pl`` and ``stream`` hold the empty string until the matching
    resource has been opened. Playback state is mirrored in the module-level
    ``playing`` / ``paused`` flags.
    """

    def __init__(self):
        self.wf = ''      # wave reader of the current file
        self.pl = ''      # pyaudio.PyAudio instance
        self.stream = ''  # output stream created by setstream()

    def openfile(self):
        """Open ``audioFilewav`` for reading, or reset ``wf`` to '' when it is missing."""
        if path.exists(audioFilewav):
            self.wf = wave.open(audioFilewav, 'rb')
        else:
            # Reset ``wf`` itself (previously a stray ``self.w`` was assigned),
            # otherwise startplay() would go on with the reader of the
            # previous playback, which stopPlay() has already closed.
            self.wf = ''

    def setpl(self):
        """Create the PyAudio instance."""
        self.pl = pyaudio.PyAudio()

    def setstream(self):
        """Open an output stream matching the WAV file's format, fed by callback()."""
        self.stream = self.pl.open(format=self.pl.get_format_from_width(self.wf.getsampwidth()),
                                   channels=self.wf.getnchannels(),
                                   rate=self.wf.getframerate(),
                                   output=True,
                                   stream_callback=self.callback)

    def callback(self, in_data, frame_count, time_info, status):
        """Stream callback: hand the next ``frame_count`` frames of the file to PyAudio."""
        data = self.wf.readframes(frame_count)
        return (data, pyaudio.paContinue)

    def stramStart(self):
        """Start (or resume) the stream."""
        self.stream.start_stream()

    def stremStop(self):
        """Stop (pause) the stream without closing it."""
        self.stream.stop_stream()

    def startplay(self):
        """Play the WAV file and block until the stream is inactive and not paused.

        Returns early (after logging) when the WAV file does not exist.
        """
        import time  # local import: only the polling loop below needs it

        global playing
        log.info("play start")
        self.openfile()
        if self.wf == '':
            log.info("not have file")
            return
        self.setpl()
        self.setstream()
        self.stramStart()
        playing = True
        # Poll with a short sleep instead of spinning, so waiting for the end
        # of playback does not keep a CPU core fully busy.
        while self.stream.is_active() or paused == True:
            time.sleep(0.01)
        log.info("play finished")

    def stram_is_stoped(self):
        """Return whether the stream is stopped."""
        return self.stream.is_stopped()

    def stream_is_active(self):
        """Return whether the stream is active."""
        return self.stream.is_active()

    def stopPlay(self):
        """Stop playback, release the stream, file and PyAudio, and clear the state flags."""
        global playing
        global paused
        if self.stream.is_active():
            self.stream.stop_stream()
        self.stream.close()
        self.wf.close()
        # close PyAudio
        self.pl.terminate()
        playing = False
        paused = False
# Frame-status markers.
STATUS_FIRST_FRAME = 0 # marks the first frame
STATUS_CONTINUE_FRAME = 1 # marks an intermediate frame
STATUS_LAST_FRAME = 2 # marks the last frame
# Module-wide player instance used by the speech-synthesis classes below.
auply=audioPlay()
# iFlytek (Xunfei) speech synthesis: request parameters and signed URL
class Ws_Param(object):
    """Hold the pieces of a synthesis request and build the authenticated websocket URL."""

    def __init__(self, APPID, APIKey, APISecret, Text, vcn, speed):
        """Store the credentials and pre-build the request payload sections.

        Args:
            APPID: Application id, sent as ``app_id``.
            APIKey: API key placed in the authorization string.
            APISecret: Secret used as the HMAC key when signing.
            Text: Text to synthesize; sent UTF-8 encoded and base64 wrapped.
            vcn: Voice name, converted with ``str``.
            speed: Speaking speed, converted with ``int``.
        """
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.Text = Text
        self.vcn = vcn
        self.speed = speed

        # Common parameters ("common" section of the request).
        self.CommonArgs = {"app_id": self.APPID}
        # Business parameters ("business" section of the request).
        self.BusinessArgs = {
            "aue": "lame",
            "sfl": 1,
            "auf": "audio/L16;rate=16000",
            "vcn": str(self.vcn),
            "tte": "utf8",
            "speed": int(self.speed),
        }
        # Data section: the text travels base64 encoded.
        encoded_text = base64.b64encode(self.Text.encode('utf-8'))
        self.Data = {"status": 2, "text": str(encoded_text, "UTF8")}

    def create_url(self):
        """Return the websocket URL carrying the authorization, date and host parameters."""
        endpoint = 'wss://tts-api.xfyun.cn/v2/tts'
        signing_host = "ws-api.xfyun.cn"

        # RFC 1123 formatted timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # The string that gets signed: host line, date line, request line.
        signature_origin = "\n".join([
            "host: " + signing_host,
            "date: " + date,
            "GET /v2/tts HTTP/1.1",
        ])

        # Sign with HMAC-SHA256 and base64 encode the digest.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = 'api_key="%s", algorithm="%s", headers="%s", signature="%s"' % (
            self.APIKey, "hmac-sha256", "host date request-line", signature_sha)
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Append the authentication parameters as the query string.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": signing_host,
        })
        return endpoint + '?' + query
# iFlytek (Xunfei) speech synthesis: websocket request handling
class kedaxunfei():
    """Synthesize ``text`` through the iFlytek websocket API and play the result.

    The received audio is appended to the file at ``audioFilemp3``; playback
    goes through the module-level ``auply`` player.
    """

    def __init__(self, text, vcn, speed):
        """Store the text, voice name and speed of the request."""
        self.text = text
        self.vcn = vcn
        self.speed = speed
        self.wsParam = ''
        self.wsUrl = ''
        self.ws = ''

    def stratPlay(self):
        """Open the websocket, request the synthesis and block until the connection ends."""
        # NOTE(review): the credentials are hard-coded; consider loading them
        # from configuration instead of the source file.
        self.wsParam = Ws_Param(APPID='5eb5xxxx', APIKey='9db0xxxxx',
                                APISecret='bec6xxxxxx',
                                Text=self.text, vcn=self.vcn, speed=self.speed)
        websocket.enableTrace(False)
        self.wsUrl = self.wsParam.create_url()
        self.ws = websocket.WebSocketApp(self.wsUrl, on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open
        # NOTE(review): CERT_NONE turns off TLS certificate verification.
        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    # The four callbacks accept ``*args`` because the arguments handed to
    # bound-method callbacks differ between websocket-client versions (with or
    # without the WebSocketApp as first argument; on_close may also receive a
    # status code and a message). The payload, when there is one, is always
    # the last positional argument.

    def on_open(self, *args):
        """Connection opened: clear the previous audio, then send the request in a thread."""
        def run(*args):
            # Stop the running playback and remove the old MP3 BEFORE sending
            # the request, so that chunks written by on_message cannot be
            # deleted together with the stale file.
            if playing == True:
                log.info("playing")
                auply.stopPlay()
            if os.path.exists(audioFilemp3):
                try:
                    os.remove(audioFilemp3)
                except IOError:
                    log.info('文件被占用')
            d = {"common": self.wsParam.CommonArgs,
                 "business": self.wsParam.BusinessArgs,
                 "data": self.wsParam.Data,
                 }
            self.ws.send(json.dumps(d))
        thread.start_new_thread(run, ())

    def on_message(self, *args):
        """Handle one server message: append its audio to the MP3 file or log the error.

        The connection is closed once a message with ``status == 2`` arrives.
        """
        try:
            message = json.loads(args[-1])
            code = message["code"]
            sid = message.get("sid")
            # Check the result code before touching "data"; the payload is
            # read with .get() so an error reply without it is still reported.
            data = message.get("data") or {}
            if code != 0:
                errMsg = message.get("message")
                log.info("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
            elif data.get("audio"):
                with open(audioFilemp3, 'ab') as f:
                    f.write(base64.b64decode(data["audio"]))
            if data.get("status") == 2:
                log.info("ws is closed")
                self.ws.close()
        except Exception as e:
            # The message needs a placeholder for the exception; passing it as
            # a bare extra argument makes the logging call itself fail.
            log.info("receive msg,but parse exception:%s", e)

    # Websocket error handler
    def on_error(self, *args):
        """Print the reported error (last positional argument, if any)."""
        error = args[-1] if args else None
        print("### error:", error)

    # Websocket close handler
    def on_close(self, *args):
        """Print a notice that the connection was closed."""
        print("### closed ###")

    def playAutio(self):
        """Stop any running playback, convert the received MP3 to WAV and play it."""
        if playing == True:
            log.info("playing")
            auply.stopPlay()
        mp3Towav()
        auply.startplay()

    def playRun(self):
        """Run the whole pipeline: synthesize (blocking), then play."""
        log.info('keda audio')
        self.stratPlay()
        self.playAutio()
# Custom label
class MyLabel(QLabel):
    """QLabel that announces mouse presses through the ``sendMsg`` signal."""

    # Carries the object name of the label that was pressed.
    sendMsg = pyqtSignal(str)

    def __init__(self):
        super(MyLabel, self).__init__()

    def mousePressEvent(self, e):
        """Emit ``sendMsg`` with this label's ``objectName()``."""
        self.sendMsg.emit(self.objectName())
# Baidu speech synthesis
class baidu:
    """Synthesize text through the module-level ``clientAudio`` client and play the result."""

    def __init__(self, text, pre, speed):
        """Store the request settings.

        Args:
            text: Text to synthesize; converted with ``str`` and UTF-8 encoded.
            pre: Value sent as the ``per`` option of the request.
            speed: Value sent as the ``spd`` option of the request.
        """
        self.text = str(text).encode('utf-8')
        self.pre = pre
        self.speed = speed

    def playAutio(self):
        """Request the synthesis, store the audio as MP3, convert it to WAV and play it.

        When the client returns a dict instead of audio bytes (handled here as
        a failed request) the dict is printed and nothing is played.
        """
        if playing == True:
            auply.stopPlay()
        if os.path.exists(audioFilemp3):
            try:
                os.remove(audioFilemp3)
            except IOError:
                log.info('文件被占用')
        result = clientAudio.synthesis(self.text, 'zh', 1, {
            'per': self.pre,
            'spd': self.speed,  # speed
            'vol': 7,  # volume
        })
        if isinstance(result, dict):
            # No new audio was produced. Return here instead of falling
            # through to startplay(), which would replay the WAV file left
            # over from the previous request.
            print(result)
            return
        with open(audioFilemp3, 'wb') as f:
            f.write(result)
        mp3Towav()
        log.info('baidu audio')
        auply.startplay()
以上是主要的核心代码,我也是边学边用;需要完整代码的可以留言。
代码还有需要优化的地方,有时间再继续优化。