使用百度识图从图片获取内容,再使用百度语音合成与科大讯飞的语音合成把图片内容合成语音进行播放,并使用pyqt5画界面

主要逻辑:
使用pyqt5画出界面
点击上传图片按钮上传图片(最多16张),点击图片获取图片内容,点击播放按钮获取合成音频并播放。

啥都不说,上图上主要代码
[图片1:使用百度识图从图片获取内容,再使用百度语音合成与科大讯飞的语音合成把图片内容合成语音进行播放,并使用pyqt5画界面]


#日志模块
class Logger:
    """Factory for a logger that writes to a file next to this module and to a stream.

    Args:
        filname: Log file name; it is resolved relative to the directory
            containing this source file.
    """

    def __init__(self,filname):
        self.log_file_path = path.join(path.dirname(path.abspath(__file__)), filname)
        self.filename = filname

    def get_logger(self, verbosity=1, name=None):
        """Return the logger called *name*, configured with file and stream output.

        Calling this more than once for the same logger name is safe: the
        handlers are attached only the first time, so records are not
        duplicated and the log file is not truncated again.

        Args:
            verbosity: 0 for DEBUG, 1 for INFO, 2 for WARNING.
            name: Logger name passed to ``logging.getLogger``; ``None``
                selects the root logger.

        Returns:
            The configured ``logging.Logger``.

        Raises:
            KeyError: If *verbosity* is not 0, 1 or 2.
        """
        level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
        formatter = logging.Formatter(
            "[%(asctime)s][%(funcName)s][line:%(lineno)d][%(levelname)s] %(message)s"
        )
        logger = logging.getLogger(name)
        logger.setLevel(level_dict[verbosity])

        # logging.getLogger returns the same object for the same name, so
        # blindly adding handlers on every call would emit each record once
        # per call. Only attach what is still missing.
        target_file = path.abspath(self.log_file_path)
        has_file_handler = any(
            isinstance(h, logging.FileHandler) and h.baseFilename == target_file
            for h in logger.handlers
        )
        # Exact type check on purpose: FileHandler (and other handlers) are
        # subclasses of StreamHandler and must not count as the console one.
        has_stream_handler = any(
            type(h) is logging.StreamHandler for h in logger.handlers
        )

        if not has_file_handler:
            # Mode "w" starts a fresh log file for this run.
            fh = logging.FileHandler(self.log_file_path, "w")
            fh.setFormatter(formatter)
            logger.addHandler(fh)
        if not has_stream_handler:
            sh = logging.StreamHandler()
            sh.setFormatter(formatter)
            logger.addHandler(sh)
        return logger

# Module-wide logger used by the code below. With the default arguments this
# configures the root logger (name=None) at INFO level, writing to runlog.log
# in this file's directory and to a stream handler.
log = Logger('runlog.log').get_logger()
#Mp3转wav
def mp3Towav():
    """Convert the MP3 at ``audioFilemp3`` into a WAV file at ``audioFilewav``.

    Both paths are module-level names defined elsewhere in this file. When
    the MP3 does not exist nothing is converted and only a log line is
    written.
    """
    log.info(" mp3Towav strat")

    # Guard clause: nothing to convert without a source file.
    if not path.exists(audioFilemp3):
        log.info(" mp3Towav nofile")
        return

    AudioSegment.from_mp3(audioFilemp3).export(audioFilewav, format="wav")
    log.info(" mp3Towav finished")

#音频播放模块使用的pyaudio
class audioPlay():
    """Play the WAV file at the module-level path ``audioFilewav`` with PyAudio.

    The output stream is opened with a stream callback, so audio frames are
    pulled from the wave file by ``callback`` while ``startplay`` waits for
    the stream to finish. The empty string is used as the "not set" value
    for ``wf``, ``pl`` and ``stream``.
    """

    def __init__(self):
        self.wf=''      # wave reader, '' when no file is open
        self.pl=''      # pyaudio.PyAudio instance, '' until setpl()
        self.stream=''  # PyAudio output stream, '' until setstream()

    def openfile(self):
        """Open ``audioFilewav`` for reading, or reset ``self.wf`` to '' if it is missing."""
        if path.exists(audioFilewav):
            self.wf = wave.open(audioFilewav, 'rb')
        else:
            # Must reset ``wf`` itself: startplay() tests ``self.wf == ''`` to
            # detect a missing file, and a leftover handle from an earlier
            # playback would otherwise be reused.
            self.wf = ''

    def setpl(self):
        """Create the PyAudio instance."""
        self.pl = pyaudio.PyAudio()

    def setstream(self):
        """Open an output stream matching the wave file's format, fed by ``callback``."""
        self.stream = self.pl.open(format=self.pl.get_format_from_width(self.wf.getsampwidth()),
                        channels=self.wf.getnchannels(),
                        rate=self.wf.getframerate(),
                        output=True,
                        stream_callback=self.callback)

    def callback(self,in_data, frame_count, time_info, status):
        """Stream callback: hand the next ``frame_count`` frames to PyAudio."""
        data = self.wf.readframes(frame_count)
        return (data, pyaudio.paContinue)

    def stramStart(self):
        """Start (or resume) the stream."""
        self.stream.start_stream()

    def stremStop(self):
        """Stop the stream without closing it."""
        self.stream.stop_stream()

    def startplay(self):
        """Open the WAV file and play it, blocking until playback is over.

        Sets the module-level ``playing`` flag to True once the stream has
        started. Returns immediately (after logging) when there is no WAV
        file. The wait continues while the stream is active or the
        module-level ``paused`` flag is True.
        """
        global playing
        import time  # local import: only needed for the wait loop below

        log.info("play start")
        self.openfile()
        if self.wf=='':
            log.info("not have file")
            return
        self.setpl()

        self.setstream()
        self.stramStart()
        playing = True
        while self.stream.is_active() or paused == True:
            # Sleep between polls instead of spinning on ``pass`` so the
            # waiting thread does not consume a full CPU core.
            time.sleep(0.05)
        log.info("play finished")

    def  stram_is_stoped(self):
        """Return whether the stream is stopped."""
        return self.stream.is_stopped()

    def stream_is_active(self):
        """Return whether the stream is active."""
        return self.stream.is_active()

    def stopPlay(self):
        """Stop playback, release the stream, file and PyAudio, and clear the flags.

        Resets the module-level ``playing`` and ``paused`` flags to False.
        """
        global playing
        global paused
        if self.stream.is_active():
            self.stream.stop_stream()
        self.stream.close()
        self.wf.close()
        # close PyAudio
        self.pl.terminate()
        playing=False
        paused=False

STATUS_FIRST_FRAME = 0  # marker for the first frame
STATUS_CONTINUE_FRAME = 1  # marker for an intermediate frame
STATUS_LAST_FRAME = 2  # marker for the last frame

# Shared player instance: the speech-synthesis classes below call
# auply.stopPlay() / auply.startplay() on it.
auply=audioPlay()
#科大讯飞的音频合成模块中初始化参数与拼接URL
class Ws_Param(object):
    """Request parameters and signed URL for the iFlytek TTS websocket API.

    Args:
        APPID: Application id, sent as ``common.app_id``.
        APIKey: API key placed in the authorization string.
        APISecret: Secret used as the HMAC-SHA256 key when signing.
        Text: Text to synthesize; sent base64-encoded as UTF-8.
        vcn: Voice name; converted with ``str``.
        speed: Speaking speed; converted with ``int``.
    """

    def __init__(self, APPID, APIKey, APISecret, Text,vcn,speed):
        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret
        self.Text = Text
        self.vcn = vcn
        self.speed = speed

        # "common" section of the request.
        self.CommonArgs = {"app_id": APPID}

        # "business" section of the request.
        self.BusinessArgs = {
            "aue": "lame",
            "sfl": 1,
            "auf": "audio/L16;rate=16000",
            "vcn": str(vcn),
            "tte": "utf8",
            "speed": int(speed),
        }

        # "data" section: the whole text is sent in one frame with status 2.
        encoded_text = base64.b64encode(Text.encode('utf-8'))
        self.Data = {"status": 2, "text": encoded_text.decode("UTF8")}

    def create_url(self):
        """Return the websocket URL with the authorization query string attached.

        The signature is an HMAC-SHA256 over the host, the current date in
        RFC 1123 format and the request line, keyed with ``APISecret``.
        """
        # NOTE(review): the signed host differs from the host in the URL
        # below; this is reproduced exactly as found - confirm against the
        # service documentation before changing either.
        signed_host = "ws-api.xfyun.cn"

        # RFC 1123 timestamp for "now".
        date = format_date_time(mktime(datetime.now().timetuple()))

        # String to sign: host line, date line, request line.
        signature_origin = "\n".join((
            "host: " + signed_host,
            "date: " + date,
            "GET " + "/v2/tts " + "HTTP/1.1",
        ))
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        auth_fields = (self.APIKey, "hmac-sha256", "host date request-line", signature)
        authorization_origin = (
            "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % auth_fields
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')
        ).decode(encoding='utf-8')

        # Authentication parameters go into the query string.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": signed_host,
        })
        return 'wss://tts-api.xfyun.cn/v2/tts' + '?' + query



#科大讯飞的音频合成模块中进行网络数据请求
class kedaxunfei():
    """Synthesize speech through the iFlytek TTS websocket and play the result.

    The audio frames received from the service are appended to the
    module-level MP3 path ``audioFilemp3``; playback goes through the shared
    ``auply`` player after conversion with ``mp3Towav``.

    Args:
        text: Text to synthesize.
        vcn: Voice name forwarded to ``Ws_Param``.
        speed: Speaking speed forwarded to ``Ws_Param``.
    """

    def __init__(self,text,vcn,speed):
        self.text = text
        self.vcn = vcn
        self.speed = speed
        self.wsParam=''
        self.wsUrl=''
        self.ws=''

    def _discard_stale_audio(self):
        """Stop any running playback and delete the MP3 left by a previous request."""
        if playing:
            log.info("playing")
            auply.stopPlay()
        if os.path.exists(audioFilemp3):
            try:
                os.remove(audioFilemp3)
            except OSError:
                log.info('文件被占用')

    def stratPlay(self):
        """Run one synthesis request; blocks until the websocket is closed."""
        # NOTE(review): credentials are hard-coded placeholders here; load
        # real ones from configuration rather than committing them.
        self.wsParam = Ws_Param(APPID='5eb5xxxx', APIKey='9db0xxxxx',
                                APISecret='bec6xxxxxx',
                                Text=self.text, vcn=self.vcn, speed=self.speed)
        websocket.enableTrace(False)
        self.wsUrl = self.wsParam.create_url()

        # Clear the old MP3 *before* any frame of this request can arrive:
        # on_message appends to the file, so deleting it after the request
        # has been sent could drop freshly received audio.
        self._discard_stale_audio()

        self.ws = websocket.WebSocketApp(self.wsUrl, on_message=self.on_message, on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open
        # NOTE(review): CERT_NONE disables TLS certificate verification.
        self.ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

    def on_open(self, *args):
        """Websocket-open callback: send the synthesis request from a helper thread.

        Extra positional arguments (some websocket-client versions pass the
        connection object) are accepted and ignored.
        """
        def run(*_):
            request = json.dumps({
                "common": self.wsParam.CommonArgs,
                "business": self.wsParam.BusinessArgs,
                "data": self.wsParam.Data,
            })
            self.ws.send(request)

        thread.start_new_thread(run, ())

    def on_message(self, message, *extra):
        """Websocket-message callback: append received audio to the MP3 file.

        Works whether the library calls it as ``(message)`` or as
        ``(ws, message)``: when extra arguments are present the last one is
        taken as the message. Closes the connection after the frame whose
        ``data.status`` is 2.
        """
        if extra:
            message = extra[-1]
        try:
            payload = json.loads(message)
            code = payload["code"]
            if code != 0:
                # Check the result code before touching the audio fields so
                # that a service error is reported as such instead of
                # surfacing as a parse exception.
                log.error("sid:%s call error:%s code is:%s",
                          payload.get("sid"), payload.get("message"), code)
                return

            data = payload["data"]
            audio = base64.b64decode(data["audio"])
            with open(audioFilemp3, 'ab') as f:
                f.write(audio)
            if data["status"] == 2:
                log.info("ws is closed")
                self.ws.close()
        except Exception as e:
            # Callback boundary: log and swallow so the websocket loop keeps running.
            log.error("receive msg,but parse exception: %s", e)

    # Websocket error handler.
    def on_error(self, ws, error=None):
        """Websocket-error callback; accepts ``(error)`` or ``(ws, error)``."""
        if error is None:
            error = ws
        log.error("### error: %s", error)

    # Websocket close handler.
    def on_close(self, ws=None, *args):
        """Websocket-close callback; any arguments are ignored."""
        log.info("### closed ###")

    def playAutio(self):
        """Convert the downloaded MP3 to WAV and play it, stopping current playback first."""
        if playing:
            log.info("playing")
            auply.stopPlay()
        mp3Towav()
        auply.startplay()

    def playRun(self):
        """Synthesize the text, then play the resulting audio."""
        log.info('keda audio')
        self.stratPlay()
        self.playAutio()



#自定义label
class MyLabel(QLabel):
    """QLabel that reports mouse presses by emitting its own object name."""

    # Carries the label's objectName() to whoever is connected.
    sendMsg = pyqtSignal(str)

    def __init__(self):
        super().__init__()

    def mousePressEvent(self, e):
        """Emit ``sendMsg`` with this label's object name when it is pressed."""
        self.sendMsg.emit(self.objectName())


#百度语音合成模块
class baidu:
    """Synthesize speech with the Baidu client ``clientAudio`` and play it.

    Args:
        text: Text to synthesize; converted with ``str`` and UTF-8 encoded.
        pre: Value sent as the ``per`` option of the synthesis call.
        speed: Value sent as the ``spd`` option of the synthesis call.
    """

    def __init__(self,text,pre,speed):
        self.text = str(text).encode('utf-8')
        self.pre = pre
        self.speed = speed

    def playAutio(self):
        """Request synthesis, store the MP3, convert it to WAV and play it.

        Any running playback is stopped and the previous MP3 is removed
        first. If the service returns a dict instead of audio bytes the
        result is logged and nothing is played.
        """
        if playing:
            auply.stopPlay()
        if os.path.exists(audioFilemp3):
            try:
                os.remove(audioFilemp3)
            except OSError:
                log.info('文件被占用')

        result = clientAudio.synthesis(self.text, 'zh', 1, {
            'per': self.pre,
            'spd': self.speed,  # speed
            'vol': 7  # volume
        })

        if isinstance(result, dict):
            # A dict is treated as a failure response. Return here: the WAV
            # from the previous request still exists on disk, and starting
            # playback would replay that old audio.
            log.error('baidu synthesis failed: %s', result)
            return

        with open(audioFilemp3, 'wb') as f:
            f.write(result)
        mp3Towav()
        log.info('baidu audio')
        auply.startplay()

以上是主要的核心代码,我也是边学边用;需要完整代码的可以留言。
代码还有需要优化的地方,等有时间再优化一波。

你可能感兴趣的:(Python相关学习)