语音对讲---基于图灵机器人+科大讯飞

    • 功能和注意事项
      • 1 功能
      • 2 注意事项
    • 测试环境
    • 科大讯飞交叉编译库
    • QT实现
    • 界面设计

1. 功能和注意事项

1.1 功能

  • 语音对讲
  • 文字转语音
  • 语音转文字
  • 音量可以调节
  • 语速可以调节

1.2 注意事项

    讯飞语音识别每次最长只支持60秒;如果中间有较长时间的静音,也会自动停止识别。
    讯飞SDK不支持多线程调用,即一次session所涉及的begin—end接口必须在同一个线程中调用。
    MSPLogin(NULL, NULL, "appid = XXXXXXX"); 不提供用户名和密码也可以登录成功。

2. 测试环境

    在 OK210 ARM 开发板上单独调试无问题。

3. 科大讯飞交叉编译库

  注册科大讯飞账号之后,如果需要用于独立开发环境(例如本文的ARM交叉编译环境)的库,则需要向科大讯飞提交申请,由对方为你专门编译开发库。

4. QT实现

qvoiceintercom.h文件
#ifndef QVOICEINTERCOM_H
#define QVOICEINTERCOM_H

#include 
#include 
#include 
#include 

#include    "include/msp_cmn.h"
#include    "include/msp_errors.h"
#include    "include/msp_types.h"
#include    "include/qisr.h"
#include    "include/qtts.h"

/* Audio capture parameters. rw_thread::run() sizes its capture buffer as
 * LENGTH * RATE * SIZE * CHANNELS / 8 bytes; rdOpen()/wrOpen() pass SIZE*2
 * as the sample size when configuring /dev/dsp. */
#define LENGTH 1        /* number of seconds held by one buffer */
#define RATE 16000      /* sampling rate */
#define SIZE 8          /* quantization bits */
#define CHANNELS 1      /* number of channels */
#define RSIZE    8      /* size of the copy buffer used by pcmPlay() */

/* Fixed file locations on the target board. */
#define PCM_TEXT_TO_SPEECH          "/home/SMARTHOME/info/text_to_speech.pcm"   /* TTS output, played by the write thread */
#define PCM_SOUND                   "/home/SMARTHOME/info/sound.pcm"            /* recording written by rw_thread, read by vd_thread::vd_send() */
#define PCM_SPEAKLOUNDLY            "/home/SMARTHOME/info/speakloundly.pcm"     /* prompt played by rst_manage() when nothing was recognised */
#define JSON_DIR                    "/home/SMARTHOME/info"                      /* working directory in which turingInput() creates json.txt */

/* Bytes in one buffer of LENGTH seconds at SIZE bits per sample. */
#define     BLOCK_LEN   (LENGTH *RATE * SIZE * CHANNELS / 8)

/*

typedef int SR_DWORD;
typedef short int SR_WORD ;

//音频头部格式
struct wave_pcm_hdr
{
    char            riff[4];                                    // = "RIFF"
    SR_DWORD        size_8;                         // = FileSize - 8
    char            wave[4];                        // = "WAVE"
    char            fmt[4];                         // = "fmt "
    SR_DWORD        dwFmtSize;                      // = 下一个结构体的大小 : 16

    SR_WORD         format_tag;              // = PCM : 1
    SR_WORD         channels;                       // = 通道数 : 1
    SR_DWORD        samples_per_sec;        // = 采样率 : 8000 | 6000 | 11025 | 16000
    SR_DWORD        avg_bytes_per_sec;      // = 每秒字节数 : dwSamplesPerSec * wBitsPerSample / 8
    SR_WORD         block_align;            // = 每采样点字节数 : wBitsPerSample / 8
    SR_WORD         bits_per_sample;         // = 量化比特数: 8 | 16

    char            data[4];                        // = "data";
    SR_DWORD        data_size;                // = 纯数据长度 : FileSize - 44
} ;

//默认音频头部数据
static struct wave_pcm_hdr default_pcmwavhdr =
{
    { 'R', 'I', 'F', 'F' },
    0,
    {'W', 'A', 'V', 'E'},
    {'f', 'm', 't', ' '},
    16,
    1,
    1,
    16000,
    32000,
    2,
    16,
    {'d', 'a', 't', 'a'},
    0
};

*/

// Forward declaration of the global recording/playback thread class defined below.
class rw_thread;
// Forward declarations inside namespace Ui. Ui::qvoiceintercom is the class
// instantiated by the widget's constructor from "ui_qvoiceintercom.h".
// NOTE(review): Ui::rw_thread and Ui::vd_thread are distinct from the global
// rw_thread / vd_thread classes and are not referenced by the visible code.
namespace Ui {
class qvoiceintercom;
class rw_thread;
class vd_thread;
}

/**
 * Main widget of the voice-intercom demo.
 *
 * It ties together three parts that are visible in the implementation file:
 *  - str2audio:  text-to-speech through the QTTS* calls (textToSpeech()),
 *  - audio_http: an HTTP GET to the Turing robot API and parsing of its reply,
 *  - audio2str:  recording and speech recognition, run in rw_thread / vd_thread.
 */
class qvoiceintercom : public QWidget
{
    Q_OBJECT

public:
    explicit qvoiceintercom(QWidget *parent = 0);
    ~qvoiceintercom();
/************ str2audio: text-to-speech configuration **/
private:
     char appid[15] ;               // application id, e.g. 54b0856b; required
     char username[20];         // the appid is not the user name; may be NULL
     char passwd[20];              // user's password; may be NULL
     char login_configs[128];  // e.g. " appid = 54b0856b, work_dir =  .  "
     char wavfilename[128] ;    // path of the synthesized PCM file (text_to_speech.pcm), set by setWavfilename()
     char param[256];            // TTS parameter string built by setParam(), e.g. "vcn = xiaoyu,aue = speex-wb,auf = audio/L16;rate = 16000,spd = 5,vol = 5,tte = utf8"
public:
    // setters
    void str2audioInit();                                   // applies the default output file and voice parameters
    int setWavfilename(const char * filename);              // copies filename into wavfilename; returns 0
    void setParam(const char * vcn , int spd, int vol);     // formats voice name, speed and volume into param
    // functions
    // Synthesizes src_text with the given params and writes the audio to des_path;
    // returns -1 for NULL arguments or an unopenable file, otherwise an MSP status code.
    int textToSpeech(const char* src_text ,const char* des_path ,const char* params);
/************ audio_http: Turing robot request/response **/
private:
    char *info;                         // not used by the visible implementation
    QByteArray json_buf;                // raw body of the last HTTP reply
    QString turing_url;                 // last request URL (base_url + query text)
    QString *base_url;                  // API URL prefix, allocated in audioHttpInit()
    QNetworkAccessManager *manager;     // issues the GET requests; created in audioHttpInit()
    QNetworkReply     *reply;           // NOTE(review): never assigned in the visible implementation
    QFile fjson;                        // json.txt, opened per request and written when the reply arrives
    QByteArray vh_code;                 // value extracted from the reply by rst_analyse()
    QTimer  *timeOut;                   // request timeout timer, started by turingInput()
public:
    bool audioHttpInit();                       // creates manager and timer, connects their signals
    void turingInput(QString str);              // sends str to the Turing API
    void rst_analyse(QByteArray qba,bool flag); // extracts "code" (flag == 0) or "text" (flag == 1) from qba into vh_code
    // Scans rst_json from index for the key j_name and appends its value to *j_data;
    // returns the index following the value, or -1 when the key is not found.
    int turingRead(QByteArray j_name,QByteArray *j_data,QByteArray rst_json,int index);
private slots:
    void replyFinished(QNetworkReply *reply);   // connected to QNetworkAccessManager::finished
    void handleTimeOut();                       // connected to timeOut's timeout()
/************ audio2str: recording and recognition **/
public:
    void audio2strInit();                   // creates the worker threads and connects their signals
    void dspReadEntrance();         // entry point that starts recording
    void dspWriteEntrance();        // entry point that starts playback
private slots:
    void rst_manage();              // handles a finished recognition (vd_thread::getresult)
/************ voiceIntercom: UI slots **/
private slots:
    void on_recordButton_pressed();     // starts recording on the first press
    void on_recordButton_released();    // stops the recording loop
    void on_pushButton_clicked();       // closes the widget
private:
    //com_serport *com1;
public:
/************voiceIntercom**********/
private:
    Ui::qvoiceintercom *ui;     // generated UI; created in the constructor, deleted in the destructor
};


/**
 * Audio worker thread. The mode is fixed at construction:
 * stat == 0 records from /dev/dsp into PCM_SOUND while the record button is
 * held, stat == 1 plays PCM_TEXT_TO_SPEECH.
 */
class rw_thread : public QThread
{
    Q_OBJECT
public:
    void run();                         // thread body; behaviour depends on th_stat
    explicit rw_thread(int stat);       // stat: 0 = record, 1 = play
signals:
    void readend();                     // emitted by run() after a recording has been written and closed
private:
    int th_stat;                        // mode selected in the constructor
};

/**
 * Speech-recognition worker thread. The constructor logs in with MSPLogin();
 * run() calls vd_send(), which streams PCM_SOUND through the QISR* calls and
 * stores the recognised text in `results`.
 */
class vd_thread : public QThread
{
    Q_OBJECT
public:
    void run();                                 // thread body: vd_send(recog_params)
    explicit vd_thread();                       // logs in and sets the default recognition parameters
    int vd_send(const char * recog_param);      // runs one recognition session; 0 on completion, -1 on early failure
    int vd_end();                               // MSPLogout(); 0 on success, -1 on failure
signals:
    void getresult();                           // emitted by vd_send() once `results` has been filled

public slots:
    void vd_stop();                             // sets audio_status to MSP_AUDIO_SAMPLE_LAST
public:
        char * results;                         // malloc'ed recognition text; freed by qvoiceintercom::replyFinished()
private:
    int upload_user_vocabulary();               // declared only; no definition in the visible implementation
    FILE* result_txt;                           // not used by the visible implementation

    int audio_status;                           // audio sample status flag written by vd_send()/vd_stop()
    const char *recog_params;                   // parameter string passed to QISRSessionBegin()
    const char *gramer;                         // not used by the visible implementation
};

#endif // QVOICEINTERCOM_H
qvoiceintercom.cpp文件
#include "qvoiceintercom.h"
#include "ui_qvoiceintercom.h"
#include 
#include 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

// Recording switch: set to true by on_recordButton_pressed(), to false by
// on_recordButton_released(), and polled by the capture loop in rw_thread::run().
// NOTE(review): shared between threads without synchronisation.
static bool vdFlag;

// Hand-over between recorder and recogniser: rw_thread::run() acquires
// freeSpace before recording and releases usedSpace afterwards;
// vd_thread::vd_send() acquires usedSpace and releases freeSpace when done.
QSemaphore freeSpace(5);
QSemaphore usedSpace(0);

static long int count = 0;          /* touch-screen button debounce: ignores repeated presses until the current round has finished */

// Worker threads, created in qvoiceintercom::audio2strInit().
rw_thread *th_read;
rw_thread *th_write;
vd_thread *th_vd;


/**
 * Configure an already opened OSS DSP descriptor.
 *
 * Sets the sample size, the channel count and the sampling rate (in that
 * order) and then issues SOUND_PCM_SYNC. Failures are only reported on
 * stderr; the function always runs through all steps.
 *
 * @param fd              descriptor of the opened DSP device
 * @param rate            requested sampling rate
 * @param sample_size     requested quantization bits
 * @param sample_channel  requested number of channels
 */
static void dspInit(int fd,int rate,int sample_size , int sample_channel)
{
    int arg;                        /* in/out argument of the ioctl calls */
    int status;                     /* return value of the system calls */

    /* Sample size. A value the driver changed is not an errno condition,
     * so it is reported with fprintf instead of perror. */
    arg = sample_size;
    status = ioctl(fd, SOUND_PCM_WRITE_BITS, &arg);
    if (status == -1)
        perror("SOUND_PCM_WRITE_BITS ioctl failed");
    else if (arg != sample_size)
        fprintf(stderr, "unable to set sample size\n");

    /* Number of channels. */
    arg = sample_channel;
    status = ioctl(fd, SOUND_PCM_WRITE_CHANNELS, &arg);
    if (status == -1)
        perror("SOUND_PCM_WRITE_CHANNELS ioctl failed");
    else if (arg != sample_channel)
        fprintf(stderr, "unable to set number of channels\n");

    /* Sampling rate. */
    arg = rate;
    status = ioctl(fd, SOUND_PCM_WRITE_RATE, &arg);
    if (status == -1)
        perror("SOUND_PCM_WRITE_RATE ioctl failed");
    else if (arg != rate)
        fprintf(stderr, "unable to set sample rate: asked %d, got %d\n", rate, arg);

    status = ioctl(fd, SOUND_PCM_SYNC, 0);
    if (status == -1)
        perror("SOUND_PCM_SYNC ioctl failed");
}

/**
 * Open /dev/dsp for capture and configure it with RATE, SIZE*2 bits and
 * CHANNELS.
 *
 * @return the device descriptor, or -1 when the device cannot be opened
 */
static int rdOpen()
{
    const int captureFd = open("/dev/dsp", O_RDONLY);
    if (captureFd >= 0)
    {
        printf("[rdOpen]fd_dsprd = %d\n", captureFd);
        dspInit(captureFd, RATE, (SIZE*2), CHANNELS);
        return captureFd;
    }

    perror("open of /dev/dsp failed");
    return -1;
}

/**
 * Open /dev/dsp for playback and configure it with half of RATE, SIZE*2 bits
 * and CHANNELS.
 *
 * @return the device descriptor, or -1 when the device cannot be opened
 */
static int wrOpen()
{
    const int playbackFd = open("/dev/dsp", O_WRONLY);
    if (playbackFd >= 0)
    {
        printf("[wrOpen]fd_dspwr = %d\n", playbackFd);
        dspInit(playbackFd, (RATE/2), (SIZE*2), CHANNELS);
        return playbackFd;
    }

    perror("[write]open of /dev/dsp failed");
    return -1;
}


/**
 * Play a raw PCM file through the DSP device opened by wrOpen().
 *
 * The file is validated and opened before the device so that no descriptor
 * is left open on an early failure. The data is copied until end of file,
 * so a tail shorter than RSIZE bytes is played as well.
 *
 * @param pcmfilename  path of the PCM file; NULL is rejected
 * @return 0 when the whole file was handed to the device, -1 on any failure
 */
static int pcmPlay(const char * pcmfilename)
{
    if (NULL == pcmfilename)
        return -1;

    struct stat statbuff;
    if (stat(pcmfilename, &statbuff) < 0) {
        printf("Get pcmfilesize failed,please check!\n");
        return -1;
    }
    printf("PlayPCM...filesize=%ld\n", (long)statbuff.st_size);

    const int fd_f = open(pcmfilename, O_RDONLY);
    if (fd_f == -1)
    {
        perror("cannot open the sound file");
        return -1;
    }

    const int fDspwr = wrOpen();
    if (fDspwr < 0)
    {
        ::close(fd_f);
        return -1;
    }

    int result = 0;
    unsigned char buf[RSIZE];
    ssize_t got;
    while ((got = read(fd_f, buf, sizeof(buf))) > 0)
    {
        /* Write exactly what was read; never replay stale buffer contents. */
        if (write(fDspwr, buf, (size_t)got) != got)
        {
            perror("pcm write wrong number of bytes");
            result = -1;
            break;
        }
    }
    if (got < 0)
    {
        perror("pcmfile read wrong number of bytes");
        result = -1;
    }

    ::close(fd_f);
    ioctl(fDspwr, SOUND_PCM_SYNC, 0);   /* let the queued samples drain before closing */
    ::close(fDspwr);
    return result;
}

/**
 * Build the widget: load the generated UI, adjust the text area and the
 * record button, then initialise the HTTP, recognition and synthesis parts.
 */
qvoiceintercom::qvoiceintercom(QWidget *parent)
    : QWidget(parent)
    , ui(new Ui::qvoiceintercom)
{
    ui->setupUi(this);
    setWindowFlags(Qt::FramelessWindowHint);

    // Conversation log.
    ui->textEdit->setFontPointSize(20);
    ui->textEdit->setAttribute(Qt::WA_InputMethodEnabled, false);

    // Push-to-talk button.
    ui->recordButton->setFlat(true);
    ui->recordButton->setFocusPolicy(Qt::NoFocus);
    ui->recordButton->setAttribute(Qt::WA_InputMethodEnabled, false);

    audioHttpInit();
    audio2strInit();
    str2audioInit();
    //com1 = new com_serport(&comfd,DEV_NAME,8,1,'N');
}

/**
 * Release what the widget owns directly: the generated UI and the URL prefix
 * allocated with new in audioHttpInit().
 *
 * The member `reply` is deliberately not touched: no visible code ever
 * assigns it (replyFinished() works on its own parameter), so calling
 * deleteLater() through it would dereference an indeterminate pointer.
 * The network manager and the timer are children of this widget and are
 * destroyed by QObject.
 */
qvoiceintercom::~qvoiceintercom()
{
    delete ui;
    delete base_url;
}

/********************************************str2audio-start*******************************/
/**
 * Apply the text-to-speech defaults: output file PCM_TEXT_TO_SPEECH,
 * voice "xiaoyan", speed 7, volume 3.
 */
void qvoiceintercom::str2audioInit()
{
    static const char defaultVoice[] = "xiaoyan";
    const int defaultSpeed = 7;
    const int defaultVolume = 3;

    setWavfilename(PCM_TEXT_TO_SPEECH);
    setParam(defaultVoice, defaultSpeed, defaultVolume);
}
/**
 * Store the path of the file that receives synthesized audio.
 *
 * @param filename  NUL-terminated path; must fit into wavfilename
 * @return 0 on success; -1 when filename is NULL or too long, in which case
 *         the previously stored name is left unchanged (a silently truncated
 *         path would point at the wrong file)
 */
int qvoiceintercom::setWavfilename(const char * filename)
{
    if (NULL == filename || strlen(filename) >= sizeof(wavfilename))
        return -1;

    strncpy(wavfilename, filename, sizeof(wavfilename) - 1);
    wavfilename[sizeof(wavfilename) - 1] = '\0';    /* strncpy alone does not guarantee termination */
    return 0;
}

void qvoiceintercom::setParam(const char * vcn , int spd, int vol)
{
    char tempParam[256] = "";
    int num = 0;
    num = sprintf(tempParam,"vcn = %s,aue = speex-wb,auf = audio/L16;rate = 16000,spd = %d,vol = %d,tte = utf8",vcn,spd,vol);
    qDebug()<<"setParam=" << tempParam << endl;
    memset(param,0,sizeof(param));
    strncpy(param,tempParam,sizeof(param));
}

/**
 * Synthesize src_text and write the audio returned by the QTTS session to
 * des_path.
 *
 * The output file is closed on every path, and an error reported while
 * fetching audio is returned to the caller instead of being replaced by the
 * result of QTTSSessionEnd().
 *
 * @param src_text  text to synthesize
 * @param des_path  output file, opened with "wb"
 * @param params    session parameter string for QTTSSessionBegin()
 * @return -1 for NULL arguments, an unopenable file or a failed file write;
 *         otherwise the first failing MSP status code, or the result of
 *         QTTSSessionEnd()
 */
int qvoiceintercom::textToSpeech(const char *src_text, const char *des_path, const char *params)
{
    printf("\nStart TTS!\n");
    if (NULL == src_text || NULL == des_path || NULL == params)
    {
        printf("params is null!\n");
        return -1;
    }
    qDebug()<<"tts param ="  << params << endl;

    FILE *fp = fopen(des_path,"wb");
    if (NULL == fp)
    {
        printf("open file %s error\n",des_path);
        return -1;
    }

    int ret = 0;
    const char *sess_id = QTTSSessionBegin(params, &ret);
    if ( ret != MSP_SUCCESS )
    {
        printf("QTTSSessionBegin: qtts begin session failed Error code %d.\n",ret);
        fclose(fp);
        return ret;
    }

    ret = QTTSTextPut(sess_id, src_text, (unsigned int)strlen(src_text), NULL );
    if ( ret != MSP_SUCCESS )
    {
        printf("QTTSTextPut: qtts put text failed Error code %d.\n",ret);
        QTTSSessionEnd(sess_id, "TextPutError");
        fclose(fp);
        return ret;
    }

    unsigned int audio_len = 0;
    int synth_status = 1;
    int synth_ret = MSP_SUCCESS;
    while (1)
    {
        const void *data = QTTSAudioGet(sess_id, &audio_len, &synth_status, &synth_ret);
        if (NULL != data && audio_len > 0)
        {
            if (fwrite(data, audio_len, 1, fp) != 1)
            {
                printf("write file %s error\n",des_path);
                synth_ret = -1;
                break;
            }
        }
        printf("\nget audio...\n");
        /* NOTE(review): status 2 is presumably the SDK's "all data received" flag — confirm against qtts.h */
        if (synth_status == 2 || synth_ret != MSP_SUCCESS)
            break;
        usleep(150000);     /* give the cloud side time to produce the next chunk */
    }
    if (synth_ret != MSP_SUCCESS)
        printf("QTTSAudioGet: qtts get audio failed Error code %d.\n",synth_ret);

    fclose(fp);
    ret = QTTSSessionEnd(sess_id, NULL);
    if ( ret != MSP_SUCCESS )
    {
        printf("QTTSSessionEnd: qtts end failed Error code %d.\n",ret);
    }
    printf("\nEnd TTS!\n");
    return (synth_ret != MSP_SUCCESS) ? synth_ret : ret;
}
/********************************************audio_http-start*******************************/
/**
 * Create the network manager, the timeout timer and the Turing API URL
 * prefix, and connect the manager's finished() and the timer's timeout()
 * signals to this widget.
 *
 * @return always true
 */
bool qvoiceintercom::audioHttpInit()
{
    manager = new QNetworkAccessManager(this);
    connect(manager,SIGNAL(finished(QNetworkReply*)),this,SLOT(replyFinished(QNetworkReply*)));

    timeOut = new QTimer(this);
    connect(timeOut, SIGNAL(timeout()), this, SLOT(handleTimeOut()));

    // NOTE(review): the API key is embedded in the URL prefix.
    base_url = new QString("http://www.tuling123.com/openapi/api?key=39a5a489acd12eeafaf8a0d7cc1b139a&info=");

    qDebug("Http Connected");
    return true;
}
/**
 * Send one utterance to the Turing robot API.
 *
 * Opens json.txt in JSON_DIR (the reply is stored there by replyFinished()),
 * starts the 10 s timeout timer and issues the GET request. The query text
 * is percent-encoded so that characters such as '&', '#', '+' or spaces in
 * the recognised text cannot corrupt the query string.
 *
 * @param str  text to send as the `info` query value
 */
void qvoiceintercom::turingInput(QString str)
{
    printf("[Enter tuiringInput]");
    QDir::setCurrent(JSON_DIR);
    fjson.setFileName("json.txt");
    if(!fjson.open(QIODevice::WriteOnly)){
        qDebug() << "Error:Cannot open Json file.";
        count = 0;      /* no reply will arrive to reset the debounce counter, so allow a new recording */
        return ;
    }

    /* base_url is plain ASCII; only the user text needs encoding. */
    const QByteArray encodedUrl = base_url->toLatin1() + QUrl::toPercentEncoding(str);
    turing_url = QString::fromLatin1(encodedUrl.constData(), encodedUrl.size());
    qDebug()<< turing_url << endl <<"Waiting ...." << endl;
    timeOut->start(10000);
    manager->get(QNetworkRequest(QUrl::fromEncoded(encodedUrl)));
}

void qvoiceintercom::replyFinished(QNetworkReply *reply)
{
    qDebug("[Enter replyFinished]");
    json_buf.clear();
    json_buf = reply->readAll();
    qDebug() << tr(json_buf.data());

    rst_analyse(json_buf,1);
    ui->textEdit->append(tr("
[小灵]") + QString(tr(vh_code.data())) + tr("
"
)); if(strcmp("关闭",th_vd->results) == 0){ printf("enter this->close\n"); count = 0; printf("Dsp device was closed!\n"); this->close(); } textToSpeech(vh_code.data(),wavfilename,param); dspWriteEntrance(); free(th_vd->results); fjson.write(json_buf); fjson.close(); if (timeOut->isActive()) { timeOut->stop(); } } void qvoiceintercom::handleTimeOut() { qDebug()<<"[Enter handleTimeOut function]\n"; timeOut->stop(); if (fjson.isOpen()) { fjson.close(); ui->textEdit->append(tr("
[提示]#获取云端对话已经超时!#
"
)); qDebug()<< tr("[提示]#获取云端对话已经超时!#"); } else qDebug()<<"readAll : within 6 seconds! Next,do something...\n"; } void qvoiceintercom::rst_analyse(QByteArray qba,bool flag) { vh_code.clear(); if(0 == flag ){ if(turingRead(QByteArray("code"),&vh_code,qba,0) == -1) { qDebug("read error"); } } else if(1 == flag){ if(turingRead(QByteArray("text"),&vh_code,qba,0) == -1) { qDebug("read error"); } } printf("vh_code.data:%s\n",vh_code.data()); } int qvoiceintercom::turingRead(QByteArray j_name,QByteArray *j_data,QByteArray rst_json,int index) { int i = index; char *rst = rst_json.data(); bool flag = 0; QByteArray temp; if(i == 0) i++; while(1) { //printf("i:%d\n",i); while((rst[i] != ':') || (flag == true)) { if(rst[i] == '"') { printf("rst[%d]:%c flag:%d\n",i,rst[i],flag); i++; flag = !flag; continue; } temp += rst[i++]; } //printf("temp:%s\n",temp.data()); //printf("name:%s\n",j_name.data()); if(strstr(temp,j_name)) break; while((rst[i] != ',') && (rst[i] != '}')) { i++; } if(rst[i] == '}') { qDebug("can not find data!"); return -1; } i++; } //printf("nameend%c\n",rst_json.at(i)); i++; while(!((rst[i] == ',') || (rst[i] == '}')) || (flag == true)) { if(rst[i] == '"') { i++; flag ^= flag; } if(rst[i] == '}')break; *j_data += rst[i++]; } //printf("%c\n",rst_json.at(i)); i++; return i; } /********************************************audio2str-start*******************************/ void qvoiceintercom::audio2strInit() { th_vd = new vd_thread; th_read = new rw_thread(0); th_write = new rw_thread(1); connect(th_read,SIGNAL(readend()),th_vd,SLOT(vd_stop())); connect(th_vd,SIGNAL(getresult()),this,SLOT(rst_manage())); } void qvoiceintercom::dspReadEntrance() { qDebug()<<"[Enter dspReadEntrance()]\n"; th_read->start(); } void qvoiceintercom::dspWriteEntrance() { th_write->start(); count = 0; } void qvoiceintercom::rst_manage() { qDebug()<<"[Enter rst_manage()]\n"; printf("\nresults =%s=\n",th_vd->results); if( strcmp("",th_vd->results) == 0){ printf("手抖了,或者您的声音太小了!"); 
ui->textEdit->append(tr("
[提示]#手抖了,或者您的声音太小了!#
"
)); pcmPlay(PCM_SPEAKLOUNDLY); count = 0; } else { ui->textEdit->append(tr("
[主人]")+ QObject::tr(th_vd->results) + tr("
"
)); turingInput(QString(tr(th_vd->results))); } //com1->msg_send(th_vd->results); } /********************************************rw_thread start*******************************/ rw_thread::rw_thread(int stat) { th_stat = stat; } void rw_thread::run() { int fDsprd = -1; int status =0 ; if(th_stat == 1) { qDebug()<<"write rw_thread is runing"; pcmPlay(PCM_TEXT_TO_SPEECH); } if(th_stat == 0) { if((fDsprd = rdOpen()) < 0){ return ; } qDebug()<<"read rw_thread is runing.." << fDsprd ; freeSpace.acquire(); printf("[rw_thread] freeSpace= %d\n",freeSpace.available()); th_vd->start(); FILE * fp; fp = fopen(PCM_SOUND,"w"); if (NULL == fp) { printf("open file error\n"); exit(1); } unsigned char dspBuf[LENGTH *RATE * SIZE * CHANNELS / 8] = ""; while(vdFlag) { status = read(fDsprd, dspBuf, sizeof(dspBuf)); if (status != sizeof(dspBuf)) printf("read wrong number of bytes"); fwrite(dspBuf, sizeof(dspBuf), 1, fp); printf("finished read\n"); memset(dspBuf,0,sizeof(dspBuf)); } fclose(fp); ::close(fDsprd); emit readend(); usleep(200000); usedSpace.release(); printf("[rw_thread] userdSpace= %d\n",usedSpace.available()); } } /********************************************vd_thread start*******************************/ int vd_thread::vd_end() { int ret; ret = MSPLogout(); if( MSP_SUCCESS != ret ) { printf( "MSPLogout failed, error code is: %d", ret ); return -1; } return 0; } int vd_thread::vd_send(const char * recog_param) { qDebug()<<"[Enter vd_send]\n"; int vd_ret; int ep_status; int rslt_status; const char *session_id; audio_status = 0x02; /* 开始一路会话 */ session_id = QISRSessionBegin( NULL, recog_param, &vd_ret ); if( 0 != vd_ret ) { printf( "QISRSessionBegin failed, error code is %d\n", vd_ret ); return -1; } FILE *f_pcm = NULL; long pcmSize = 0; int lastAudio = 0 ; int audStat = 2 ; long pcmCount = 0; char rec_result[1024*4] = {0}; char * wrBuf = NULL; usedSpace.acquire(); printf("[vd_thread] usedSpace= %d\n",usedSpace.available()); f_pcm = fopen(PCM_SOUND, "rb"); if (NULL != f_pcm) { 
fseek(f_pcm, 0, SEEK_END); pcmSize = ftell(f_pcm); printf("pcmSize = %ld\n",pcmSize); fseek(f_pcm, 0, SEEK_SET); wrBuf = (char *)malloc(pcmSize); fread((void *)wrBuf, pcmSize, 1, f_pcm); fclose(f_pcm); f_pcm = NULL; } else return -1; /* 发送音频数据,获取语音听写结果*/ while( 1) { unsigned int len = 6400; unsigned int audio_len = 6400; if (pcmSize < 12800) { len = pcmSize; lastAudio = 1; } audStat = 2; if (pcmCount == 0) audStat = 1; if (len<=0) { printf("len <= 0 : break\n"); break; } printf("\ncsid=%s,count=%ld,aus=%d,",session_id,pcmCount/audio_len,audStat); vd_ret = QISRAudioWrite(session_id, (const void *)&wrBuf[pcmCount], len, audStat, &ep_status, &rslt_status); printf("eps=%d,rss=%d,ret=%d",ep_status,rslt_status,vd_ret); if (vd_ret != 0){ printf("vd_ret is not 0 : break\n"); break; } pcmCount += (long)len; pcmSize -= (long)len; if (rslt_status == 0) { const char *rslt = QISRGetResult(session_id, &rslt_status, 0, &vd_ret); if (vd_ret !=0) { printf("QISRGetResult Failed,vd_ret=%d\n",vd_ret); break; } if (NULL != rslt) strcat(rec_result,rslt); } if (ep_status == MSP_EP_AFTER_SPEECH) break; usleep(150000); } /* 获取余下的识别结果*/ printf("start get the rest of results\n"); vd_ret=QISRAudioWrite(session_id, (const void *)NULL, 0, 4, &ep_status, &rslt_status); if (vd_ret !=0) { printf("QISRAudioWrite Failed,vd_ret=%d\n",vd_ret); } free(wrBuf); wrBuf = NULL; while (rslt_status != 5 && vd_ret == 0) { printf("get the rest of results\n"); const char *rslt = QISRGetResult(session_id, &rslt_status, 0, &vd_ret); if (NULL != rslt) { strcat(rec_result,rslt); } usleep(150000); } vd_ret=QISRSessionEnd(session_id, NULL); if(vd_ret !=MSP_SUCCESS) { printf("QISRSessionEnd Failed, vd_ret=%d\n",vd_ret); } freeSpace.release(); qDebug()<< "[vd_thread] freeSpace:" << freeSpace.available() << endl; results = (char *)malloc(sizeof(rec_result)); strncpy(results,rec_result,sizeof(rec_result)); /* sleep 一下很有必要,防止MSC 端无缓存的识别结果时浪费CPU 资源*/ usleep(100000); if( NULL != rec_result ) { emit getresult(); } /* 
结束会话,释放资源*/ session_id = NULL; return 0; } vd_thread::vd_thread() { int ret = 0; /* 用户登录 */ ret = MSPLogin("Birdman", "6726389!","appid = 54601962"); if( 0 != ret ) { printf( "MSPLogin failed, error code is %d\n", ret ); return ; } printf("\nMSPLogin successful :%d\n",ret); recog_params = "sub=iat, ptt=0, aue=speex-wb;7, auf=audio/L16;rate=16000, ent=sms16k, rst=plain, rse=utf8, vad_speech_tail=1500"; } void vd_thread::run() { qDebug()<<"vd_thread is runing...\n"; vd_send(recog_params); } void vd_thread::vd_stop() { qDebug("[Enter vd_stop]"); audio_status = MSP_AUDIO_SAMPLE_LAST ; } /***************************************************************************************/ void qvoiceintercom::on_recordButton_pressed() { qDebug()<<"[Enter on_recordButton_pressed]"; vdFlag = true; count++; if(count ==1) dspReadEntrance(); } void qvoiceintercom::on_recordButton_released() { qDebug()<<"[Enter on_recordButton_released]"; vdFlag = false; } void qvoiceintercom::on_pushButton_clicked() { printf("[Enter on_pushButton_clicked]"); count = 0; printf("Dsp device was closed!\n"); this->close(); }

5. 界面设计

语音对讲---基于图灵机器人+科大讯飞_第1张图片
语音对讲---基于图灵机器人+科大讯飞_第2张图片

你可能感兴趣的:(ARM-Linux)