利用Waveform函数播放PCM数据流

前言

由于近期需要做一些语音合成的工作,需要对合成出来的语音数据进行实时播放。我在网上查了一些资料,并参考MSDN的相关说明,写了下面这个播放PCM数据流的类。多说无益,直接上代码:

头文件 pcmspeaker.h

#pragma once

#include <windows.h>
#include "mmsystem.h"

#pragma comment(lib, "winmm.lib") 

// Default capacity, in bytes, of each playback buffer (16 KiB); used as the
// default bufferSize argument of the CPcmSpeaker constructor.
#define DEF_MAX_BUFFER_SIZE (1024 * 16)
// Default number of playback buffers; used as the default bufferCnt argument
// of the CPcmSpeaker constructor.
#define DEF_MAX_BUFFER_COUNT 16

// Streams raw PCM audio to the default wave-out device (WAVE_MAPPER) through
// the Win32 waveOut* API, using a fixed pool of reusable buffers.
class CPcmSpeaker
{
public:
    // bufferSize: capacity in bytes of each pooled buffer.
    // bufferCnt:  number of pooled buffers.
    CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT);
    ~CPcmSpeaker();

    // Opens the output device for PCM data with the given channel count,
    // sample rate (samples per second) and bits per sample.
    // Returns 0 on success or if the device is already open, -1 on failure.
    int init(int channels, int samplePerSec, int bitsPerSample);

    // Queues len bytes of PCM data for playback, splitting it into chunks of
    // at most bufferSize bytes. When every pooled buffer is in use the call
    // waits up to timeout milliseconds for one to be released.
    // Returns 0 on success, a non-zero value on failure.
    int toSpeaker(const void *data, int len, int timeout = INFINITE);

    // Resets the output device and unprepares the pooled headers.
    // Returns 0 on success.
    int clearPcmData();

private:
    // Not copyable: the object owns heap buffers, a wave-out handle, an event
    // and a critical section, and the destructor releases all of them, so a
    // copy would release them twice. Declared but intentionally not defined.
    CPcmSpeaker(const CPcmSpeaker &);
    CPcmSpeaker &operator=(const CPcmSpeaker &);

    // One pooled buffer: the header handed to the waveOut API plus the storage
    // that header.lpData points at.
    typedef struct
    {
        WAVEHDR header;
        char *data;
    }WaveHeadandData;

    int m_maxBufferSize; // capacity in bytes of each pooled buffer
    int m_maxBufferCnt;  // number of entries in m_headAndDatas
    
    WaveHeadandData *m_headAndDatas; // the buffer pool (array of m_maxBufferCnt entries)

    // Device callback; signals m_hBufferEvent when a buffer has finished playing.
    // NOTE(review): init() passes `this` as a DWORD_PTR, but the instance and
    // parameter arguments are declared DWORD here, which is narrower than a
    // pointer on 64-bit builds. Switching them to DWORD_PTR has to be done in
    // this declaration and in the definition at the same time.
    static void CALLBACK waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2);

    // Copies data into a free pooled buffer and submits it to the device.
    int writeToWave(const void *data, int len);
    // Submits one chunk, waiting for a free buffer when the pool is full.
    int pcmtoWave(const void *data, int len, int timeout = INFINITE);

    // Shared state
    WAVEFORMATEX m_waveFormat;  // format description filled in by init()
    HWAVEOUT m_hWaveOut;        // wave-out device handle; NULL until init() succeeds
    HANDLE m_hBufferEvent;      // auto-reset event set by waveOutProc on WOM_DONE
    CRITICAL_SECTION m_BufferOpCriticalSection; // guards access to the pooled headers
};

实现文件 pcmspeaker.cpp

#include "PcmSpeaker.h"

// Builds the player's synchronisation objects and its buffer pool.
// bufferSize is the capacity in bytes of each pooled buffer and bufferCnt is
// the number of pooled buffers. The output device itself is not opened here.
CPcmSpeaker::CPcmSpeaker(int bufferSize, int bufferCnt)
    : m_maxBufferSize(bufferSize),
      m_maxBufferCnt(bufferCnt),
      m_hWaveOut(NULL)
{
    // Auto-reset, initially non-signalled event used to wait for a free buffer.
    m_hBufferEvent = CreateEvent(NULL, FALSE, FALSE, NULL);

    InitializeCriticalSection(&m_BufferOpCriticalSection);

    // Allocate the pool; every header starts out zeroed and flagged WHDR_DONE
    // so that it is immediately seen as available.
    m_headAndDatas = new WaveHeadandData[bufferCnt];
    WaveHeadandData *const poolEnd = m_headAndDatas + bufferCnt;
    for (WaveHeadandData *slot = m_headAndDatas; slot != poolEnd; ++slot)
    {
        memset(&slot->header, 0, sizeof(WAVEHDR));
        slot->header.dwFlags = WHDR_DONE;
        slot->data = new char[bufferSize];
    }
}


// Shuts the device down (if it was opened) and then releases the event, the
// critical section and the buffer pool, in that order.
CPcmSpeaker::~CPcmSpeaker()
{
    // Stop playback and close the device before the objects used by its
    // callback are destroyed.
    if (m_hWaveOut)
    {
        clearPcmData();
        waveOutClose(m_hWaveOut);
        m_hWaveOut = NULL;
    }

    // Release the synchronisation objects.
    CloseHandle(m_hBufferEvent);
    DeleteCriticalSection(&m_BufferOpCriticalSection);

    // Free each buffer's storage, then the pool array itself.
    WaveHeadandData *const poolEnd = m_headAndDatas + m_maxBufferCnt;
    for (WaveHeadandData *slot = m_headAndDatas; slot != poolEnd; ++slot)
    {
        delete[] slot->data;
    }
    delete[] m_headAndDatas;
}


int CPcmSpeaker::init(int channels, int samplePerSec, int bitsPerSample)
{
    if (m_hWaveOut != NULL) {
        return 0;// 已经进行了初始化
    }

    // 第一步: 获取waveformat信息
    m_waveFormat.wFormatTag = WAVE_FORMAT_PCM;
    m_waveFormat.nChannels = channels;
    m_waveFormat.wBitsPerSample = bitsPerSample;
    m_waveFormat.nSamplesPerSec = samplePerSec;
    m_waveFormat.nBlockAlign =
        m_waveFormat.nChannels * m_waveFormat.wBitsPerSample / 8;
    m_waveFormat.nAvgBytesPerSec =
        m_waveFormat.nSamplesPerSec * m_waveFormat.nBlockAlign;
    m_waveFormat.cbSize = sizeof(m_waveFormat);

    MMRESULT ret = waveOutOpen(NULL, WAVE_MAPPER, &m_waveFormat,
        NULL, NULL, WAVE_FORMAT_QUERY);
    if (MMSYSERR_NOERROR != ret) {
        return -1;
    }

    // 第二步: 获取WAVEOUT句柄
    ret = waveOutOpen(&m_hWaveOut, WAVE_MAPPER, &m_waveFormat,
        (DWORD_PTR)waveOutProc, (DWORD_PTR)this, CALLBACK_FUNCTION);

    if (MMSYSERR_NOERROR != ret) {
        return -1;
    }

    return 0;
}

// Device callback registered by init(). On WOM_DONE it signals the
// buffer-available event of the CPcmSpeaker passed as the instance value;
// WOM_OPEN, WOM_CLOSE and any other message are ignored.
//
// NOTE(review): init() passes `this` as a DWORD_PTR, but dwInstance is
// declared DWORD here, which is narrower than a pointer on 64-bit builds.
// Changing the parameters to DWORD_PTR must be done together with the
// declaration in the class.
void CALLBACK CPcmSpeaker::waveOutProc(HWAVEOUT hwo, UINT uMsg, DWORD dwInstance, DWORD dwParam1, DWORD dwParam2)
{
    if (uMsg != WOM_DONE)
        return;

    CPcmSpeaker *speaker = reinterpret_cast<CPcmSpeaker *>(dwInstance);

    // A buffer has finished playing: wake up a writer waiting for a free slot.
    EnterCriticalSection(&speaker->m_BufferOpCriticalSection);
    SetEvent(speaker->m_hBufferEvent);
    LeaveCriticalSection(&speaker->m_BufferOpCriticalSection);
}

int CPcmSpeaker::clearPcmData()
{
    if (m_hWaveOut != NULL)
    {
        EnterCriticalSection(&m_BufferOpCriticalSection);
        for (int i = 0; i < m_maxBufferCnt; i++)
        {
            if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered
                waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
        }

        waveOutReset(m_hWaveOut);
        LeaveCriticalSection(&m_BufferOpCriticalSection);
    }
    return 0;
}

int CPcmSpeaker::writeToWave(const void *data, int len)
{
    MMRESULT mmres;
    int i;
    EnterCriticalSection(&m_BufferOpCriticalSection);
    for (i = 0; i < m_maxBufferCnt; i++)
        if (m_headAndDatas[i].header.dwFlags & WHDR_DONE)
        {
            //查看是否需要释放之前已经Prepared资源
            if (m_headAndDatas[i].header.dwFlags & WHDR_PREPARED) //有数据被Prepered
                waveOutUnprepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
            
            //写入新的数据到音频缓冲区      
            memcpy(m_headAndDatas[i].data, data, len);
            m_headAndDatas[i].header.lpData = m_headAndDatas[i].data;
            m_headAndDatas[i].header.dwBufferLength = len;
            m_headAndDatas[i].header.dwFlags = 0;

            mmres = waveOutPrepareHeader(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
            if (MMSYSERR_NOERROR == mmres)
                mmres = waveOutWrite(m_hWaveOut, &m_headAndDatas[i].header, sizeof(WAVEHDR));
            

            break;
        }
    LeaveCriticalSection(&m_BufferOpCriticalSection);

    if (i == m_maxBufferCnt)
        return -2;

    return (mmres == MMSYSERR_NOERROR) ? 0 : -1;
}

//添加PCM音频数据,等待播放
int CPcmSpeaker::pcmtoWave(const void *data, int len, int timeout)
{
    int res;

    if (len > m_maxBufferSize)
        return -1;

    res = writeToWave(data, len);

    //缓冲区已满,需要等待
    if (res == -2)
    {       
        if (WAIT_OBJECT_0 == WaitForSingleObject(m_hBufferEvent, timeout))
            res = writeToWave(data, len);
    }
    
    return res;
}

int CPcmSpeaker::toSpeaker(const void *data, int len, int timeout)
{
    int res;
    int n, l, ptr;

    //对大数据做分段处理
    n = len / m_maxBufferSize;
    l = len % m_maxBufferSize;
    ptr = 0;

    for (int i = 0; i < n; i++)
    {
        res = pcmtoWave(((char *)data) + ptr, m_maxBufferSize, timeout);
        ptr += m_maxBufferSize;
        if (res != 0)
            return -1;
    }

    return pcmtoWave(((char *)data) + ptr, l, timeout);
}

用法

用法非常简单,如下:

  1. 定义实例:CPcmSpeaker ps;
  2. 初始化参数:ps.init(1, 16000, 16);三个参数分别为:通道数、采样率、每个采样的位数
  3. 填PCM数据到喇叭:ps.toSpeaker(data, data_len);两个参数分别为PCM数据指针和数据长度。

额外说明

微软的这组Waveform函数比较原始,使用时需要注意以下事项:

  • 如MSDN所言,waveOutProc中不能调用任何Waveform相关函数,原文如下:

Applications should not call any system-defined functions from inside a callback function, except for EnterCriticalSection, LeaveCriticalSection, midiOutLongMsg, midiOutShortMsg, OutputDebugString, PostMessage, PostThreadMessage, SetEvent, timeGetSystemTime, timeGetTime, timeKillEvent, and timeSetEvent. Calling other wave functions will cause deadlock.

  • CPcmSpeaker的构造函数定义为:CPcmSpeaker(int bufferSize = DEF_MAX_BUFFER_SIZE, int bufferCnt = DEF_MAX_BUFFER_COUNT),其有两个有默认值的参数,分别为每次写入系统音频缓冲区的数据的最大大小,以及CPcmSpeaker自己的缓冲区个数,bufferCnt不要太小(最好大于2,根据具体的情况设置大小,建议值为32),否则会出现卡顿现象。

  • toSpeaker函数带有第三个参数timeout(默认为INFINITE,单位为毫秒),表示函数调用的超时时间。换句话说,该函数在某种程度上是阻塞式的:如果写入得太快,系统来不及播放数据,导致CPcmSpeaker类内部的缓冲区已满,那么toSpeaker函数会一直等待,直到有缓冲区的数据播放完毕、腾出空间后才返回。如果不想一直等待,可以设置一个超时值,超时后函数也会返回。

你可能感兴趣的:(利用Waveform函数播放PCM数据流)