视频特效滤镜 via Media Foundation Transform (MFT)

视频特效滤镜 via Media Foundation Transform

  • 视频特效定义
  • Media Foundation Transform
    • IMFTransform::GetInputStreamInfo 函数
    • IMFTransform::GetOutputStreamInfo 函数
    • IMFTransform::GetInputAvailableType 函数
      • CGrayscale::OnGetPartialType 函数
    • IMFTransform::SetInputType 函数
      • CGrayscale::OnCheckInputType 函数
        • CGrayscale::OnCheckMediaType 函数
      • CGrayscale::OnSetInputType 函数
    • IMFTransform::SetOutputType 函数
    • IMFTransform::ProcessMessage 函数
    • IMFTransform::ProcessInput 函数
    • IMFTransform::ProcessOutput 函数
      • CGrayscale::OnProcessOutput 函数
  • 其他框架的滤镜

视频特效定义

视频特效(Video effects 或 Visual effects)是对每帧图像进行各种数字化处理达到的效果。如对画面的尺寸、位置、亮度及色度等参数进行处理,就可获得缩放、旋转、黑白、油画等各种效果。

常见的特效技术有:缩放、旋转、裁剪、叠加、老电影、黑白、淡入淡出、水印、去噪、慢动作、2D 转 3D 等等。

Media Foundation Transform

MF 中插件是以 MFT 的形式创建的,需要继承 IMFTransform 接口,接口函数如下:

// IMFTransform method declarations that an MFT class has to provide,
// grouped below by the role each method plays.

// Methods That Handle Format Negotiation
STDMETHODIMP GetStreamLimits(DWORD *pInputMinimum, DWORD *pInputMaximum, DWORD *pOutputMinimum, DWORD *pOutputMaximum);
STDMETHODIMP GetStreamCount(DWORD *pcInputStreams, DWORD *pcOutputStreams);
STDMETHODIMP GetStreamIDs(DWORD dwInputIDArraySize, DWORD *pdwInputIDs, DWORD dwOutputIDArraySize, DWORD *pdwOutputIDs);
STDMETHODIMP GetInputStreamInfo(DWORD dwInputStreamID, MFT_INPUT_STREAM_INFO *pStreamInfo);
STDMETHODIMP GetOutputStreamInfo(DWORD dwOutputStreamID, MFT_OUTPUT_STREAM_INFO *pStreamInfo);
STDMETHODIMP GetAttributes(IMFAttributes **ppAttributes);
STDMETHODIMP GetInputStreamAttributes(DWORD dwInputStreamID, IMFAttributes **ppAttributes);
STDMETHODIMP GetOutputStreamAttributes(DWORD dwOutputStreamID, IMFAttributes **ppAttributes);
STDMETHODIMP GetInputAvailableType(DWORD dwInputStreamID, DWORD dwTypeIndex, IMFMediaType **ppType);
STDMETHODIMP GetOutputAvailableType(DWORD dwOutputStreamID, DWORD dwTypeIndex, IMFMediaType **ppType);
STDMETHODIMP SetInputType(DWORD dwInputStreamID, IMFMediaType *pType, DWORD dwFlags);
STDMETHODIMP SetOutputType(DWORD dwOutputStreamID, IMFMediaType *pType, DWORD dwFlags);

// Methods That Specify or Retrieve State Information
STDMETHODIMP GetInputCurrentType(DWORD dwInputStreamID, IMFMediaType **ppType);
STDMETHODIMP GetOutputCurrentType(DWORD dwOutputStreamID, IMFMediaType **ppType);
STDMETHODIMP DeleteInputStream(DWORD dwStreamID);
STDMETHODIMP AddInputStreams(DWORD cStreams, DWORD *adwStreamIDs);
STDMETHODIMP GetInputStatus(DWORD dwInputStreamID, DWORD *pdwFlags);
STDMETHODIMP GetOutputStatus(DWORD *pdwFlags);
STDMETHODIMP SetOutputBounds(LONGLONG hnsLowerBound, LONGLONG hnsUpperBound);

// Methods That Handle Buffering and Processing Data
STDMETHODIMP ProcessEvent(DWORD dwInputStreamID, IMFMediaEvent *pEvent);
STDMETHODIMP ProcessMessage(MFT_MESSAGE_TYPE eMessage, ULONG_PTR ulParam);
STDMETHODIMP ProcessInput(DWORD dwInputStreamID, IMFSample *pSample, DWORD dwFlags);
STDMETHODIMP ProcessOutput(DWORD dwFlags, DWORD cOutputBufferCount, MFT_OUTPUT_DATA_BUFFER *pOutputSamples, DWORD *pStatus);

如果要实现一个异步的 MFT,则还需要继承 IMFMediaEventGenerator 和 IMFShutdown 接口。

// Method declarations of the two additional interfaces listed for an
// asynchronous MFT.

// IMFMediaEventGenerator
HRESULT BeginGetEvent(IMFAsyncCallback *pCallback, IUnknown *punkState);
HRESULT EndGetEvent(IMFAsyncResult *pResult, IMFMediaEvent **ppEvent);
HRESULT GetEvent(DWORD dwFlags, IMFMediaEvent **ppEvent);
HRESULT QueueEvent(MediaEventType met, REFGUID guidExtendedType, HRESULT hrStatus, const PROPVARIANT *pvValue);

// IMFShutdown
HRESULT GetShutdownStatus(MFSHUTDOWN_STATUS *pStatus);
HRESULT Shutdown();

下面介绍一下 IMFTransform 的几个比较重要的接口,以一个灰度化视频的插件为例。
代码包含在 Windows SDK 7.x 的 samples\multimedia\mediafoundation\mft_grayscale\ 目录下。

IMFTransform::GetInputStreamInfo 函数

返回对输入流的要求,比如每个视频 sample 必须包含完整的帧且只包含一帧,以及输入 buffer 的最小尺寸等等。

// Fills in the requirements this MFT places on its input stream.
//
// dwInputStreamID: identifier of the input stream being queried.
// pStreamInfo:     receives the stream requirements; must not be NULL.
//
// Returns E_POINTER when pStreamInfo is NULL, MF_E_INVALIDSTREAMNUMBER when
// the stream ID is rejected by IsValidInputStream, and S_OK otherwise.
HRESULT CGrayscale::GetInputStreamInfo(DWORD dwInputStreamID, MFT_INPUT_STREAM_INFO *pStreamInfo)
{
    AutoLock lock(m_critSec);

    if (NULL == pStreamInfo) {
        return E_POINTER;
    }

    if (!IsValidInputStream(dwInputStreamID)) {
        return MF_E_INVALIDSTREAMNUMBER;
    }

    // The call must succeed even before an input type has been negotiated.
    // Only dwFlags is meaningful in that case; the buffer size is reported
    // as zero until a media type (and therefore an image size) is known.
    pStreamInfo->dwFlags = MFT_INPUT_STREAM_WHOLE_SAMPLES | MFT_INPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER;
    pStreamInfo->cbSize = (m_pInputType != NULL) ? m_cbImageSize : 0;

    // No latency, look-ahead or alignment requirements.
    pStreamInfo->hnsMaxLatency = 0;
    pStreamInfo->cbMaxLookahead = 0;
    pStreamInfo->cbAlignment = 0;

    return S_OK;
}

IMFTransform::GetOutputStreamInfo 函数

返回输出流的信息,比如每个视频 sample 包含完整的一帧且大小固定,以及输出 buffer 的最小尺寸等等。

// Describes the output stream produced by this MFT.
//
// dwOutputStreamID: identifier of the output stream being queried.
// pStreamInfo:      receives the stream description; must not be NULL.
//
// Returns E_POINTER when pStreamInfo is NULL, MF_E_INVALIDSTREAMNUMBER when
// the stream ID is rejected by IsValidOutputStream, and S_OK otherwise.
HRESULT CGrayscale::GetOutputStreamInfo(DWORD dwOutputStreamID, MFT_OUTPUT_STREAM_INFO *pStreamInfo)
{
    AutoLock lock(m_critSec);

    if (NULL == pStreamInfo) {
        return E_POINTER;
    }

    if (!IsValidOutputStream(dwOutputStreamID)) {
        return MF_E_INVALIDSTREAMNUMBER;
    }

    // The call must succeed even before an output type has been negotiated.
    // Only dwFlags is meaningful in that case; the buffer size is reported
    // as zero until a media type (and therefore an image size) is known.
    const DWORD dwStreamFlags = MFT_OUTPUT_STREAM_WHOLE_SAMPLES
                              | MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER
                              | MFT_OUTPUT_STREAM_FIXED_SAMPLE_SIZE;

    pStreamInfo->dwFlags = dwStreamFlags;
    pStreamInfo->cbSize = (m_pOutputType != NULL) ? m_cbImageSize : 0;
    pStreamInfo->cbAlignment = 0;

    return S_OK;
}

IMFTransform::GetInputAvailableType 函数

获取支持的输入媒体类型,如果输出类型已设置,则要求输入和输出类型一致,否则提供一组可接受的类型。

// Enumerates the media types this MFT is willing to accept on its input.
//
// When an output type is already set, that type is the single preferred
// input type (index 0). Otherwise the partial types produced by
// OnGetPartialType are enumerated.
//
// Returns E_INVALIDARG when ppType is NULL, MF_E_INVALIDSTREAMNUMBER for a
// stream ID rejected by IsValidInputStream, MF_E_NO_MORE_TYPES when
// dwTypeIndex is past the end of the list, or the result of
// OnGetPartialType. On success *ppType holds a reference the caller owns.
HRESULT CGrayscale::GetInputAvailableType(
    DWORD           dwInputStreamID,    // Input stream ID.
    DWORD           dwTypeIndex,        // 0-based index into the list of preferred types.
    IMFMediaType    **ppType            // Receives a pointer to the media type.
    )
{
    AutoLock lock(m_critSec);

    if (NULL == ppType) {
        return E_INVALIDARG;
    }

    if (!IsValidInputStream(dwInputStreamID)) {
        return MF_E_INVALIDSTREAMNUMBER;
    }

    // No output type yet: offer a partial media type for the requested index.
    if (!m_pOutputType) {
        return OnGetPartialType(dwTypeIndex, ppType);
    }

    // The output type is fixed, so it is the one and only candidate.
    if (dwTypeIndex != 0) {
        return MF_E_NO_MORE_TYPES;
    }

    *ppType = m_pOutputType;
    m_pOutputType->AddRef();    // The caller receives its own reference.
    return S_OK;
}

CGrayscale::OnGetPartialType 函数

可接受的媒体类型,此处只提供了三种(理论上没有限制):

  1. NV12
  2. YUY2
  3. UYVY
// Video subtypes handled by this MFT. The position in this table is the
// type index used when partial media types are enumerated.
const GUID* g_MediaSubtypes[] = {
    &MEDIASUBTYPE_NV12,
    &MEDIASUBTYPE_YUY2,
    &MEDIASUBTYPE_UYVY
};

// Builds a partial video media type (major type + subtype only) for the
// entry of g_MediaSubtypes selected by dwTypeIndex.
//
// dwTypeIndex: 0-based index into g_MediaSubtypes.
// ppmt:        receives the new media type; the caller owns the reference.
//
// Returns MF_E_NO_MORE_TYPES when the index is out of range, otherwise the
// result of creating and populating the media type.
HRESULT CGrayscale::OnGetPartialType(DWORD dwTypeIndex, IMFMediaType **ppmt)
{
    if (dwTypeIndex >= g_cNumSubtypes) {
        return MF_E_NO_MORE_TYPES;
    }

    HRESULT hr = S_OK;
    IMFMediaType *pPartialType = NULL;

    CHECK_HR(hr = MFCreateMediaType(&pPartialType));
    CHECK_HR(hr = pPartialType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
    CHECK_HR(hr = pPartialType->SetGUID(MF_MT_SUBTYPE, *g_MediaSubtypes[dwTypeIndex]));

    // Give the caller its own reference; the local one is dropped below.
    *ppmt = pPartialType;
    pPartialType->AddRef();

done:
    SAFE_RELEASE(pPartialType);
    return hr;
}

IMFTransform::SetInputType 函数

MF session 尝试设置某个输入媒体类型到该 MFT,可以是 Test Only 的,如果接受该类型,则返回成功,否则返回失败。

// Tests or sets the media type of the input stream.
//
// dwInputStreamID: identifier of the input stream.
// pType:           proposed media type; NULL clears the current input type.
// dwFlags:         zero, or MFT_SET_TYPE_TEST_ONLY to validate without setting.
//
// Returns MF_E_INVALIDSTREAMNUMBER for a stream ID rejected by
// IsValidInputStream, E_INVALIDARG for any other flag bit,
// MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING while output is
// pending, or the result of OnCheckInputType / OnSetInputType.
HRESULT CGrayscale::SetInputType( DWORD dwInputStreamID,
      IMFMediaType    *pType, // Can be NULL to clear the input type.
      DWORD           dwFlags )
{
    AutoLock lock(m_critSec);

    if (!IsValidInputStream(dwInputStreamID)) {
        return MF_E_INVALIDSTREAMNUMBER;
    }

    // MFT_SET_TYPE_TEST_ONLY is the only flag this method understands.
    if ((dwFlags & ~MFT_SET_TYPE_TEST_ONLY) != 0) {
        return E_INVALIDARG;
    }

    HRESULT hr = S_OK;
    const BOOL bTestOnly = ((dwFlags & MFT_SET_TYPE_TEST_ONLY) != 0);

    // The type cannot change while there is still output to be produced.
    if (HasPendingOutput()) {
        CHECK_HR(hr = MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING);
    }

    // A NULL type (clear request) needs no validation.
    if (pType != NULL) {
        CHECK_HR(hr = OnCheckInputType(pType));
    }

    // Commit the type unless the caller only asked for a test.
    if (!bTestOnly) {
        CHECK_HR(hr = OnSetInputType(pType));
    }

done:
    return hr;
}

CGrayscale::OnCheckInputType 函数

检查输入类型,如果已设置输出类型,则要求一致,否则做视频类型检查(OnCheckMediaType)。

// Validates a proposed input media type.
//
// pmt: the proposed type; must not be NULL (asserted).
//
// When an output type is set the proposed type has to be equal to it;
// otherwise the type is checked on its own by OnCheckMediaType.
// Returns S_OK when acceptable, MF_E_INVALIDMEDIATYPE when it does not match
// the output type, or the result of OnCheckMediaType.
HRESULT CGrayscale::OnCheckInputType(IMFMediaType *pmt)
{
    assert(pmt != NULL);

    // No output type to compare against: judge the type by itself.
    if (m_pOutputType == NULL) {
        return OnCheckMediaType(pmt);
    }

    DWORD dwMatchFlags = 0;
    const HRESULT hrEqual = pmt->IsEqual(m_pOutputType, &dwMatchFlags);

    // Anything other than S_OK (including S_FALSE) counts as a mismatch.
    return (hrEqual == S_OK) ? S_OK : MF_E_INVALIDMEDIATYPE;
}

CGrayscale::OnCheckMediaType 函数

检测视频类型是否符合该 MFT 的要求。

// Checks whether a media type is one this MFT can process: the major type
// must be video and the subtype must appear in g_MediaSubtypes.
//
// pmt: the media type to inspect.
//
// Returns S_OK for a supported type, the failure from GetGUID when an
// attribute cannot be read, or whatever RETURN_IF_FALSE_EX yields for
// MF_E_INVALIDMEDIATYPE when a check fails.
// NOTE(review): RETURN_IF_FALSE_EX is defined outside this view; presumably
// it returns its second argument when the condition is false - confirm.
HRESULT CGrayscale::OnCheckMediaType(IMFMediaType *pmt)
{
    HRESULT hr = S_OK;
    GUID major_type = GUID_NULL;
    GUID guidSubtype = GUID_NULL;
    BOOL bFoundMatchingSubtype = FALSE;
    DWORD iSubtype = 0;

    // The major type has to be video.
    CHECK_HR(hr = pmt->GetGUID(MF_MT_MAJOR_TYPE, &major_type));
    RETURN_IF_FALSE_EX(major_type == MFMediaType_Video, MF_E_INVALIDMEDIATYPE);

    // The subtype has to be listed in the global subtype table.
    CHECK_HR(hr = pmt->GetGUID(MF_MT_SUBTYPE, &guidSubtype));

    while (iSubtype < g_cNumSubtypes && !bFoundMatchingSubtype) {
        if (guidSubtype == *g_MediaSubtypes[iSubtype]) {
            bFoundMatchingSubtype = TRUE;
        }
        ++iSubtype;
    }

    RETURN_IF_FALSE_EX(bFoundMatchingSubtype, MF_E_INVALIDMEDIATYPE);

done:
    return hr;
}

CGrayscale::OnSetInputType 函数

设置或清除输入媒体类型,并获得一帧图像的宽高和大小。

// Sets or clears the input media type and refreshes the cached format
// information derived from it (FOURCC, transform function, frame size and
// image size).
//
// pmt: the new input type, or NULL to clear the current one.
//
// Returns S_OK on success. If the format information cannot be derived from
// the new type (unreadable subtype, unsupported FOURCC, missing frame size,
// or GetImageSize failure) the failing HRESULT is returned and the transform
// is left with no input type set, rather than in a half-configured state.
HRESULT CGrayscale::OnSetInputType(IMFMediaType *pmt)
{
    HRESULT hr = S_OK;

    // Take the new reference before dropping the old one so that re-setting
    // the type that is already stored cannot release it prematurely.
    if (pmt)
        pmt->AddRef();
    SAFE_RELEASE(m_pInputType);
    m_pInputType = pmt;

    // Start from a cleared format; it is rebuilt below when a type is given.
    m_imageWidthInPixels = 0;
    m_imageHeightInPixels = 0;
    m_videoFOURCC = 0;
    m_cbImageSize = 0;
    m_pTransformFn = NULL;

    if (m_pInputType != NULL) {
        GUID subtype = GUID_NULL;
        CHECK_HR(hr = m_pInputType->GetGUID(MF_MT_SUBTYPE, &subtype));

        // The first DWORD of the subtype GUID is used as the FOURCC code.
        m_videoFOURCC = subtype.Data1;
        switch (m_videoFOURCC) {
            case FOURCC_YUY2:
                m_pTransformFn = TransformImage_YUY2;
                break;
            case FOURCC_UYVY:
                m_pTransformFn = TransformImage_UYVY;
                break;
            case FOURCC_NV12:
                m_pTransformFn = TransformImage_NV12;
                break;
            default:
                // A format with no transform function cannot be processed.
                CHECK_HR(hr = E_UNEXPECTED);
                break;
        }

        CHECK_HR(hr = MFGetAttributeSize( m_pInputType, MF_MT_FRAME_SIZE, &m_imageWidthInPixels, &m_imageHeightInPixels ));
        CHECK_HR(hr = GetImageSize(m_videoFOURCC, m_imageWidthInPixels, m_imageHeightInPixels, &m_cbImageSize));
    }

done:
    if (FAILED(hr)) {
        // Do not keep a type whose format information is incomplete.
        SAFE_RELEASE(m_pInputType);
        m_imageWidthInPixels = 0;
        m_imageHeightInPixels = 0;
        m_videoFOURCC = 0;
        m_cbImageSize = 0;
        m_pTransformFn = NULL;
    }
    return hr;
}

IMFTransform::SetOutputType 函数

MF session 尝试设置某个输出媒体类型到该 MFT,可以是 Test Only 的,如果接受该类型,则返回成功,否则返回失败。检查过程类似 SetInputType,故此处不展开。

// Tests or sets the media type of the output stream.
//
// dwOutputStreamID: identifier of the output stream.
// pType:            proposed media type; NULL clears the current output type.
// dwFlags:          zero, or MFT_SET_TYPE_TEST_ONLY to validate without setting.
//
// Returns MF_E_INVALIDSTREAMNUMBER for a stream ID rejected by
// IsValidOutputStream, E_INVALIDARG for any other flag bit,
// MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING while output is
// pending, or the result of OnCheckOutputType / OnSetOutputType.
HRESULT CGrayscale::SetOutputType( DWORD dwOutputStreamID,
    IMFMediaType    *pType, // Can be NULL to clear the output type.
    DWORD           dwFlags )
{
    AutoLock lock(m_critSec);

    if (!IsValidOutputStream(dwOutputStreamID)) {
        return MF_E_INVALIDSTREAMNUMBER;
    }

    // MFT_SET_TYPE_TEST_ONLY is the only flag this method understands.
    if ((dwFlags & ~MFT_SET_TYPE_TEST_ONLY) != 0) {
        return E_INVALIDARG;
    }

    HRESULT hr = S_OK;
    const BOOL bTestOnly = ((dwFlags & MFT_SET_TYPE_TEST_ONLY) != 0);

    // The type cannot change while there is still output to be produced.
    if (HasPendingOutput()) {
        CHECK_HR(hr = MF_E_TRANSFORM_CANNOT_CHANGE_MEDIATYPE_WHILE_PROCESSING);
    }

    // A NULL type (clear request) needs no validation.
    if (pType != NULL) {
        CHECK_HR(hr = OnCheckOutputType(pType));
    }

    // Commit the type unless the caller only asked for a test.
    if (!bTestOnly) {
        CHECK_HR(hr = OnSetOutputType(pType));
    }

done:
    return hr;
}

IMFTransform::ProcessMessage 函数

处理发送到该 MFT 的消息。

// Handles a message sent to this MFT.
//
// eMessage: the message to handle.
// ulParam:  message parameter; not used by this implementation.
//
// Returns the result of OnFlush for MFT_MESSAGE_COMMAND_FLUSH, E_NOTIMPL for
// MFT_MESSAGE_SET_D3D_MANAGER, and S_OK for every other message.
HRESULT CGrayscale::ProcessMessage(MFT_MESSAGE_TYPE eMessage, ULONG_PTR ulParam)
{
    AutoLock lock(m_critSec);

    if (eMessage == MFT_MESSAGE_COMMAND_FLUSH) {
        return OnFlush();
    }

    if (eMessage == MFT_MESSAGE_SET_D3D_MANAGER) {
        // The pipeline should only send this when the MFT advertises
        // MF_SA_D3D_AWARE as TRUE. This MFT does not implement it, so the
        // message is rejected.
        return E_NOTIMPL;
    }

    // Nothing to do for the remaining messages:
    //  - MFT_MESSAGE_COMMAND_DRAIN asks the MFT to stop accepting input until
    //    all pending output has been processed, which is already the default
    //    behavior here.
    //  - MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, MFT_MESSAGE_NOTIFY_END_STREAMING,
    //    MFT_MESSAGE_NOTIFY_END_OF_STREAM and
    //    MFT_MESSAGE_NOTIFY_START_OF_STREAM require no action.
    return S_OK;
}

IMFTransform::ProcessInput 函数

处理一个输入的 sample,这里主要是把该 sample 缓存起来。

// Accepts one input sample and caches it in m_pSample; the image processing
// itself is deferred to ProcessOutput.
//
// dwInputStreamID: identifier of the input stream.
// pSample:         the input sample; checked by RETURN_IF_NULL.
// dwFlags:         reserved, must be zero.
//
// Returns MF_E_INVALIDSTREAMNUMBER for a stream ID rejected by
// IsValidInputStream, and MF_E_NOTACCEPTING when either media type is unset
// or a sample is already cached.
// NOTE(review): the RETURN_IF_* macros are defined outside this view;
// presumably each returns early (with the given HRESULT where one is passed)
// when its condition holds - confirm the exact codes they return.
HRESULT CGrayscale::ProcessInput( DWORD dwInputStreamID, IMFSample *pSample, DWORD dwFlags )
{
    AutoLock lock(m_critSec);
    RETURN_IF_NULL(pSample);
    RETURN_IF_FALSE_EX(dwFlags == 0, E_INVALIDARG); // dwFlags is reserved and must be zero.
    
    if (!IsValidInputStream(dwInputStreamID))
        return MF_E_INVALIDSTREAMNUMBER;
        
    if (!m_pInputType || !m_pOutputType)
        return MF_E_NOTACCEPTING;   // Client must set input and output types.
        
    // Only one sample is held at a time; it is released again in ProcessOutput.
    if (m_pSample != NULL)
        return MF_E_NOTACCEPTING;   // We already have an input sample.
        
    HRESULT hr = S_OK;
    DWORD dwBufferCount = 0;
    // Validate the number of buffers. There should only be a single buffer to hold the video frame. 
    hr = pSample->GetBufferCount(&dwBufferCount);
    RETURN_IF_FAILED(hr);
    // Reject samples with no buffer at all, then samples with more than one.
    RETURN_IF_FALSE(dwBufferCount > 0);
    RETURN_IF_TRUE(dwBufferCount > 1, MF_E_SAMPLE_HAS_TOO_MANY_BUFFERS);
    
    // Cache the sample. We do the actual work in ProcessOutput.
    m_pSample = pSample;
    pSample->AddRef();  // Hold a reference count on the sample.
    return hr;
}

IMFTransform::ProcessOutput 函数

处理一个输出 sample 的请求,如果尚未有输入,则请求输入(返回 MF_E_TRANSFORM_NEED_MORE_INPUT),否则进行处理(特效算法),最后设置 sample 的 duration 和 timestamp。

// Produces one output sample from the cached input sample.
//
// dwFlags:            not inspected by this implementation.
// cOutputBufferCount: number of elements in pOutputSamples; at least one is
//                     required because element 0 is used.
// pOutputSamples:     output buffer array. Element 0 must carry a
//                     caller-allocated sample that receives the processed image.
// pdwStatus:          receives status flags (set to 0 once processing succeeds).
//
// Returns E_POINTER for a NULL array or status pointer, E_INVALIDARG for an
// empty array or a missing output sample, MF_E_TRANSFORM_NEED_MORE_INPUT when
// no input sample is cached, or the result of processing. The cached input
// sample is released once processing has been attempted, whether or not it
// succeeded.
HRESULT CGrayscale::ProcessOutput(
    DWORD dwFlags,
    DWORD cOutputBufferCount, 
    MFT_OUTPUT_DATA_BUFFER *pOutputSamples,
    DWORD *pdwStatus)
{
    AutoLock lock(m_critSec);

    // Validate everything that is dereferenced below.
    if (pOutputSamples == NULL || pdwStatus == NULL)
        return E_POINTER;

    if (cOutputBufferCount == 0)
        return E_INVALIDARG;

    // The output sample is used as supplied; none is allocated here.
    if (pOutputSamples[0].pSample == NULL)
        return E_INVALIDARG;

    if (m_pSample == NULL)
        return MF_E_TRANSFORM_NEED_MORE_INPUT;

    // All locals are declared before the first CHECK_HR so that a jump to
    // 'done' never skips an initialization and cleanup only ever sees
    // initialized pointers.
    HRESULT hr = S_OK;
    IMFMediaBuffer *pInput = NULL;
    IMFMediaBuffer *pOutput = NULL;
    LONGLONG hnsDuration = 0;
    LONGLONG hnsTime = 0;

    CHECK_HR(hr = m_pSample->ConvertToContiguousBuffer(&pInput));
    CHECK_HR(hr = pOutputSamples[0].pSample->ConvertToContiguousBuffer(&pOutput));

    CHECK_HR(hr = OnProcessOutput(pInput, pOutput));

    pOutputSamples[0].dwStatus = 0;
    *pdwStatus = 0;

    // Carry the duration and time stamp over when the input sample has them.
    if (SUCCEEDED(m_pSample->GetSampleDuration(&hnsDuration)))
        CHECK_HR(hr = pOutputSamples[0].pSample->SetSampleDuration(hnsDuration));

    if (SUCCEEDED(m_pSample->GetSampleTime(&hnsTime)))
        CHECK_HR(hr = pOutputSamples[0].pSample->SetSampleTime(hnsTime));

done:
    SAFE_RELEASE(m_pSample);    // The input sample is consumed either way.
    SAFE_RELEASE(pInput);
    SAFE_RELEASE(pOutput);
    return hr;
}

CGrayscale::OnProcessOutput 函数

获得输入输出 buffer 指针后调用算法函数进行处理,最后别忘了设置有效 buffer 的长度。

// Runs the image transform function from the input buffer into the output
// buffer and sets the valid data length on the output buffer.
//
// pIn:  buffer holding the source image.
// pOut: buffer that receives the processed image.
//
// Returns S_OK on success, E_UNEXPECTED when no transform function is
// selected, or the failure from GetDefaultStride, LockBuffer or
// SetCurrentLength. Failures are propagated to the caller instead of being
// reported as success.
HRESULT CGrayscale::OnProcessOutput(IMFMediaBuffer *pIn, IMFMediaBuffer *pOut)
{
    HRESULT hr = S_OK;
    BYTE *pDest = NULL;         // Destination buffer.
    LONG lDestStride = 0;       // Destination stride.
    BYTE *pSrc = NULL;          // Source buffer.
    LONG lSrcStride = 0;        // Source stride.

    // NOTE(review): VideoBufferLock is defined elsewhere; presumably it
    // unlocks the buffer when it goes out of scope - confirm.
    VideoBufferLock inputLock(pIn);
    VideoBufferLock outputLock(pOut);
    LONG lDefaultStride = 0;

    // Both buffers are locked with the default stride of the input type.
    CHECK_HR(hr = GetDefaultStride(m_pInputType, &lDefaultStride));
    CHECK_HR(hr = inputLock.LockBuffer(lDefaultStride, this->m_imageHeightInPixels, &pSrc, &lSrcStride));
    CHECK_HR(hr = outputLock.LockBuffer(lDefaultStride, m_imageHeightInPixels, &pDest, &lDestStride));

    // Invoke the image transform function.
    if (m_pTransformFn)
        (*m_pTransformFn)( pDest, lDestStride, pSrc, lSrcStride, m_imageWidthInPixels, m_imageHeightInPixels);
    else
        CHECK_HR(hr = E_UNEXPECTED);

    // Record how many bytes of the output buffer now hold valid data.
    CHECK_HR(hr = pOut->SetCurrentLength(m_cbImageSize));

done:
    return hr;
}

其他框架的滤镜

  • 关于 FFmpeg 的视频滤镜请参考 这里。
  • 关于 DirectShow 的视频滤镜请参考 这里。

Blueware
EOF

你可能感兴趣的:(Multimedia,多媒体开发)