在很多视频播放软件当中,字幕的处理是免不了的。有些字幕本身已经合成到图像当中,无需另行处理,但大部分都是通过字符叠加来实现的。DirectShow的字符叠加Filter在这些软件中扮演着重要的角色。这一节介绍DirectShow字符叠加Filter的编码实现,详细介绍如下;
这个Filter的大致作用是在视频流指定的一段时间内进行字符叠加,字符的字体、大小、颜色都可以进行控制,普遍支持RGB的各种编码格式,同时还能实现字符的其他效果,例如滚动等。下面来看看具体的编码实现;
注册表的配置对于Filter的开发者来说都大同小异,主要对g_Templates进行配置,如下代码;
// Class ID of the title overlay filter.
// {E3FB4BFE-8E5C-4aec-8162-7DA55BE486A1}
DEFINE_GUID(CLSID_HQTitleOverlay,
0xe3fb4bfe, 0x8e5c, 0x4aec, 0x81, 0x62, 0x7d, 0xa5, 0x5b, 0xe4, 0x86, 0xa1);
// Class ID of the title overlay filter's property page.
// {E70FE57A-19AA-4a4c-B39A-408D49D73851}
DEFINE_GUID(CLSID_HQTitleOverlayProp,
0xe70fe57a, 0x19aa, 0x4a4c, 0xb3, 0x9a, 0x40, 0x8d, 0x49, 0xd7, 0x38, 0x51);
……
// Class-factory template table: one entry per creatable COM class in this
// DLL. Each entry pairs a class ID with the static CreateInstance function
// that the class factory calls to build the object.
CFactoryTemplate g_Templates[] =
{
    // The overlay filter itself, together with its filter setup data.
    { L"HQ Title Overlay Std.", &CLSID_HQTitleOverlay, CFilterTitleOverlay::CreateInstance, NULL, &sudFilter },

    // The filter's property page; the trailing members are left empty.
    { L"HQ Title Overlay Property Page", &CLSID_HQTitleOverlayProp, CTitleOverlayProp::CreateInstance, NULL, NULL }
};
CFilterTitleOverlay类设计是整个Filter的关键所在,其中最重要的是父类的选择。如下代码;
class CFilterTitleOverlay : public CTransInPlaceFilter
, public ISpecifyPropertyPages
, public ITitleOverlay
CTransInPlaceFilter类是CFilterTitleOverlay功能实现的关键之关键,该类可以在视频传输的过程中截获数据流,便于进行字符叠加;
ISpecifyPropertyPages接口为CFilterTitleOverlay提供属性页支持功能;ITitleOverlay是一个只包含纯虚函数的类,也就是所谓的接口。下面来看看这个接口是如何声明的;
// ITitleOverlay
// The macro below expands to the following prototype:
// interface __declspec(novtable) ITitleOverlay : public IUnknown
DECLARE_INTERFACE_(ITitleOverlay, IUnknown)
{
// Sets the overlay type used by the filter. When the type needs to change,
// this method must be called first; the other methods that set parameters
// may only be called after this call has succeeded.
// The available overlay types are listed in the OVERLAY_TYPE enumeration.
// The declaration below expands to the following prototype:
// virtual HRESULT __stdcall put_TitleOverlayType(long inOverlayType) = 0
STDMETHOD(put_TitleOverlayType) (THIS_
long inOverlayType
) PURE;
……
};
先从CFilterTitleOverlay类的构造函数说起,在CFilterTitleOverlay构造函数中有一个对象是必须创建的,这个对象就是COverlayController,COverlayController是用来控制叠加效果的通用类,如下代码;
// Constructs the overlay filter on top of CTransInPlaceFilter, forwarding
// the name, outer unknown and result pointer to the base class, then sets
// the filter's defaults: static overlay type, no frame-rate estimation
// pending, a fresh overlay controller and a default title string.
CFilterTitleOverlay::CFilterTitleOverlay(TCHAR *tszName, LPUNKNOWN punk, HRESULT *phr)
    : CTransInPlaceFilter(tszName, punk, CLSID_HQTitleOverlay, phr)
{
    // Plain state defaults.
    mNeedEstimateFrameRate = FALSE;
    mOverlayType = OT_STATIC;

    // Generic helper object that controls the overlay effect.
    mOverlayController = new COverlayController();

    // Install the default title last; the size passed includes the
    // terminating null character.
    char defaultTitle[] = "Hello, DirectShow!";
    put_Title(defaultTitle, sizeof(defaultTitle));
}
CreateInstance函数不用多说了,它是一个静态成员函数,由类工厂通过g_Templates中登记的函数指针来调用,主要用来创建CFilterTitleOverlay对象,是COM组件必须具备的函数,否则就失去COM组件的意义,如下代码详解;
// Class-factory creation function for the title overlay filter.
//
// Parameters:
//   punk - outer (aggregating) IUnknown, forwarded to the constructor.
//   phr  - receives the result code; set to E_FAIL when the host process is
//          not on the allow-list and to E_OUTOFMEMORY when the allocation
//          fails. The constructor may also write to it.
// Returns the newly created filter, or NULL on failure.
CUnknown * WINAPI CFilterTitleOverlay::CreateInstance(LPUNKNOWN punk, HRESULT *phr)
{
#if 1
    // Only allow the filter to be created by specific host applications.
    // Both buffers start zero-filled, so they are valid empty strings even
    // if the path lookup below fails.
    char szCreatorPath[256] = "";
    char szCreatorName[256] = "";

    // The buffers are char, so call the ANSI entry point explicitly; the
    // generic name would resolve to the wide version in UNICODE builds.
    // Passing one less than the buffer size keeps the zero-initialized last
    // byte as a terminator even when the path is truncated.
    HMODULE hModule = ::GetModuleHandle(NULL);
    ::GetModuleFileNameA(hModule, szCreatorPath, sizeof(szCreatorPath) - 1);

    // Module paths use backslash separators. Searching for '\\' (rather
    // than a multi-character '//' constant, which never matches) is what
    // lets the executable name be extracted at all.
    const char *backSlash = ::strrchr(szCreatorPath, '\\');
    if (backSlash)
    {
        // Safe: the source is at most 255 characters, the destination 256.
        ::strcpy(szCreatorName, backSlash + 1);
    }
    ::_strlwr(szCreatorName);

    // Please specify your app name with lowercase
    if (::strstr(szCreatorName, "graphedt") == NULL &&
        ::strstr(szCreatorName, "ourapp") == NULL)
    {
        *phr = E_FAIL;
        return NULL;
    }
#endif

    // Create the CFilterTitleOverlay object.
    CFilterTitleOverlay *pNewObject = new CFilterTitleOverlay(NAME("TitleOverlay"), punk, phr);
    if (pNewObject == NULL)
    {
        // Report allocation failure to the class factory instead of leaving
        // *phr untouched.
        *phr = E_OUTOFMEMORY;
    }
    return pNewObject;
}
Transform函数是整个字符叠加处理的关键,在这个函数中可以捕获需要处理的数据(RGB格式),下面来看看具体的实现;
// Per-sample transform hook. When an overlay type is active, fetches the
// sample's data pointer and hands it to the overlay controller so the title
// is drawn directly into the sample buffer.
// The path shown here always returns NOERROR.
HRESULT CFilterTitleOverlay::Transform(IMediaSample *pSample)
{
// If we cannot read frame rate info from the input pin's connection media type,
// we estimate it from the first sample's time stamp!
……
if (mOverlayType != OT_NONE)
{
PBYTE pData = NULL;
// NOTE(review): the HRESULT returned by GetPointer is ignored, so pData
// may still be NULL when it is passed on - confirm DoTitleOverlay copes.
pSample->GetPointer(&pData);
mOverlayController->DoTitleOverlay(pData);
}
return NOERROR;
}
代码中最为关键的DoTitleOverlay函数就是实现字符叠加的函数,这个函数是COverlayController类中的一个成员函数,如下来看看它是如何实现的;
// Overlay only when the title has a non-empty size and is shorter than the
// image.
if (mImageHeight > mTitleSize.cy && mTitleSize.cx > 0 && mTitleSize.cy > 0)
{
……
// Address of the first image pixel to touch: row mStartPos.y, column
// mStartPos.x, with the column converted from pixels to bytes.
PBYTE pStartPos = pTopLine + mStartPos.y * strideInBytes + mStartPos.x * mImageBitCount / 8;
for (DWORD dwY = 0; dwY < (DWORD)mTitleSize.cy; dwY++)
{
// Title rows are read from the last stored row towards the first while
// the image pointer advances one stride per iteration
// (presumably because the title bits are stored bottom-up - TODO confirm).
PBYTE pbTitle = mTitleDIBBits + mDIBWidthInBytes * ((DWORD)mTitleSize.cy - dwY - 1);
// Point to the valid start position of title DIB
pbTitle += (mValidTitleRect.left >> 3);
// Bit offset of the first valid column inside that byte, and the
// matching exclusive end offset.
long startLeft = mValidTitleRect.left % 8;
long endRight = startLeft + mValidTitleRect.right - mValidTitleRect.left;
for (long dwX = startLeft; dwX < endRight; dwX++)
{
// The title is 1 bit per pixel, most significant bit first; a cleared
// bit selects a pixel that must be overlaid.
if ( !((0x80 >> (dwX & 7)) & pbTitle[dwX >> 3]) )
{
// presumably returns the address of the pixel (dwX - startLeft) pixels
// after pStartPos - verify against the pixel converter.
PBYTE pbPixel = mPixelConverter->NextNPixel(pStartPos, dwX - startLeft);
if (mIsOverlayByCover)
{
// Overwrite the pixel's RGB data (a 24-bit pixel occupies three bytes).
mPixelConverter->ConvertByCover(pbPixel);
}
else
{
mPixelConverter->ConvertByReverse(pbPixel);
}
}
}
// Advance to the next image row.
pStartPos += strideInBytes;
}
}
这个函数用于创建字符位图,创建一个DIB位图的虚拟内存空间,保存RGB数据格式,再通过GetDIBits函数获取数据缓冲区用于字符叠加之用,如下代码;
// Creates a bitmap of mTitleSize, draws mTitle into it through inDC, and
// copies the resulting 1-bit-per-pixel scan lines into mTitleDIBBits.
//
// Parameters:
//   inDC - device context used to create the bitmap, draw the text and read
//          the bits back.
// Returns the bitmap handle produced by CreateDIBitmap (NULL if creation
// failed).
HBITMAP COverlayController::ActualCreateTitleDIB(HDC inDC)
{
// DIB info we used to create title pixel-mapping.
// The system default color policy is:
// Initial Whole Black, while output area White-background and Black-text.
struct {
BITMAPINFOHEADER bmiHeader;
DWORD rgbEntries[2];
} bmi =
{
{
sizeof(BITMAPINFOHEADER),
0, // width placeholder, set to mTitleSize.cx below
0, // height placeholder, set to mTitleSize.cy below
1, // planes
1, // bits per pixel
BI_RGB,
0,
0,
0
},
{
// Two-entry color table.
0x00000000,
0xFFFFFFFF
}
};
……
// Set proper DIB size here! Important!
bmi.bmiHeader.biHeight = mTitleSize.cy;
bmi.bmiHeader.biWidth = mTitleSize.cx;
HBITMAP hbm = CreateDIBitmap(inDC, &bmi.bmiHeader, 0, NULL, NULL, 0);
BOOL pass = (hbm != NULL);
// Draw title after selecting DIB into the DC
if (pass)
{
// Remember the previously selected object so it can be restored after
// drawing.
HGDIOBJ hobj = SelectObject(inDC, hbm);
pass = ExtTextOut(inDC, 0, 0, ETO_OPAQUE | ETO_CLIPPED, NULL,
mTitle, lstrlen(mTitle), NULL);
SelectObject(inDC, hobj);
}
// Get the title-drew DIB bits
if (pass)
{
// presumably frees any previously allocated mTitleDIBBits - TODO confirm.
ReleaseTitleDIB();
// Attention: To get bitmap data from the DIB object,
// the scan line must be a multiple of 4 (DWORD)!
// If the actual bitmap data is not exactly fit for DWORD,
// The rest of DWORD bits will be filled automatically.
// So we should expand to bytes and round up to a multiple of 4.
mDIBWidthInBytes = ((mTitleSize.cx + 31) >> 3) & ~3;
mTitleDIBBits = new BYTE[mDIBWidthInBytes * mTitleSize.cy];
memset(mTitleDIBBits, 0, mDIBWidthInBytes * mTitleSize.cy);
LONG lLines = GetDIBits(inDC, hbm, 0, mTitleSize.cy, (PVOID)mTitleDIBBits,
(BITMAPINFO *)&bmi, DIB_RGB_COLORS);
// GetDIBits reports the number of scan lines copied; zero means failure.
pass = (lLines != 0);
}
……
return hbm;
}
CompleteConnect函数是用来完成output pin与下一个input pin连接之用的,也是构建Filter链的必备函数,如下代码;
// Called when a pin connection has been completed. Lets the base class
// finish the connection first; if that succeeded and the connected pin is
// the input pin, passes the input video information on to the overlay
// controller and returns that call's result instead.
HRESULT CFilterTitleOverlay::CompleteConnect(PIN_DIRECTION direction, IPin *pReceivePin)
{
    const HRESULT baseResult = CTransInPlaceFilter::CompleteConnect(direction, pReceivePin);

    // Nothing more to do when the base class failed or for any pin other
    // than the input pin.
    if (!SUCCEEDED(baseResult) || direction != PINDIR_INPUT)
    {
        return baseResult;
    }

    return SetInputVideoInfoToController();
}
CTitleOverlayProp类是从CBasePropertyPage直接继承的,用于配置和观察这个Filter的属性。CTitleOverlayProp其实是一个窗体,类似一个应用程序,不过我们可以直接对其进行消息捕捉,如下代码;
// Dialog message handler for the property page.
//   WM_INITDIALOG - caches the window handles of the page's controls.
//   WM_COMMAND    - runs the font chooser when the "change font" button is
//                   clicked, and marks the page dirty for every command.
// Every message, handled or not, is then forwarded to
// CBasePropertyPage::OnReceiveMessage, whose result is returned.
BOOL CTitleOverlayProp::OnReceiveMessage(HWND hwnd,
                                         UINT uMsg,
                                         WPARAM wParam,
                                         LPARAM lParam)
{
    if (uMsg == WM_INITDIALOG)
    {
        // Look up and remember each control of the dialog.
        m_hOverlayType   = GetDlgItem(hwnd, IDC_COMBO_OVERLAY_TYPE);
        m_hEditTilte     = GetDlgItem(hwnd, IDC_EDIT_TITLE);
        m_hEditStartX    = GetDlgItem(hwnd, IDC_EDIT_STARTX);
        m_hEditStartY    = GetDlgItem(hwnd, IDC_EDIT_STARTY);
        m_hEditStartTime = GetDlgItem(hwnd, IDC_EDIT_STARTTIME);
        m_hEditEndTime   = GetDlgItem(hwnd, IDC_EDIT_ENDTIME);
        m_hEditColorR    = GetDlgItem(hwnd, IDC_EDIT_COLORR);
        m_hEditColorG    = GetDlgItem(hwnd, IDC_EDIT_COLORG);
        m_hEditColorB    = GetDlgItem(hwnd, IDC_EDIT_COLORB);
    }
    else if (uMsg == WM_COMMAND)
    {
        // Only a click on the font button triggers the font chooser.
        if (HIWORD(wParam) == BN_CLICKED && LOWORD(wParam) == IDC_BUTTON_CHANGE_FONT)
        {
            OnButtonChangeFont();
        }

        // Any command notification marks the page as modified.
        SetDirty();
    }

    return CBasePropertyPage::OnReceiveMessage(hwnd, uMsg, wParam, lParam);
} // OnReceiveMessage
有了这个消息捕捉函数当然就可以直接对所需要配置的参数进行配置了。同时在继承CBasePropertyPage的时候为了方便CBasePropertyPage还提供了其他几个接口如CreateInstance、OnConnect、OnActivate等。
整个字符叠加Filter的编码就基本上完成了,其中几个地方还是需要再次提醒:第一、对基类的选择,一定要选择正确的基类,这样才能达到事半功倍的效果。第二、处理字符叠加时一定要注意帧频率,否则会产生错位。第三、字符需要绘制到一段虚拟的内存当中,不能直接绘制。字符叠加的应用非常广泛,估计暴风影音的字符叠加功能就是这样做的!