在HEVC参考代码(HM)中,每个CTU块通过xCompressCU()函数进行CU递归划分,从而得到最优的CU深度。
递归的过程如下图所示(图片来自论文 Fast CU Splitting and Pruning for Suboptimal CU Partitioning in HEVC Intra Coding)。图中每一个方框表示一个CU块,方框内的数字表示xCompressCU()函数的执行顺序。显而易见,如果能在执行xCompressCU()函数之前就把CU的递归深度范围确定下来,就可以降低HEVC编码器的复杂度。
针对帧内编码器,已经有很多文献提出了提前确定CU递归深度的方法。这里介绍"Fast CU Size Decision and Mode Decision Algorithm for HEVC Intra Coding"一文中Section II.A部分的具体实现。该文献利用周边CTU块的深度信息来预测当前CTU块的深度范围,具体细节可以查阅该文献。
Void TEncCu::compressCU( TComDataCU*& rpcCU ) { // initialize CU data m_ppcBestCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() ); m_ppcTempCU[0]->initCU( rpcCU->getPic(), rpcCU->getAddr() ); memset( m_preAnalyzeDepth, 0, rpcCU->getTotalNumPart() ); memset( m_preAnaDepthDetermined, 0, rpcCU->getTotalNumPart() ); memset( m_preAnaDepthRange , 0, rpcCU->getTotalNumPart() ); // Neighboring CTUs. TComDataCU* t_pcCULeft = rpcCU->getCULeft(); TComDataCU* t_pcCUAbove = rpcCU->getCUAbove(); TComDataCU* t_pcCUAboveLeft = rpcCU->getCUAboveLeft(); TComDataCU* t_pcCUAboveRight= rpcCU->getCUAboveRight(); UInt DepthLeft = 0; // Max Depth of LeftCTU. UInt DepthAbove = 0; // Max Depth of AboveCTU. UInt DepthAboveLeft = 0; UInt DepthAboveRight = 0; UInt picWidth = rpcCU->getSlice()->getSPS()->getPicWidthInLumaSamples(); UInt picHeight = rpcCU->getSlice()->getSPS()->getPicHeightInLumaSamples(); UInt uiLPelX = rpcCU->getCUPelX(); UInt uiRPelX = uiLPelX + rpcCU->getWidth(0) - 1; UInt uiTPelY = rpcCU->getCUPelY(); UInt uiBPelY = uiTPelY + rpcCU->getHeight(0) - 1; UChar tDepth; m_insidePicture= (uiRPelX<picWidth) && (uiBPelY<picHeight); // Considering Border CTUs. 
if ( t_pcCULeft!=NULL ) //获取左边CTU块最大的depth信息 { for ( Int i=0; i<256; i++ ) { tDepth = t_pcCULeft->getDepth(i); if ( tDepth>DepthLeft ) { DepthLeft = (UInt)tDepth; } } } else DepthLeft = 2; //如果是NULL,直接赋值2(16X16) if ( t_pcCUAbove!=NULL ) { for ( Int i=0; i<256; i++ ) { tDepth = t_pcCUAbove->getDepth(i); if ( tDepth>DepthAbove ) { DepthAbove = (UInt)tDepth; } } } else DepthAbove = 2; if ( t_pcCUAboveLeft!=NULL ) { DepthAboveLeft = t_pcCUAboveLeft->getDepth(g_auiRasterToZscan[16*15+15]); } else DepthAboveLeft = 2; if ( t_pcCUAboveRight!=NULL ) { DepthAboveRight = t_pcCUAboveRight->getDepth(g_auiRasterToZscan[16*15]); } else DepthAboveRight = 2; Double DepthPre = 0.3*DepthLeft+0.3*DepthAbove+0.2*DepthAboveLeft+0.2*DepthAboveRight; // 论文中Prediction Depth Type if ( DepthPre<=0.5 ) // 依据论文中的公式给出最小的depth level和最大的depth level { memset( m_preAnaDepthDetermined, 1, 256 ); memset( m_preAnaDepthRange, 2, 256 ); memset( m_preAnalyzeDepth, 0, 256 ); } else if ( DepthPre<=1.5 ) { memset( m_preAnaDepthDetermined, 1, 256 ); memset( m_preAnaDepthRange, 3, 256 ); memset( m_preAnalyzeDepth, 0, 256 ); } else { memset( m_preAnaDepthDetermined, 1, 256 ); memset( m_preAnaDepthRange, 3, 256 ); memset( m_preAnalyzeDepth, 1, 256 ); } DEBUG_STRING_NEW(sDebug) xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) ); DEBUG_STRING_OUTPUT(std::cout, sDebug) // Double Check. UInt MaxDepthSize=0; // UInt CTUPelX, CTUPelY; if ( m_insidePicture ) { for ( Int i=0; i<256; i++ ) { // Decisioned. tDepth = m_ppcBestCU[0]->getDepth(i); UChar cuDepth = m_preAnalyzeDepth[i]; UChar cuPreDetermined = m_preAnaDepthDetermined[i]; UChar cuRange = m_preAnaDepthRange[i]; if ( tDepth<cuDepth && tDepth>=cuDepth+cuRange ) { assert(0); } } } #if ADAPTIVE_QP_SELECTION if( m_pcEncCfg->getUseAdaptQpSelect() ) { if(rpcCU->getSlice()->getSliceType()!=I_SLICE) //IIII { xLcuCollectARLStats( rpcCU); } } #endif }
在xCompressCU()函数中加入相应的条件跳转:当前深度不在预测的深度范围内时,跳过该深度的模式判决。
// If slice start or slice end is within this cu... TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx()); Bool bSliceStart = pcSlice->getSliceSegmentCurStartCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurStartCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart(); Bool bSliceEnd = (pcSlice->getSliceSegmentCurEndCUAddr()>rpcTempCU->getSCUAddr()&&pcSlice->getSliceSegmentCurEndCUAddr()<rpcTempCU->getSCUAddr()+rpcTempCU->getTotalNumPart()); Bool bInsidePicture = ( uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) && ( uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ); // Fast CU decision Process. // When Current depth is not in the PreAnalyzedDepth Range, it just skips the PU/TU Decision process. // Added by xfHuang. Bool t_enCUSkip=false; if ( m_insidePicture ) { // Split Analysis For CU32X32 And CU16X16. if ( checkCurDepthInPreAnaRange( rpcBestCU, uiDepth ) == false ) //如果当前的depth level不在预测的depth level之内,后面直接将cost赋值成最大,不进行后面的预测操作。 { t_enCUSkip = true; rpcBestCU->getTotalCost() = MAX_DOUBLE/16; rpcBestCU->getTotalDistortion() = MAX_UINT>>3; rpcBestCU->getTotalBits() = MAX_UINT>>3; // avoid assert disable. if ( uiDepth==3 ) { rpcBestCU->setPartitionSize ( 0, SIZE_2Nx2N ); rpcBestCU->setPredictionMode( 0, MODE_INTRA ); } } } // We need to split, so don't try these modes. if(!bSliceEnd && !bSliceStart && bInsidePicture ) { if( t_enCUSkip==false ) { for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) { const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP); if (bIsLosslessMode) { iQP = lowestQP; } rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
/** Check whether a depth lies inside the pre-analysed depth window of a CU.
 *
 *  The window is read from m_preAnalyzeDepth (first depth) and m_preAnaDepthRange
 *  (number of depths) at the CU's z-order index, and covers
 *  [first depth, first depth + number of depths).
 *  m_preAnaDepthDetermined is currently not consulted.
 *
 *  \param pCU     CU whose z-order index selects the window
 *  \param uidepth depth to be tested
 *  \returns true if \p uidepth is inside the window, false otherwise
 */
Bool TEncCu::checkCurDepthInPreAnaRange( TComDataCU*& pCU, UInt uidepth )
{
  const UInt  zorderIdx  = pCU->getZorderIdxInCU();
  const UChar firstDepth = m_preAnalyzeDepth[zorderIdx];
  const UChar numDepths  = m_preAnaDepthRange[zorderIdx];

  // The window must not extend past depth 4.
  assert( firstDepth + numDepths <= 5 );

  return ( uidepth >= firstDepth ) && ( uidepth < firstDepth + numDepths );
}
以上是一种基于周边CTU块信息进行CU深度优化的方法。在大部分情况下,该方法只是跳过了64X64这一层depth,因此性能损失很小,平均大概在0.2%左右,编码时间可以节省10%左右。
[转载请注明作者和出处]