CABAC 是唯一一个应用到HEVC中的熵编码方式,效率明显高于CAVLC编码。
但是CABAC有天然的serial operation dependence,致使硬件实施很困难,尤其是real-time 要求和mobile 设备等。
CABAC 用到的上下文模型中,残差信号(也就是变换系数, transform coefficients)相关的语法元素占了70%左右的bins,对熵编码性能的影响是显而易见的
再者就是MVD(motion vector difference )占据了差不多20% 的 bins
对这两个部分进行优化,对于降低CABAC的计算复杂度和减少数据依赖性都有着重要意义
至于优化,先要了解这部分的parsing 过程
接下来先分析残差信号的变换系数的parsing过程的code,希望可以共同学习
在类TEncSbac下定义了所有模块的syntax elements parsing 过程函数, codeCoeffNxN 就是针对变换系数的函数:
/** Entropy-code the quantized coefficients of one transform block.
 *
 * The function writes, in this order: the optional transform-skip flags, the
 * (x, y) position of the last significant coefficient, and then, for every
 * 16-coefficient sub-set (coefficient group, CG) walked from the one holding
 * the last coefficient down to sub-set 0: the coded-sub-block flag, the
 * significance flags, the greater-than-1 flags, at most one greater-than-2
 * flag, the sign bits and the Golomb-Rice coded remaining levels.
 *
 * \param pcCU          CU that owns the block (queried for scan index, bypass flag, PPS/slice options)
 * \param pcCoef        coefficients of the block, addressed as (y << log2(width)) + x
 * \param uiAbsPartIdx  partition index of the block inside the CU
 * \param uiWidth       block width; clamped to the SPS maximum transform size
 * \param uiHeight      block height; clamped together with the width
 * \param uiDepth       only used for trace output in this function
 * \param eTType        texture type; reduced to TEXT_LUMA / TEXT_CHROMA / TEXT_NONE before context selection
 */
Void TEncSbac::codeCoeffNxN( TComDataCU* pcCU, TCoeff* pcCoef, UInt uiAbsPartIdx, UInt uiWidth, UInt uiHeight, UInt uiDepth, TextType eTType )
{
  DTRACE_CABAC_VL( g_nSymbolCounter++ )
  DTRACE_CABAC_T( "\tparseCoeffNxN()\teType=" )
  DTRACE_CABAC_V( eTType )
  DTRACE_CABAC_T( "\twidth=" )
  DTRACE_CABAC_V( uiWidth )
  DTRACE_CABAC_T( "\theight=" )
  DTRACE_CABAC_V( uiHeight )
  DTRACE_CABAC_T( "\tdepth=" )
  DTRACE_CABAC_V( uiDepth )
  DTRACE_CABAC_T( "\tabspartidx=" )
  DTRACE_CABAC_V( uiAbsPartIdx )
  DTRACE_CABAC_T( "\ttoCU-X=" )
  DTRACE_CABAC_V( pcCU->getCUPelX() )
  DTRACE_CABAC_T( "\ttoCU-Y=" )
  DTRACE_CABAC_V( pcCU->getCUPelY() )
  DTRACE_CABAC_T( "\tCU-addr=" )
  DTRACE_CABAC_V( pcCU->getAddr() )
  DTRACE_CABAC_T( "\tinCU-X=" )
  DTRACE_CABAC_V( g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ] )
  DTRACE_CABAC_T( "\tinCU-Y=" )
  DTRACE_CABAC_V( g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ] )
  DTRACE_CABAC_T( "\tpredmode=" )
  DTRACE_CABAC_V( pcCU->getPredictionMode( uiAbsPartIdx ) )
  DTRACE_CABAC_T( "\n" )

  // A block can never be larger than the maximum transform size of the SPS.
  if( uiWidth > m_pcSlice->getSPS()->getMaxTrSize() )
  {
    uiWidth  = m_pcSlice->getSPS()->getMaxTrSize();
    uiHeight = m_pcSlice->getSPS()->getMaxTrSize();
  }

  UInt uiNumSig = 0;

  // Count the non-zero coefficients of the whole block; nothing is coded for an all-zero block.
  uiNumSig = TEncEntropy::countNonZeroCoeffs(pcCoef, uiWidth * uiHeight);
  if ( uiNumSig == 0 )
    return;

  if(pcCU->getSlice()->getPPS()->getUseTransformSkip())
  {
    codeTransformSkipFlags( pcCU,uiAbsPartIdx, uiWidth, uiHeight, eTType );
  }

  // Collapse the texture type to one of three classes: luma, chroma or none.
  eTType = eTType == TEXT_LUMA ? TEXT_LUMA : ( eTType == TEXT_NONE ? TEXT_NONE : TEXT_CHROMA );

  //----- encode significance map -----
  const UInt uiLog2BlockSize = g_aucConvertToBit[ uiWidth ] + 2;
  UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
  const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlockSize - 1 ]; // scan position -> block position

  // Sign data hiding is disabled for transquant-bypass CUs, otherwise it follows the PPS flag.
  Bool beValid;
  if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
  {
    beValid = false;
  }
  else
  {
    beValid = pcCU->getSlice()->getPPS()->getSignHideFlag() > 0;
  }

  // Find position of last coefficient
  Int scanPosLast = -1; // pre-incremented inside the do/while below
  Int posLast;

  // Select the table that gives the scan order of the coefficient groups for this block size.
  const UInt * scanCG;
  {
    scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlockSize > 3 ? uiLog2BlockSize-2-1 : 0 ];
    if( uiLog2BlockSize == 3 )
    {
      scanCG = g_sigLastScan8x8[ uiScanIdx ];
    }
    else if( uiLog2BlockSize == 5 )
    {
      scanCG = g_sigLastScanCG32x32;
    }
  }

  // One flag per coefficient group: non-zero when the group holds at least one non-zero coefficient.
  UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
  static const UInt uiShift = MLS_CG_SIZE >> 1;     // log2 of the coefficient-group width
  const UInt uiNumBlkSide = uiWidth >> uiShift;     // number of coefficient groups per block row
  ::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );

  // Walk the scan forward until every non-zero coefficient has been seen. When the
  // loop ends, scanPosLast/posLast identify the last significant coefficient and all
  // groups containing a non-zero coefficient are marked.
  do
  {
    posLast = scan[ ++scanPosLast ];

    // get L1 sig map
    UInt uiPosY   = posLast >> uiLog2BlockSize;
    UInt uiPosX   = posLast - ( uiPosY << uiLog2BlockSize );
    UInt uiBlkIdx = uiNumBlkSide * (uiPosY >> uiShift) + (uiPosX >> uiShift); // raster index of the enclosing group
    if( pcCoef[ posLast ] )
    {
      uiSigCoeffGroupFlag[ uiBlkIdx ] = 1;
    }

    uiNumSig -= ( pcCoef[ posLast ] != 0 );
  }
  while ( uiNumSig > 0 );

  // Code position of last coefficient
  Int posLastY = posLast >> uiLog2BlockSize;
  Int posLastX = posLast - ( posLastY << uiLog2BlockSize );
  codeLastSignificantXY(posLastX, posLastY, uiWidth, uiHeight, eTType, uiScanIdx);

  //===== code significance flag =====
  ContextModel * const baseCoeffGroupCtx = m_cCUSigCoeffGroupSCModel.get( 0, eTType );
  // Chroma significance contexts are stored right after the luma ones.
  ContextModel * const baseCtx = (eTType==TEXT_LUMA) ? m_cCUSigSCModel.get( 0, 0 ) : m_cCUSigSCModel.get( 0, 0 ) + NUM_SIG_FLAG_CTX_LUMA;

  const Int iLastScanSet = scanPosLast >> LOG2_SCAN_SET_SIZE; // sub-set that contains the last coefficient
  UInt c1            = 1;
  UInt uiGoRiceParam = 0;
  Int  iScanPosSig   = scanPosLast;

  // Process the sub-sets in reverse scan order, starting with the one holding the last coefficient.
  for( Int iSubSet = iLastScanSet; iSubSet >= 0; iSubSet-- )
  {
    Int numNonZero = 0;
    Int iSubPos    = iSubSet << LOG2_SCAN_SET_SIZE; // first scan position of this sub-set
    uiGoRiceParam  = 0;                             // the Rice parameter restarts in every sub-set
    Int  absCoeff[16];                              // absolute levels in coding (reverse scan) order
    UInt coeffSigns = 0;                            // sign bits, first coded coefficient in the most significant used bit

    Int lastNZPosInCG = -1, firstNZPosInCG = SCAN_SET_SIZE;

    // The last significant coefficient needs no significance flag: its position was
    // already signalled, so it is entered into the level/sign buffers directly.
    if( iScanPosSig == scanPosLast )
    {
      absCoeff[ 0 ]  = abs( pcCoef[ posLast ] );
      coeffSigns     = ( pcCoef[ posLast ] < 0 );
      numNonZero     = 1;
      lastNZPosInCG  = iScanPosSig;
      firstNZPosInCG = iScanPosSig;
      iScanPosSig--;
    }

    // encode significant_coeffgroup_flag
    Int iCGBlkPos = scanCG[ iSubSet ];
    Int iCGPosY   = iCGBlkPos / uiNumBlkSide;
    Int iCGPosX   = iCGBlkPos - (iCGPosY * uiNumBlkSide);
    if( iSubSet == iLastScanSet || iSubSet == 0)
    {
      // No flag is written for the sub-set with the last coefficient or for sub-set 0;
      // both are forced to 1 here.
      uiSigCoeffGroupFlag[ iCGBlkPos ] = 1;
    }
    else
    {
      UInt uiSigCoeffGroup = (uiSigCoeffGroupFlag[ iCGBlkPos ] != 0);
      UInt uiCtxSig        = TComTrQuant::getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, iCGPosX, iCGPosY, uiWidth, uiHeight );
      m_pcBinIf->encodeBin( uiSigCoeffGroup, baseCoeffGroupCtx[ uiCtxSig ] );
    }

    // encode significant_coeff_flag
    if( uiSigCoeffGroupFlag[ iCGBlkPos ] )
    {
      Int  patternSigCtx = TComTrQuant::calcPatternSigCtx( uiSigCoeffGroupFlag, iCGPosX, iCGPosY, uiWidth, uiHeight );
      UInt uiBlkPos, uiPosY, uiPosX, uiSig, uiCtxSig;
      for( ; iScanPosSig >= iSubPos; iScanPosSig-- )
      {
        uiBlkPos = scan[ iScanPosSig ];
        uiPosY   = uiBlkPos >> uiLog2BlockSize;
        uiPosX   = uiBlkPos - ( uiPosY << uiLog2BlockSize );
        uiSig    = (pcCoef[ uiBlkPos ] != 0);

        // The flag is skipped only at the final position of a sub-set other than 0
        // when no non-zero coefficient has been seen in it yet.
        if( iScanPosSig > iSubPos || iSubSet == 0 || numNonZero )
        {
          uiCtxSig = TComTrQuant::getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlockSize, eTType );
          m_pcBinIf->encodeBin( uiSig, baseCtx[ uiCtxSig ] );
        }
        if( uiSig )
        {
          absCoeff[ numNonZero ] = abs( pcCoef[ uiBlkPos ] );
          // Shift earlier signs up and append the new sign as the least significant bit.
          coeffSigns = 2 * coeffSigns + ( pcCoef[ uiBlkPos ] < 0 );
          numNonZero++;
          if( lastNZPosInCG == -1 )
          {
            lastNZPosInCG = iScanPosSig;
          }
          firstNZPosInCG = iScanPosSig;
        }
      }
    }
    else
    {
      // Empty group: jump straight to the end of the next sub-set.
      iScanPosSig = iSubPos - 1;
    }

    if( numNonZero > 0 )
    {
      // A sign is hidden when the first and last non-zero positions of the group are far enough apart.
      Bool signHidden = ( lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD );

      // Context set: 2 for luma sub-sets other than sub-set 0, else 0; plus 1 when the
      // previously coded sub-set ended with c1 == 0 (a level greater than 1 was seen).
      UInt uiCtxSet = (iSubSet > 0 && eTType==TEXT_LUMA) ? 2 : 0;
      if( c1 == 0 )
      {
        uiCtxSet++;
      }
      c1 = 1;

      ContextModel *baseCtxMod = ( eTType==TEXT_LUMA ) ? m_cCUOneSCModel.get( 0, 0 ) + 4 * uiCtxSet : m_cCUOneSCModel.get( 0, 0 ) + NUM_ONE_FLAG_CTX_LUMA + 4 * uiCtxSet;

      // Greater-than-1 flags: only the first C1FLAG_NUMBER non-zero coefficients get one.
      Int numC1Flag      = min(numNonZero, C1FLAG_NUMBER);
      Int firstC2FlagIdx = -1; // index of the first coefficient whose greater-than-1 flag is 1
      for( Int idx = 0; idx < numC1Flag; idx++ )
      {
        UInt uiSymbol = absCoeff[ idx ] > 1;
        m_pcBinIf->encodeBin( uiSymbol, baseCtxMod[c1] );
        if( uiSymbol )
        {
          c1 = 0;
          if (firstC2FlagIdx == -1)
          {
            firstC2FlagIdx = idx;
          }
        }
        else if( (c1 < 3) && (c1 > 0) )
        {
          c1++;
        }
      }

      // Greater-than-2 flag: coded once, for the first coefficient found to be greater than 1.
      if (c1 == 0)
      {
        baseCtxMod = ( eTType==TEXT_LUMA ) ? m_cCUAbsSCModel.get( 0, 0 ) + uiCtxSet : m_cCUAbsSCModel.get( 0, 0 ) + NUM_ABS_FLAG_CTX_LUMA + uiCtxSet;
        if ( firstC2FlagIdx != -1)
        {
          UInt symbol = absCoeff[ firstC2FlagIdx ] > 2;
          m_pcBinIf->encodeBin( symbol, baseCtxMod[0] );
        }
      }

      // Sign bits in bypass mode. With sign hiding the least significant bit (the sign of
      // the coefficient visited last, i.e. at firstNZPosInCG) is not transmitted.
      if( beValid && signHidden )
      {
        m_pcBinIf->encodeBinsEP( (coeffSigns >> 1), numNonZero-1 );
      }
      else
      {
        m_pcBinIf->encodeBinsEP( coeffSigns, numNonZero );
      }

      // Remaining absolute levels.
      Int iFirstCoeff2 = 1;
      if (c1 == 0 || numNonZero > C1FLAG_NUMBER)
      {
        for ( Int idx = 0; idx < numNonZero; idx++ )
        {
          // Base level already implied by the flags: 3 until the first level >= 2 has been
          // passed, 2 afterwards, and 1 for coefficients beyond the greater-than-1 flag budget.
          UInt baseLevel = (idx < C1FLAG_NUMBER)? (2 + iFirstCoeff2 ) : 1;

          if( absCoeff[ idx ] >= baseLevel)
          {
            xWriteCoefRemainExGolomb( absCoeff[ idx ] - baseLevel, uiGoRiceParam );
            // Adapt the Rice parameter: increase it by one, capped at 4, once a level
            // exceeds 3 * 2^uiGoRiceParam.
            if(absCoeff[idx] > 3*(1<<uiGoRiceParam))
            {
              uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
            }
          }
          if(absCoeff[ idx ] >= 2)
          {
            iFirstCoeff2 = 0;
          }
        }
      }
    }
  }

  return;
}
这样做的好处就是简化扫描过程,比如一个N*N的TU,最坏情况只需扫描2N-2就行;而H.264中最后非零系数标记与非零系数标记是交织进行的,最坏情况是N*N-1,当N为32时,复杂度可想而知。
另外, HEVC中,最后非零系数是先在TU-level 确定了的,这是与H.264/AVC不同的地方,减少了dependence
下面是LastXY坐标的parsing 过程:
/** Code the position of the last significant coefficient of a block.
 *
 * Each coordinate is sent as a context-coded prefix (a run of 1-bins closed by a
 * 0-bin unless the largest group index for the block dimension is reached) and,
 * for group indices above 3, a bypass-coded suffix written MSB first.
 * Bin order: X prefix, Y prefix, X suffix, Y suffix.
 *
 * \param uiPosX     column of the last significant coefficient
 * \param uiPosY     row of the last significant coefficient
 * \param width      block width
 * \param height     block height
 * \param eTType     texture type, selects the context sets
 * \param uiScanIdx  scan index; the coordinates are exchanged for SCAN_VER
 */
Void TEncSbac::codeLastSignificantXY( UInt uiPosX, UInt uiPosY, Int width, Int height, TextType eTType, UInt uiScanIdx )
{
  // Vertical scans code the coordinates transposed.
  if( uiScanIdx == SCAN_VER )
  {
    swap( uiPosX, uiPosY );
  }

  const UInt groupX = g_uiGroupIdx[ uiPosX ];
  const UInt groupY = g_uiGroupIdx[ uiPosY ];

  // Context offset and bin-to-context shift. A zero eTType uses a block-size
  // dependent offset; any other type starts at offset 0 with a size-based shift.
  Int ctxOffsetX, ctxOffsetY, shiftX, shiftY;
  if( eTType )
  {
    ctxOffsetX = 0;
    ctxOffsetY = 0;
    shiftX     = g_aucConvertToBit[ width ];
    shiftY     = g_aucConvertToBit[ height ];
  }
  else
  {
    ctxOffsetX = g_aucConvertToBit[ width ]  * 3 + ( ( g_aucConvertToBit[ width ]  + 1 ) >> 2 );
    ctxOffsetY = g_aucConvertToBit[ height ] * 3 + ( ( g_aucConvertToBit[ height ] + 1 ) >> 2 );
    shiftX     = ( g_aucConvertToBit[ width ]  + 3 ) >> 2;
    shiftY     = ( g_aucConvertToBit[ height ] + 3 ) >> 2;
  }

  ContextModel* const ctxBaseX = m_cCuCtxLastX.get( 0, eTType ) + ctxOffsetX;
  ContextModel* const ctxBaseY = m_cCuCtxLastY.get( 0, eTType ) + ctxOffsetY;

  // Prefix of X: groupX one-bins, then a terminating zero unless groupX is the maximum.
  UInt binIdx = 0;
  while( binIdx < groupX )
  {
    m_pcBinIf->encodeBin( 1, ctxBaseX[ binIdx >> shiftX ] );
    binIdx++;
  }
  if( groupX < g_uiGroupIdx[ width - 1 ] )
  {
    m_pcBinIf->encodeBin( 0, ctxBaseX[ binIdx >> shiftX ] );
  }

  // Prefix of Y, same scheme.
  binIdx = 0;
  while( binIdx < groupY )
  {
    m_pcBinIf->encodeBin( 1, ctxBaseY[ binIdx >> shiftY ] );
    binIdx++;
  }
  if( groupY < g_uiGroupIdx[ height - 1 ] )
  {
    m_pcBinIf->encodeBin( 0, ctxBaseY[ binIdx >> shiftY ] );
  }

  // Suffix of X: offset inside the group as a fixed-length bypass code, MSB first.
  if( groupX > 3 )
  {
    const UInt suffixLen = ( groupX - 2 ) >> 1;
    const UInt offsetInGroup = uiPosX - g_uiMinInGroup[ groupX ];
    for( UInt bitsLeft = suffixLen; bitsLeft > 0; bitsLeft-- )
    {
      m_pcBinIf->encodeBinEP( ( offsetInGroup >> ( bitsLeft - 1 ) ) & 1 );
    }
  }

  // Suffix of Y.
  if( groupY > 3 )
  {
    const UInt suffixLen = ( groupY - 2 ) >> 1;
    const UInt offsetInGroup = uiPosY - g_uiMinInGroup[ groupY ];
    for( UInt bitsLeft = suffixLen; bitsLeft > 0; bitsLeft-- )
    {
      m_pcBinIf->encodeBinEP( ( offsetInGroup >> ( bitsLeft - 1 ) ) & 1 );
    }
  }
}
这里需要指出的是,XY坐标的syntax elements包括各自的prefix 和suffix, 主要是prefix ,因为这是regular coding,需要用到上下文(context)。 suffix 是bypass mode, 比较简单,不涉及上下文选择和概率更新。
另外,只有TU size 大于 4时, 才会有suffix出现
上下文选择的过程参考标准文档里的ctxInc推导过程,code 就是把那个过程实现出来,上下文选择大部分都在TComTrQuant.cpp中定义。其中涉及的编程思想也很多,实现过程挺有意思。
* 近期有时间再把MVD整理下