残差信号编码(residual coding) 和CABAC 中TU-level 的上下文parsing 代码分析

CABAC 是唯一一个应用到HEVC中的熵编码方式,效率明显高于CAVLC编码。

但是CABAC有天然的serial operation dependence,致使硬件实施很困难,尤其是real-time 要求和mobile 设备等。

CABAC 用到的上下文模型中, 残差信号也就是变换系数(transform coefficients)相关的语法元素占了70%左右的bins,对熵编码的性能影响也是显而易见的

再者就是MVD(motion vector difference )占据了差不多20% 的 bins 

对这两个部分进行优化,对于降低CABAC的计算复杂度和数据依赖性都有着重要意义

至于优化,先要了解这部分的parsing 过程

接下来先分析下残差信号的变换系数的parsing过程的code ,希望可以共同学习

在类TEncSbac下定义了所有模块的syntax elements parsing 过程函数, codeCoeffNxN 就是针对变换系数的函数:

/*
 * Encode the residual (transform-coefficient) syntax of one transform block.
 *
 * Steps, in the order the bins are written:
 *   1. optional transform-skip flags (when enabled in the PPS),
 *   2. the X/Y position of the last non-zero coefficient in scan order,
 *   3. for every 16-coefficient subset (coefficient group, CG), walking from the
 *      subset that holds the last coefficient down to subset 0:
 *        - the coded-sub-block flag (skipped for the last subset and subset 0),
 *        - the significance flags,
 *        - up to C1FLAG_NUMBER "greater than 1" flags and at most one
 *          "greater than 2" flag,
 *        - the sign bits (one fewer when sign data hiding applies),
 *        - the remaining absolute levels (Golomb-Rice / Exp-Golomb).
 *
 * pcCU          CU that owns the block (queried for prediction mode, scan index, PPS flags).
 * pcCoef        coefficients of the block in raster order (row = pos >> log2 size).
 * uiAbsPartIdx  partition index of the block inside the CU.
 * uiWidth       block width; clamped to the SPS maximum transform size.
 * uiHeight      block height; clamped together with uiWidth.
 * uiDepth       only used for trace output in this function.
 * eTType        texture type; collapsed to TEXT_LUMA / TEXT_CHROMA / TEXT_NONE before use.
 *
 * Returns without writing anything when the block has no non-zero coefficient.
 */
Void TEncSbac::codeCoeffNxN( TComDataCU* pcCU, TCoeff* pcCoef, UInt uiAbsPartIdx, UInt uiWidth, UInt uiHeight, UInt uiDepth, TextType eTType )
{
  DTRACE_CABAC_VL( g_nSymbolCounter++ )
  DTRACE_CABAC_T( "\tparseCoeffNxN()\teType=" )
  DTRACE_CABAC_V( eTType )
  DTRACE_CABAC_T( "\twidth=" )
  DTRACE_CABAC_V( uiWidth )
  DTRACE_CABAC_T( "\theight=" )
  DTRACE_CABAC_V( uiHeight )
  DTRACE_CABAC_T( "\tdepth=" )
  DTRACE_CABAC_V( uiDepth )
  DTRACE_CABAC_T( "\tabspartidx=" )
  DTRACE_CABAC_V( uiAbsPartIdx )
  DTRACE_CABAC_T( "\ttoCU-X=" )
  DTRACE_CABAC_V( pcCU->getCUPelX() )
  DTRACE_CABAC_T( "\ttoCU-Y=" )
  DTRACE_CABAC_V( pcCU->getCUPelY() )
  DTRACE_CABAC_T( "\tCU-addr=" )
  DTRACE_CABAC_V(  pcCU->getAddr() )
  DTRACE_CABAC_T( "\tinCU-X=" )
  DTRACE_CABAC_V( g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ] )
  DTRACE_CABAC_T( "\tinCU-Y=" )
  DTRACE_CABAC_V( g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ] )
  DTRACE_CABAC_T( "\tpredmode=" )
  DTRACE_CABAC_V(  pcCU->getPredictionMode( uiAbsPartIdx ) )
  DTRACE_CABAC_T( "\n" )

  // A block wider than the SPS maximum transform size is coded at that maximum size.
  if( uiWidth > m_pcSlice->getSPS()->getMaxTrSize() )
  {
    uiWidth  = m_pcSlice->getSPS()->getMaxTrSize();
    uiHeight = m_pcSlice->getSPS()->getMaxTrSize();
  }

  // Count the non-zero coefficients of the whole block; an all-zero block codes nothing.
  UInt uiNumSig = TEncEntropy::countNonZeroCoeffs( pcCoef, uiWidth * uiHeight );

  if ( uiNumSig == 0 )
    return;
  if( pcCU->getSlice()->getPPS()->getUseTransformSkip() )
  {
    codeTransformSkipFlags( pcCU, uiAbsPartIdx, uiWidth, uiHeight, eTType );
  }
  // Collapse the texture type to luma / chroma / none for context selection.
  eTType = eTType == TEXT_LUMA ? TEXT_LUMA : ( eTType == TEXT_NONE ? TEXT_NONE : TEXT_CHROMA );

  //----- encode significance map -----
  const UInt   uiLog2BlockSize = g_aucConvertToBit[ uiWidth ] + 2;

  // Scan pattern index for this block (depends on size, luma/chroma and intra mode).
  UInt uiScanIdx = pcCU->getCoefScanIdx( uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra( uiAbsPartIdx ) );

  // Maps a scan position to a raster position inside the block.
  const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlockSize - 1 ];

  // beValid: sign data hiding may be used. It is off for transquant-bypass CUs,
  // otherwise it follows the PPS sign-hide flag.
  Bool beValid;
  if ( pcCU->getCUTransquantBypass( uiAbsPartIdx ) )
  {
    beValid = false;
  }
  else
  {
    beValid = pcCU->getSlice()->getPPS()->getSignHideFlag() > 0;
  }

  // Find position of last coefficient
  Int scanPosLast = -1;   // scan position of the last non-zero coefficient (set by the do/while below)
  Int posLast;            // raster position of that coefficient

  // scanCG maps a subset (CG) scan index to the CG's raster index inside the block.
  const UInt * scanCG;
  {
    scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlockSize > 3 ? uiLog2BlockSize-2-1 : 0 ];
    if( uiLog2BlockSize == 3 )          // 8x8 block
    {
      scanCG = g_sigLastScan8x8[ uiScanIdx ];
    }
    else if( uiLog2BlockSize == 5 )     // 32x32 block
    {
      scanCG = g_sigLastScanCG32x32;
    }
  }
  UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];            // one flag per CG: non-zero if the CG holds a non-zero coefficient
  static const UInt uiShift = MLS_CG_SIZE >> 1;       // shift that converts a coefficient coordinate to a CG coordinate
  const UInt uiNumBlkSide = uiWidth >> uiShift;       // number of CGs per block row

  ::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );

  // Walk forward in scan order until every non-zero coefficient has been seen.
  // On exit scanPosLast / posLast identify the last non-zero coefficient, and
  // uiSigCoeffGroupFlag marks every CG that contains a non-zero coefficient.
  do
  {
    posLast = scan[ ++scanPosLast ];

    // get L1 sig map
    UInt uiPosY    = posLast >> uiLog2BlockSize;
    UInt uiPosX    = posLast - ( uiPosY << uiLog2BlockSize );
    UInt uiBlkIdx  = uiNumBlkSide * (uiPosY >> uiShift) + (uiPosX >> uiShift);   // raster index of the CG holding this coefficient
    if( pcCoef[ posLast ] )
    {
      uiSigCoeffGroupFlag[ uiBlkIdx ] = 1;
    }

    uiNumSig -= ( pcCoef[ posLast ] != 0 );
  }
  while ( uiNumSig > 0 );

  // Code position of last coefficient
  Int posLastY = posLast >> uiLog2BlockSize;
  Int posLastX = posLast - ( posLastY << uiLog2BlockSize );
  codeLastSignificantXY( posLastX, posLastY, uiWidth, uiHeight, eTType, uiScanIdx );

  //===== code significance flag =====
  ContextModel * const baseCoeffGroupCtx = m_cCUSigCoeffGroupSCModel.get( 0, eTType );
  // Chroma significance contexts follow the NUM_SIG_FLAG_CTX_LUMA luma contexts in the same model set.
  ContextModel * const baseCtx = (eTType==TEXT_LUMA) ? m_cCUSigSCModel.get( 0, 0 ) : m_cCUSigSCModel.get( 0, 0 ) + NUM_SIG_FLAG_CTX_LUMA;

  const Int  iLastScanSet      = scanPosLast >> LOG2_SCAN_SET_SIZE;   // index of the subset containing the last coefficient
  UInt c1 = 1;                                                        // greater1 context state, carried over between subsets
  UInt uiGoRiceParam           = 0;
  Int  iScanPosSig             = scanPosLast;                         // current scan position, walks downward

  for( Int iSubSet = iLastScanSet; iSubSet >= 0; iSubSet-- )
  {
    Int numNonZero = 0;
    Int  iSubPos     = iSubSet << LOG2_SCAN_SET_SIZE;   // first scan position of this subset
    uiGoRiceParam    = 0;                               // Rice parameter restarts in every subset
    Int absCoeff[16];                                   // absolute levels of this subset, in coding order
    UInt coeffSigns = 0;                                // sign bits of this subset, first coded coefficient in the MSB

    Int lastNZPosInCG = -1, firstNZPosInCG = SCAN_SET_SIZE;

    // The last coefficient's significance is implied by the coded last position,
    // so record it directly and start the significance loop one position below it.
    if( iScanPosSig == scanPosLast )
    {
      absCoeff[ 0 ] = abs( pcCoef[ posLast ] );
      coeffSigns    = ( pcCoef[ posLast ] < 0 );
      numNonZero    = 1;
      lastNZPosInCG  = iScanPosSig;
      firstNZPosInCG = iScanPosSig;
      iScanPosSig--;
    }

    // encode significant_coeffgroup_flag
    Int iCGBlkPos = scanCG[ iSubSet ];
    Int iCGPosY   = iCGBlkPos / uiNumBlkSide;
    Int iCGPosX   = iCGBlkPos - (iCGPosY * uiNumBlkSide);
    if( iSubSet == iLastScanSet || iSubSet == 0 )
    {
      // No flag is written for the subset holding the last coefficient or for subset 0;
      // both are treated as coded.
      uiSigCoeffGroupFlag[ iCGBlkPos ] = 1;
    }
    else
    {
      UInt uiSigCoeffGroup   = (uiSigCoeffGroupFlag[ iCGBlkPos ] != 0);
      UInt uiCtxSig  = TComTrQuant::getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, iCGPosX, iCGPosY, uiWidth, uiHeight );
      m_pcBinIf->encodeBin( uiSigCoeffGroup, baseCoeffGroupCtx[ uiCtxSig ] );
    }

    // encode significant_coeff_flag
    if( uiSigCoeffGroupFlag[ iCGBlkPos ] )
    {
      Int patternSigCtx = TComTrQuant::calcPatternSigCtx( uiSigCoeffGroupFlag, iCGPosX, iCGPosY, uiWidth, uiHeight );
      UInt uiBlkPos, uiPosY, uiPosX, uiSig, uiCtxSig;
      for( ; iScanPosSig >= iSubPos; iScanPosSig-- )
      {
        uiBlkPos  = scan[ iScanPosSig ];
        uiPosY    = uiBlkPos >> uiLog2BlockSize;
        uiPosX    = uiBlkPos - ( uiPosY << uiLog2BlockSize );
        uiSig     = (pcCoef[ uiBlkPos ] != 0);
        // The flag at the first position of a subset is not written when the subset is
        // not subset 0 and no non-zero coefficient has been recorded in it so far.
        if( iScanPosSig > iSubPos || iSubSet == 0 || numNonZero )
        {
          uiCtxSig  = TComTrQuant::getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlockSize, eTType );
          m_pcBinIf->encodeBin( uiSig, baseCtx[ uiCtxSig ] );
        }

        if( uiSig )
        {
          absCoeff[ numNonZero ] = abs( pcCoef[ uiBlkPos ] );
          // Append this coefficient's sign as the new LSB, so the bits end up in coding
          // order and can be written with a single encodeBinsEP call below.
          coeffSigns = 2 * coeffSigns + ( pcCoef[ uiBlkPos ] < 0 );
          numNonZero++;
          if( lastNZPosInCG == -1 )
          {
            lastNZPosInCG = iScanPosSig;
          }
          firstNZPosInCG = iScanPosSig;
        }
      }
    }
    else
    {
      // Empty subset: skip all of its scan positions.
      iScanPosSig = iSubPos - 1;
    }

    if( numNonZero > 0 )
    {
      // Sign hiding applies when the first and last non-zero scan positions of the
      // subset are at least SBH_THRESHOLD apart.
      Bool signHidden = ( lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD );
      UInt uiCtxSet = (iSubSet > 0 && eTType==TEXT_LUMA) ? 2 : 0;

      // c1 == 0 here means the previously coded subset contained a level greater than 1.
      if( c1 == 0 )
      {
        uiCtxSet++;
      }
      c1 = 1;
      // Four greater1 contexts per context set; chroma sets follow the luma ones.
      ContextModel *baseCtxMod = ( eTType==TEXT_LUMA ) ? m_cCUOneSCModel.get( 0, 0 ) + 4 * uiCtxSet : m_cCUOneSCModel.get( 0, 0 ) + NUM_ONE_FLAG_CTX_LUMA + 4 * uiCtxSet;

      // Only the first C1FLAG_NUMBER non-zero coefficients of a subset get a greater1 flag.
      Int numC1Flag = min(numNonZero, C1FLAG_NUMBER);
      Int firstC2FlagIdx = -1;   // index of the first coefficient with level > 1, or -1 if none
      for( Int idx = 0; idx < numC1Flag; idx++ )
      {
        UInt uiSymbol = absCoeff[ idx ] > 1;
        m_pcBinIf->encodeBin( uiSymbol, baseCtxMod[c1] );
        if( uiSymbol )
        {
          c1 = 0;   // stays 0 for the rest of the subset once a level > 1 has been seen

          if (firstC2FlagIdx == -1)
          {
            firstC2FlagIdx = idx;
          }
        }
        else if( (c1 < 3) && (c1 > 0) )
        {
          c1++;
        }
      }

      // A single greater2 flag is coded, for the first coefficient whose greater1 flag was 1.
      if (c1 == 0)
      {
        baseCtxMod = ( eTType==TEXT_LUMA ) ? m_cCUAbsSCModel.get( 0, 0 ) + uiCtxSet : m_cCUAbsSCModel.get( 0, 0 ) + NUM_ABS_FLAG_CTX_LUMA + uiCtxSet;
        if ( firstC2FlagIdx != -1)
        {
          UInt symbol = absCoeff[ firstC2FlagIdx ] > 2;
          m_pcBinIf->encodeBin( symbol, baseCtxMod[0] );
        }
      }

      // Sign bits. With sign hiding, the LSB (the sign of the coefficient at the lowest
      // scan position, firstNZPosInCG) is dropped and not transmitted.
      if( beValid && signHidden )
      {
        m_pcBinIf->encodeBinsEP( (coeffSigns >> 1), numNonZero-1 );
      }
      else
      {
        m_pcBinIf->encodeBinsEP( coeffSigns, numNonZero );
      }

      // Remaining absolute levels.
      Int iFirstCoeff2 = 1;   // 1 until the first coefficient with level >= 2 has been passed
      if (c1 == 0 || numNonZero > C1FLAG_NUMBER)
      {
        for ( Int idx = 0; idx < numNonZero; idx++ )
        {
          // Level already conveyed by the flags: 3 for the coefficient that carried the
          // greater2 flag, 2 for the others with a greater1 flag, 1 for those without flags.
          UInt baseLevel  = (idx < C1FLAG_NUMBER)? (2 + iFirstCoeff2 ) : 1;

          if( absCoeff[ idx ] >= baseLevel)
          {
            xWriteCoefRemainExGolomb( absCoeff[ idx ] - baseLevel, uiGoRiceParam );
            // Raise the Rice parameter after a large level, capped at 4.
            if( absCoeff[ idx ] > 3 * ( 1 << uiGoRiceParam ) )
            {
              uiGoRiceParam = min<UInt>( uiGoRiceParam + 1, 4 );
            }
          }
          if(absCoeff[ idx ] >= 2)
          {
            iFirstCoeff2 = 0;
          }
        }
      }
    }
  }

  return;
}

上面变换系数的编码中调用了codeLastSignificantXY 函数,这个函数用来编码TU中最后一个非零系数的位置。 与H.264/AVC不同, 最后一个非零系数的位置是按其在当前TU中的XY坐标编码的。

这样做的好处就是简化扫描过程,比如一个N*N的TU,最坏情况只需扫描2N-2就行,而H.264中的交叉最后非零系数和非零系数的标记过程中,最坏情况是N*N-1,如果N是32时, 复杂度可想而知。

另外, HEVC中,最后非零系数是先在TU-level 确定了的,这是与H.264/AVC不同的地方,减少了dependence

下面是LastXY坐标的parsing 过程:

/*
 * Encode the (X, Y) position of the last significant coefficient of a block.
 *
 * Each coordinate is written as a context-coded prefix (its group index as a run
 * of 1-bins, closed by a 0-bin unless the group is the largest one possible for
 * that dimension), followed - for group indices above 3 - by a bypass-coded
 * fixed-length suffix giving the offset inside the group.
 * Bin order: X prefix, Y prefix, X suffix, Y suffix.
 *
 * uiPosX, uiPosY  coordinates of the last significant coefficient.
 * width, height   block dimensions.
 * eTType          texture type; selects the context set and the offset/shift rule.
 * uiScanIdx       scan pattern; for SCAN_VER the two coordinates are exchanged first.
 */
Void TEncSbac::codeLastSignificantXY( UInt uiPosX, UInt uiPosY, Int width, Int height, TextType eTType, UInt uiScanIdx )
{
  // Vertical scan: code the coordinates transposed.
  if( uiScanIdx == SCAN_VER )
  {
    swap( uiPosX, uiPosY );
  }

  ContextModel* const ctxBaseX = m_cCuCtxLastX.get( 0, eTType );
  ContextModel* const ctxBaseY = m_cCuCtxLastY.get( 0, eTType );
  const UInt groupX = g_uiGroupIdx[ uiPosX ];
  const UInt groupY = g_uiGroupIdx[ uiPosY ];

  // Context offset and shift per dimension. A non-zero eTType uses no offset and
  // shifts by the size index; a zero eTType derives both from the size index.
  const Int sizeIdxW = g_aucConvertToBit[ width  ];
  const Int sizeIdxH = g_aucConvertToBit[ height ];
  Int ctxOffsetX, ctxOffsetY, ctxShiftX, ctxShiftY;
  if( eTType )
  {
    ctxOffsetX = 0;
    ctxOffsetY = 0;
    ctxShiftX  = sizeIdxW;
    ctxShiftY  = sizeIdxH;
  }
  else
  {
    ctxOffsetX = sizeIdxW * 3 + ( ( sizeIdxW + 1 ) >> 2 );
    ctxOffsetY = sizeIdxH * 3 + ( ( sizeIdxH + 1 ) >> 2 );
    ctxShiftX  = ( sizeIdxW + 3 ) >> 2;
    ctxShiftY  = ( sizeIdxH + 3 ) >> 2;
  }
  ContextModel* const ctxX = ctxBaseX + ctxOffsetX;
  ContextModel* const ctxY = ctxBaseY + ctxOffsetY;

  // X prefix: groupX one-bins, then a terminating zero-bin unless groupX is the maximum for this width.
  UInt binIdx = 0;
  while( binIdx < groupX )
  {
    m_pcBinIf->encodeBin( 1, ctxX[ binIdx >> ctxShiftX ] );
    ++binIdx;
  }
  if( groupX < g_uiGroupIdx[ width - 1 ] )
  {
    m_pcBinIf->encodeBin( 0, ctxX[ binIdx >> ctxShiftX ] );
  }

  // Y prefix: same scheme with the Y contexts and the block height.
  binIdx = 0;
  while( binIdx < groupY )
  {
    m_pcBinIf->encodeBin( 1, ctxY[ binIdx >> ctxShiftY ] );
    ++binIdx;
  }
  if( groupY < g_uiGroupIdx[ height - 1 ] )
  {
    m_pcBinIf->encodeBin( 0, ctxY[ binIdx >> ctxShiftY ] );
  }

  // X suffix: offset inside the group, (groupX - 2) >> 1 bypass bins, most significant bit first.
  if( groupX > 3 )
  {
    const UInt numBits = ( groupX - 2 ) >> 1;
    const UInt offsetInGroup = uiPosX - g_uiMinInGroup[ groupX ];
    for( Int bit = numBits - 1; bit >= 0; --bit )
    {
      m_pcBinIf->encodeBinEP( ( offsetInGroup >> bit ) & 1 );
    }
  }

  // Y suffix.
  if( groupY > 3 )
  {
    const UInt numBits = ( groupY - 2 ) >> 1;
    const UInt offsetInGroup = uiPosY - g_uiMinInGroup[ groupY ];
    for( Int bit = numBits - 1; bit >= 0; --bit )
    {
      m_pcBinIf->encodeBinEP( ( offsetInGroup >> bit ) & 1 );
    }
  }
}

这里需要指出的是,XY坐标的syntax elements包括各自的prefix 和suffix, 主要是prefix ,因为这是regular coding,需要选择上下文(context)。 suffix 是bypass mode, 比较简单,不涉及上下文选择和概率更新。

另外,只有TU size 大于 4时, 才会有suffix出现

上下文选择的过程参考标准文档里的ctxInc推导过程,code 就是把那个过程实现,大部分上下文选择都在TComTrQuant.cpp中定义。其中涉及的编程思想也很多,实现过程挺有意思。


* 近期有时间再把MVD整理下



你可能感兴趣的:(HEVC,C++,熵编码)