H.266/VVC代码学习3:VTM4.0中的CTU层操作(encodeCtus)

由于要研究比特数的相关内容,本周阅读了encodeCtus。学习笔记如下:
从 H.266/VVC代码学习4:VTM代码整体结构及最上层函数 进入,将视频划分为GOP后继续划分为帧,到这里又进一步划分为CTU,可以说是很小的一部分了。后续操作都以CTU为单位进行,下图中最左边进入的就是一帧,也能看出具体的操作单位是CTU。
H.266/VVC代码学习3:VTM4.0中的CTU层操作(encodeCtus)_第1张图片
后续进入compressCtu,对每一个CTU进行处理,具体可见H.266/VVC代码学习13:VTM4.0中的CU层操作(compressCtu 、 xCompressCU)
代码如下:

/**
 * Runs the per-CTU encoding loop over the CTUs whose tile-scan addresses lie in
 * [startCtuTsAddr, boundingCtuTsAddr) of the picture pcPic.
 *
 * For each CTU the loop, in order:
 *   - derives the CTU's raster address, its position in CTU units and its UnitArea;
 *   - resets the slice's motion LUTs when a non-I slice reaches CTU column 0;
 *   - (HEVC_TILES_WPP / ENABLE_WPP_PARALLELISM) initialises or restores the CABAC contexts;
 *   - changes lambda/QP when rate control or (ENABLE_QPA) perceptual QPA is enabled;
 *   - calls compressCtu(), then coding_tree_unit() on the CABAC estimator and reads back the estimated bits;
 *   - ends the loop early when the slice is in FIXED_NUMBER_OF_BYTES mode and the budget would be exceeded;
 *   - adds the bits to the slice, updates rate control, restores lambda and
 *     (when !ENABLE_WPP_PARALLELISM) updates m_uiPicTotalBits / m_uiPicDist.
 *
 * \param pcPic                 picture being encoded; pcPic->cs supplies the slice and the PreCalcValues
 * \param bCompressEntireSlice  only read under HEVC_DEPENDENT_SLICES (slice-segment byte-limit check)
 * \param bFastDeltaQP          not referenced anywhere in this function body
 * \param startCtuTsAddr        first CTU address (tile-scan order) to process
 * \param boundingCtuTsAddr     one past the last CTU address; passed by value and lowered locally to stop early
 * \param pEncLib               provides the CABAC estimator, TrQuant, RdCost and RateCtrl; also used as the EncCfg*
 */
void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pEncLib )
{
  //PROF_ACCUM_AND_START_NEW_SET( getProfilerCTU( pcPic, 0, 0 ), P_PIC_LEVEL );
  //PROF_START( getProfilerCTU( cs.slice->isIntra(), pcPic->scheduler.getWppThreadId() ), P_PIC_LEVEL, toWSizeIdx( cs.pcv->maxCUWidth ), toHSizeIdx( cs.pcv->maxCUHeight ) );

  /*************************************** Initialization *****************************************/
  CodingStructure&  cs            = *pcPic->cs;
  Slice* pcSlice                  = cs.slice;
  const PreCalcValues& pcv        = *cs.pcv;
  const uint32_t        widthInCtus   = pcv.widthInCtus;
#if HEVC_TILES_WPP
  const TileMap&  tileMap         = *pcPic->tileMap;
#endif
#if ENABLE_QPA
  // Slice base QP; used below as the reference point when scaling lambda for perceptual QPA.
  const int iQPIndex              = pcSlice->getSliceQpBase();
#endif

  // dataId selects the per-thread encoder objects via PARL_PARAM0; it is only declared when one of
  // the parallelism macros is enabled.
#if ENABLE_WPP_PARALLELISM
  const int       dataId          = pcPic->scheduler.getWppDataId();
#elif ENABLE_SPLIT_PARALLELISM
  const int       dataId          = 0;
#endif
  // Note: the writer used throughout this function is the CABAC *estimator* (getCABACEstimator),
  // so the bit counts read from it below are estimates (getEstFracBits).
  CABACWriter*    pCABACWriter    = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() );
  TrQuant*        pTrQuant        = pEncLib->getTrQuant( PARL_PARAM0( dataId ) );
  RdCost*         pRdCost         = pEncLib->getRdCost( PARL_PARAM0( dataId ) );
  EncCfg*         pCfg            = pEncLib;
  RateCtrl*       pRateCtrl       = pEncLib->getRateCtrl();
#if ENABLE_WPP_PARALLELISM
  // first version dont use ctx from above
  pCABACWriter->initCtxModels( *pcSlice );
#endif

/******************************** Set up QP and lambda *********************************/
  // Seed TrQuant and RdCost with the slice-level lambdas; index 0 is the one handed to RdCost.
#if RDOQ_CHROMA_LAMBDA
  pTrQuant    ->setLambdas( pcSlice->getLambdas() );
#else
  pTrQuant    ->setLambda ( pcSlice->getLambdas()[0] );
#endif
  pRdCost     ->setLambda ( pcSlice->getLambdas()[0], pcSlice->getSPS()->getBitDepths() );

  // prevQP/currQP are two-element arrays passed to compressCtu()/coding_tree_unit(); both start at the slice QP.
  int prevQP[2];
  int currQP[2];
  prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
  currQP[0] = currQP[1] = pcSlice->getSliceQp();

  // NOTE(review): prevQP was already set to the slice QP just above, so the assignment below is
  // redundant as this function is written here.
#if HEVC_DEPENDENT_SLICES
  if( !pcSlice->getDependentSliceSegmentFlag() )
  {
#endif
    prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
#if HEVC_DEPENDENT_SLICES
  }
#endif
#if JVET_M0255_FRACMMVD_SWITCH
  // Rebuild the IBC picture hash map from the original picture when either the SPS has
  // DisFracMmvd enabled, or IBC is enabled together with IBC hash search.
  if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ||
#if JVET_M0483_IBC
      (pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch()))
#else
      ( pcSlice->getSPS()->getIBCMode() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch() ) )
#endif
  {
#if JVET_M0427_INLOOP_RESHAPER
    // With the reshaper (CTU flag set) and IBC on, rspSignal() is called on the luma original buffer
    // with the reshaper's forward LUT before the hash map is rebuilt ...
#if JVET_M0483_IBC
    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
#else
    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCMode())
#endif
      cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcLib->getReshaper()->getFwdLUT());
#endif
    m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap( cs.picture->getOrigBuf() );
#if JVET_M0427_INLOOP_RESHAPER
    // ... and under the same condition the original buffer is copied back from getTrueOrigBuf() afterwards.
#if JVET_M0483_IBC
    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
#else
    if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCMode())
#endif
      cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf());
#endif
  }
  checkDisFracMmvd( pcPic, startCtuTsAddr, boundingCtuTsAddr );
#endif
  // for every CTU in the slice segment (may terminate sooner if there is a byte limit on the slice-segment)
/***************************************** Process each CTU *****************************************/
  for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )// iterate CTU by CTU over [startCtuTsAddr, boundingCtuTsAddr)
  {
    // Raster-scan address of this CTU: mapped through the tile map when tiles/WPP are compiled in,
    // otherwise identical to the tile-scan address. Only the signedness of the type differs between
    // the JVET_M0055_DEBUG_CTU variants.
#if JVET_M0055_DEBUG_CTU
 #if HEVC_TILES_WPP
    const int32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr );
 #else
    const int32_t ctuRsAddr = ctuTsAddr;
#endif
#else
#if HEVC_TILES_WPP
    const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap(ctuTsAddr);
#else
    const uint32_t ctuRsAddr = ctuTsAddr;
#endif
#endif

#if HEVC_TILES_WPP
    // update CABAC state: find the first CTU of the tile containing this CTU and that tile's left-most CTU column
    const uint32_t firstCtuRsAddrOfTile = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)].getFirstCtuRsAddr();
    const uint32_t tileXPosInCtus       = firstCtuRsAddrOfTile % widthInCtus;
#endif
    const uint32_t ctuXPosInCtus        = ctuRsAddr % widthInCtus;
    const uint32_t ctuYPosInCtus        = ctuRsAddr / widthInCtus;

    // Top-left sample position of the CTU and the area it covers (maxCUWidth x maxCUHeight).
    const Position pos (ctuXPosInCtus * pcv.maxCUWidth, ctuYPosInCtus * pcv.maxCUHeight);
    const UnitArea ctuArea( cs.area.chromaFormat, Area( pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight ) );
    DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );

    // Reset the motion LUTs at CTU column 0 of non-I slices. With JVET_M0055_DEBUG_CTU the brace-less
    // `if` below additionally guards the second `if` (the two are nested, not sequential).
#if JVET_M0055_DEBUG_CTU
    if( pCfg->getSwitchPOC() != pcPic->poc || -1 == pCfg->getDebugCTU() )
#endif
    if ( pcSlice->getSliceType() != I_SLICE && ctuXPosInCtus == 0)
    {
      pcSlice->resetMotionLUTs();
    }

#if ENABLE_WPP_PARALLELISM
    pcPic->scheduler.wait( ctuXPosInCtus, ctuYPosInCtus );
#endif

#if HEVC_TILES_WPP
    // initialise the encoder's CABAC contexts
    if (ctuRsAddr == firstCtuRsAddrOfTile)
    {
      // First CTU of a tile: fresh contexts and prevQP back to the slice QP.
      pCABACWriter->initCtxModels( *pcSlice );
      prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
    }
    else if (ctuXPosInCtus == tileXPosInCtus && pEncLib->getEntropyCodingSyncEnabledFlag())
    {
      // reset and then update contexts to the state at the end of the top-right CTU (if within current slice and tile).
      pCABACWriter->initCtxModels( *pcSlice );
      if( cs.getCURestricted( pos.offset(pcv.maxCUWidth, -1), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) )
      {
        // Top-right is available, we use it.
        pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextState;
      }
      prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
    }
#endif

#if ENABLE_WPP_PARALLELISM
    // At the start of every CTU row except the first, load the contexts saved for the previous row
    // (they are stored further down, after the CTU at column 1 has been processed).
    if( ctuXPosInCtus == 0 && ctuYPosInCtus > 0 && widthInCtus > 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
    {
      pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus-1];  // last line
    }
#else
#endif

#if RDOQ_CHROMA_LAMBDA && ENABLE_QPA
    // Filled by the QPA branch below so the TrQuant lambdas can be restored after this CTU.
    double oldLambdaArray[MAX_NUM_COMPONENT] = {0.0};
#endif
    // Remember the current RdCost lambda; both the rate-control and QPA paths restore it after the CTU.
    const double oldLambda = pRdCost->getLambda();
    if ( pCfg->getUseRateCtrl() /*false*/)
    {
      // Rate control: choose an estimated QP (and, at CTU level, a lambda) for this CTU.
      int estQP        = pcSlice->getSliceQp();
      double estLambda = -1.0;
      double bpp       = -1.0;

      // Keep the slice QP when the picture's first slice is IRAP with forced intra QP, or when
      // CTU-level rate control is disabled; lambda is left untouched in that case.
      if( ( pcPic->slices[0]->isIRAP() && pCfg->getForceIntraQP() ) || !pCfg->getLCULevelRC() )
      {
        estQP = pcSlice->getSliceQp();
      }
      else
      {
        bpp = pRateCtrl->getRCPic()->getLCUTargetBpp(pcSlice->isIRAP());
        if ( pcPic->slices[0]->isIRAP())
        {
          estLambda = pRateCtrl->getRCPic()->getLCUEstLambdaAndQP(bpp, pcSlice->getSliceQp(), &estQP);
        }
        else
        {
          estLambda = pRateCtrl->getRCPic()->getLCUEstLambda( bpp );
          estQP     = pRateCtrl->getRCPic()->getLCUEstQP    ( estLambda, pcSlice->getSliceQp() );
        }

        // Clamp the estimated QP to [-QpBDOffset(luma), MAX_QP].
        estQP     = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, estQP );

        pRdCost->setLambda(estLambda, pcSlice->getSPS()->getBitDepths());

#if RDOQ_CHROMA_LAMBDA
        // set lambda for RDOQ
        const double chromaLambda = estLambda / pRdCost->getChromaWeight();
        const double lambdaArray[MAX_NUM_COMPONENT] = { estLambda, chromaLambda, chromaLambda };
        pTrQuant->setLambdas( lambdaArray );
#else
        pTrQuant->setLambda( estLambda );
#endif
      }

      pRateCtrl->setRCQP( estQP );
    }
#if ENABLE_QPA
    else if (pCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP()/*false*/)
    {
      // Perceptual QPA: take the per-CTU QP from pcPic->m_iOffsetCtu and scale lambda by
      // 2^((adaptedQP - iQPIndex) / 3) relative to the lambda that was active on entry.
      const int adaptedQP    = pcPic->m_iOffsetCtu[ctuRsAddr];
      const double newLambda = oldLambda * pow (2.0, double (adaptedQP - iQPIndex) / 3.0);
      // The per-CTU lambda is written into m_uEnerHpCtu for this CTU.
      pcPic->m_uEnerHpCtu[ctuRsAddr] = newLambda;
#if RDOQ_CHROMA_LAMBDA
      pTrQuant->getLambdas (oldLambdaArray); // save the old lambdas
      const double chromaLambda = newLambda / pRdCost->getChromaWeight();
      const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda};
      pTrQuant->setLambdas (lambdaArray);
#else
      pTrQuant->setLambda (newLambda);
#endif
      pRdCost->setLambda (newLambda, pcSlice->getSPS()->getBitDepths());
      currQP[0] = currQP[1] = adaptedQP;
    }
#endif

    // Only true for the first CTU of the processed range in a B slice.
    bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr;
    if( updateGbiCodingOrder /*false*/)
    {
      resetGbiCodingOrder(false, cs);
      m_pcInterSearch->initWeightIdxBits();
    }
#if JVET_M0427_INLOOP_RESHAPER && REUSE_CU_RESULTS
    if (pcSlice->getSPS()->getUseReshaper())
    {
      m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc());
    }
#endif

    // With JVET_M0055_DEBUG_CTU the brace-less `if` below guards the compressCtu() call that follows,
    // whichever of the two variants is compiled in.
#if JVET_M0055_DEBUG_CTU
  if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU())
#endif
#if ENABLE_WPP_PARALLELISM
    pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
#else
/***************** Key step (blog author's note): performs the "partitioning" of the picture CTU by CTU, choosing the best prediction parameters by RD cost ***********************/
    m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );// ======== key point 1 ========

#if PRINT// added by the blog author
  //printf("(%d,%d) %d %d: luma bits is %lld\n", cs.area.blocks[0].x, cs.area.blocks[0].y, cs.area.blocks[0].width, cs.area.blocks[0].height, cs.fracBits);
#endif 

#endif

#if K0149_BLOCK_STATISTICS
    getAndStoreBlockStatistics(cs, ctuArea);
#endif
    /*** Per CTU (blog author's note): entropy-code (CABAC) the partitioning, prediction info, residual and flags produced by the compression step. ***/
    // pCABACWriter is the estimator obtained above, so this pass yields an estimated bit count.
    pCABACWriter->resetBits();
    pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true );// ======== key point 2 ========
    const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS );// number of bits written for this CTU
#if LUMA_AND_CHROMA_BITS
    // NOTE(review): both values below are computed exactly like numberOfWrittenBits (no luma/chroma
    // split) and neither is used later in this function.
	const int numberOfLumaBits = int(pCABACWriter->getEstFracBits() >> SCALE_BITS);
	const int numberOfChromaBits = int(pCABACWriter->getEstFracBits() >> SCALE_BITS);
#endif 

    // Calculate if this CTU puts us over slice bit size.
    // cannot terminate if current slice/slice-segment would be 0 Ctu in size,
    // (hence the +1 when this is the first CTU of the range: the slice then ends after this CTU).
    const uint32_t validEndOfSliceCtuTsAddr = ctuTsAddr + (ctuTsAddr == startCtuTsAddr ? 1 : 0);
    // Set slice end parameter
    // getSliceArgument() << 3 converts the byte budget into bits for the comparison.
    if(pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceBits()+numberOfWrittenBits > (pcSlice->getSliceArgument()<<3)/*false*/)
    {
#if HEVC_DEPENDENT_SLICES
      pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
#endif
      pcSlice->setSliceCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
      boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
    }
#if HEVC_DEPENDENT_SLICES
    else if((!bCompressEntireSlice) && pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3))
    {
      pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
      boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
    }
#endif
    // If the bound was lowered to (or below) the current CTU, stop here; this CTU's bits are then
    // not added to the slice totals below.
    if (boundingCtuTsAddr <= ctuTsAddr)
    {
      break;
    }

#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
#pragma omp critical
#endif
    pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) );
    // NOTE(review): when HEVC_DEPENDENT_SLICES is 0 the statement after the pragma below is compiled
    // out, so the pragma would attach to whichever statement comes next — confirm this is intended.
#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
#pragma omp critical
#endif
#if HEVC_DEPENDENT_SLICES
    pcSlice->setSliceSegmentBits( pcSlice->getSliceSegmentBits() + numberOfWrittenBits );
#endif

#if HEVC_TILES_WPP
    // Store probabilities of second CTU in line into buffer - used only if wavefront-parallel-processing is enabled.
    if( ctuXPosInCtus == tileXPosInCtus + 1 && pEncLib->getEntropyCodingSyncEnabledFlag() )
    {
      pEncLib->m_entropyCodingSyncContextState = pCABACWriter->getCtx();
    }
#endif
#if ENABLE_WPP_PARALLELISM
    // Save the contexts after the CTU at column 1 of this row; the next row loads them at column 0 (see above).
    if( ctuXPosInCtus == 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
    {
      pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus] = pCABACWriter->getCtx();
    }
#endif

#if !ENABLE_WPP_PARALLELISM
    /******************************************** Bit accounting ****************************************/
    // Bits attributed to this CTU: cs.fracBits in whole bits minus the running picture total.
    // Presumably cs.fracBits accumulates across the CTUs processed so far — TODO confirm.
    int actualBits = int(cs.fracBits >> SCALE_BITS);
    actualBits    -= (int)m_uiPicTotalBits;
#endif
    if ( pCfg->getUseRateCtrl()/*false*/ )
    {
#if ENABLE_WPP_PARALLELISM
      int actualBits      = int( cs.fracBits >> SCALE_BITS );
      actualBits         -= (int)m_uiPicTotalBits;
#endif
      int actualQP        = g_RCInvalidQPValue;
      double actualLambda = pRdCost->getLambda();
      int numberOfEffectivePixels    = 0;

      // The loop stops at the first luma CU that is not skipped or has a root CBF, so
      // numberOfEffectivePixels holds at most that one CU's area; below it is only tested against zero.
      for( auto &cu : cs.traverseCUs( ctuArea, CH_L ) )
      {
        if( !cu.skip || cu.rootCbf )
        {
          numberOfEffectivePixels += cu.lumaSize().area();
          break;
        }
      }

      // CU located at the CTU's luma position (ctuArea.lumaPos()); its qp is what gets reported to rate control.
      CodingUnit* cu = cs.getCU( ctuArea.lumaPos(), CH_L );

      if ( numberOfEffectivePixels == 0 )
      {
        actualQP = g_RCInvalidQPValue;
      }
      else
      {
        actualQP = cu->qp;
      }
      // Only the RdCost lambda is restored in this branch; the TrQuant lambdas set above are left as they are.
      pRdCost->setLambda(oldLambda, pcSlice->getSPS()->getBitDepths());
      pRateCtrl->getRCPic()->updateAfterCTU( pRateCtrl->getRCPic()->getLCUCoded(), actualBits, actualQP, actualLambda,
                                             pcSlice->isIRAP() ? 0 : pCfg->getLCULevelRC() );
    }
#if ENABLE_QPA
    else if (pCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP()/*false*/)
    {
      // Undo the per-CTU QPA lambda change made before compressCtu().
#if RDOQ_CHROMA_LAMBDA
      pTrQuant->setLambdas (oldLambdaArray);
#else
      pTrQuant->setLambda (oldLambda);
#endif
      pRdCost->setLambda (oldLambda, pcSlice->getSPS()->getBitDepths());
    }
#endif

#if !ENABLE_WPP_PARALLELISM
    // Running picture totals: bits are accumulated, distortion is overwritten with cs.dist.
    m_uiPicTotalBits += actualBits;
    m_uiPicDist       = cs.dist;
#endif
#if ENABLE_WPP_PARALLELISM
    pcPic->scheduler.setReady( ctuXPosInCtus, ctuYPosInCtus );
#endif
  }
  // NOTE(review): %lld expects a long long; the declared type of m_uiPicTotalBits is not visible
  // here — confirm the format specifier matches it.
#if PRINT// added by the blog author
  printf("the number of total bits is %lld\n\n", m_uiPicTotalBits);
#endif

  // this is wpp exclusive section

//  m_uiPicTotalBits += actualBits;
//  m_uiPicDist       = cs.dist;

}

对于两个函数的意义,参考https://blog.csdn.net/cxy19931018/article/details/80672519

你可能感兴趣的:(H.266/VVC视频编码)