由于要研究比特数的相关内容,本周阅读了encodeCtus。学习笔记如下:
从 H.266/VVC代码学习4:VTM代码整体结构及最上层函数 进入,将视频划分为GOP后继续划分为帧,到这里划分成为了CTU,可以说是很小的一部分了。将CTU作为单位进行后续操作,下图中最左边进入的就是一帧,也能看出具体的操作单位是CTU。
后续进入compressCtu,对每一个CTU进行处理,具体可见H.266/VVC代码学习13:VTM4.0中的CU层操作(compressCtu 、 xCompressCU)
代码如下:
/**
 * Encoder-side pass over the CTUs whose addresses lie in [startCtuTsAddr, boundingCtuTsAddr).
 *
 * For every CTU in that range the function:
 *   1. derives the CTU position/area and (when tiles/WPP are compiled in) resets or restores the
 *      CABAC estimator contexts,
 *   2. optionally overrides lambda/QP for the CTU (rate control, or perceptual QPA when enabled),
 *   3. calls compressCtu() to take the mode decisions for the CTU,
 *   4. re-runs coding_tree_unit() on the CABAC estimator to obtain the estimated bit count,
 *   5. updates the slice bit counters, and may shorten the slice when it is in
 *      FIXED_NUMBER_OF_BYTES mode and the byte budget would be exceeded,
 *   6. feeds the per-CTU bits back to rate control and restores the previous lambda.
 *
 * \param pcPic                 picture to process; its coding structure is taken from pcPic->cs
 * \param bCompressEntireSlice  only read under HEVC_DEPENDENT_SLICES, where a true value skips the
 *                              slice-segment byte-limit check
 * \param bFastDeltaQP          not referenced anywhere in this function body
 * \param startCtuTsAddr        first CTU address to process ("Ts" addresses are mapped to raster-scan
 *                              addresses through the tile map when HEVC_TILES_WPP is set)
 * \param boundingCtuTsAddr     one past the last CTU address; the local copy is lowered when a byte
 *                              limit terminates the slice early
 * \param pEncLib               encoder library supplying the CABAC estimator, TrQuant, RdCost,
 *                              configuration and rate control objects
 */
void EncSlice::encodeCtus( Picture* pcPic, const bool bCompressEntireSlice, const bool bFastDeltaQP, uint32_t startCtuTsAddr, uint32_t boundingCtuTsAddr, EncLib* pEncLib )
{
//PROF_ACCUM_AND_START_NEW_SET( getProfilerCTU( pcPic, 0, 0 ), P_PIC_LEVEL );
//PROF_START( getProfilerCTU( cs.slice->isIntra(), pcPic->scheduler.getWppThreadId() ), P_PIC_LEVEL, toWSizeIdx( cs.pcv->maxCUWidth ), toHSizeIdx( cs.pcv->maxCUHeight ) );
/*************************************** Initialization *****************************************/
CodingStructure& cs = *pcPic->cs;
Slice* pcSlice = cs.slice;
const PreCalcValues& pcv = *cs.pcv;
const uint32_t widthInCtus = pcv.widthInCtus;
#if HEVC_TILES_WPP
const TileMap& tileMap = *pcPic->tileMap;
#endif
#if ENABLE_QPA
// Reference QP used below to turn a per-CTU adapted QP into a lambda scaling factor.
const int iQPIndex = pcSlice->getSliceQpBase();
#endif
#if ENABLE_WPP_PARALLELISM
const int dataId = pcPic->scheduler.getWppDataId();
#elif ENABLE_SPLIT_PARALLELISM
const int dataId = 0;
#endif
// Note: this is the CABAC *estimator*, i.e. the writer used for bit counting in this function.
CABACWriter* pCABACWriter = pEncLib->getCABACEncoder( PARL_PARAM0( dataId ) )->getCABACEstimator( pcSlice->getSPS() );
TrQuant* pTrQuant = pEncLib->getTrQuant( PARL_PARAM0( dataId ) );
RdCost* pRdCost = pEncLib->getRdCost( PARL_PARAM0( dataId ) );
EncCfg* pCfg = pEncLib;
RateCtrl* pRateCtrl = pEncLib->getRateCtrl();
#if ENABLE_WPP_PARALLELISM
// first version doesn't use ctx from above
pCABACWriter->initCtxModels( *pcSlice );
#endif
/******************************** QP and lambda setup *********************************/
#if RDOQ_CHROMA_LAMBDA
pTrQuant ->setLambdas( pcSlice->getLambdas() );
#else
pTrQuant ->setLambda ( pcSlice->getLambdas()[0] );
#endif
pRdCost ->setLambda ( pcSlice->getLambdas()[0], pcSlice->getSPS()->getBitDepths() );
// prevQP/currQP hold two entries each and both start at the slice QP; they are passed to
// compressCtu() and coding_tree_unit() below.
int prevQP[2];
int currQP[2];
prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
currQP[0] = currQP[1] = pcSlice->getSliceQp();
// The assignment below repeats the initialisation above; with HEVC_DEPENDENT_SLICES it is only
// executed when the slice is not a dependent slice segment.
#if HEVC_DEPENDENT_SLICES
if( !pcSlice->getDependentSliceSegmentFlag() )
{
#endif
prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
#if HEVC_DEPENDENT_SLICES
}
#endif
#if JVET_M0255_FRACMMVD_SWITCH
// Rebuild the IBC hash map from the original picture when either the fractional-MMVD switch is
// enabled in the SPS or IBC is on together with the encoder's IBC hash search.
if ( pcSlice->getSPS()->getDisFracMmvdEnabledFlag() ||
#if JVET_M0483_IBC
(pcSlice->getSPS()->getIBCFlag() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch()))
#else
( pcSlice->getSPS()->getIBCMode() && m_pcCuEncoder->getEncCfg()->getIBCHashSearch() ) )
#endif
{
#if JVET_M0427_INLOOP_RESHAPER
// With the reshaper active, the luma original is mapped through the forward LUT before hashing ...
#if JVET_M0483_IBC
if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
#else
if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCMode())
#endif
cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcLib->getReshaper()->getFwdLUT());
#endif
m_pcCuEncoder->getIbcHashMap().rebuildPicHashMap( cs.picture->getOrigBuf() );
#if JVET_M0427_INLOOP_RESHAPER
// ... and the original buffer is restored from the true original afterwards (same condition).
#if JVET_M0483_IBC
if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCFlag())
#else
if (pcSlice->getSPS()->getUseReshaper() && m_pcLib->getReshaper()->getCTUFlag() && pcSlice->getSPS()->getIBCMode())
#endif
cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf());
#endif
}
checkDisFracMmvd( pcPic, startCtuTsAddr, boundingCtuTsAddr );
#endif
// for every CTU in the slice segment (may terminate sooner if there is a byte limit on the slice-segment)
/***************************************** Per-CTU processing *****************************************/
for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )//iterate CTU by CTU over the requested address range
{
// Raster-scan address of the CTU: looked up through the tile map when tiles/WPP are compiled in,
// otherwise identical to ctuTsAddr. The type is signed only under JVET_M0055_DEBUG_CTU.
#if JVET_M0055_DEBUG_CTU
#if HEVC_TILES_WPP
const int32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap( ctuTsAddr );
#else
const int32_t ctuRsAddr = ctuTsAddr;
#endif
#else
#if HEVC_TILES_WPP
const uint32_t ctuRsAddr = tileMap.getCtuTsToRsAddrMap(ctuTsAddr);
#else
const uint32_t ctuRsAddr = ctuTsAddr;
#endif
#endif
#if HEVC_TILES_WPP
// Tile bookkeeping used for the CABAC state update further down: raster address of the first CTU
// of the current tile and that CTU's column.
const uint32_t firstCtuRsAddrOfTile = tileMap.tiles[tileMap.getTileIdxMap(ctuRsAddr)].getFirstCtuRsAddr();
const uint32_t tileXPosInCtus = firstCtuRsAddrOfTile % widthInCtus;
#endif
// CTU column/row in CTU units, its top-left position in samples, and the area it covers.
const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus;
const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus;
const Position pos (ctuXPosInCtus * pcv.maxCUWidth, ctuYPosInCtus * pcv.maxCUHeight);
const UnitArea ctuArea( cs.area.chromaFormat, Area( pos.x, pos.y, pcv.maxCUWidth, pcv.maxCUHeight ) );
DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
// Under JVET_M0055_DEBUG_CTU the following `if` guards the motion-LUT reset below (the two `if`s nest).
#if JVET_M0055_DEBUG_CTU
if( pCfg->getSwitchPOC() != pcPic->poc || -1 == pCfg->getDebugCTU() )
#endif
// Reset the slice's motion LUTs at the first CTU of each picture row for non-intra slices.
if ( pcSlice->getSliceType() != I_SLICE && ctuXPosInCtus == 0)
{
pcSlice->resetMotionLUTs();
}
#if ENABLE_WPP_PARALLELISM
pcPic->scheduler.wait( ctuXPosInCtus, ctuYPosInCtus );
#endif
#if HEVC_TILES_WPP
//initialise the encoder's CABAC contexts
if (ctuRsAddr == firstCtuRsAddrOfTile)
{
// First CTU of a tile: fresh contexts and prevQP back to the slice QP.
pCABACWriter->initCtxModels( *pcSlice );
prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
}
else if (ctuXPosInCtus == tileXPosInCtus && pEncLib->getEntropyCodingSyncEnabledFlag())
{
// reset and then update contexts to the state at the end of the top-right CTU (if within current slice and tile).
pCABACWriter->initCtxModels( *pcSlice );
if( cs.getCURestricted( pos.offset(pcv.maxCUWidth, -1), pcSlice->getIndependentSliceIdx(), tileMap.getTileIdxMap( pos ), CH_L ) )
{
// Top-right is available, we use it.
pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextState;
}
prevQP[0] = prevQP[1] = pcSlice->getSliceQp();
}
#endif
#if ENABLE_WPP_PARALLELISM
// At the start of every row but the first, load the context state stored for the previous row.
if( ctuXPosInCtus == 0 && ctuYPosInCtus > 0 && widthInCtus > 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
{
pCABACWriter->getCtx() = pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus-1]; // last line
}
#else
#endif
#if RDOQ_CHROMA_LAMBDA && ENABLE_QPA
double oldLambdaArray[MAX_NUM_COMPONENT] = {0.0};
#endif
// Remember the lambda in effect so it can be restored after this CTU.
const double oldLambda = pRdCost->getLambda();
// NOTE(review): the inline /*false*/ markers on conditions in this function appear to be the
// annotator's record that the branch was not taken in their runs; they do not affect the logic.
if ( pCfg->getUseRateCtrl() /*false*/)
{
// Rate control: choose a QP (and, at CTU level, a lambda) for this CTU.
int estQP = pcSlice->getSliceQp();
double estLambda = -1.0;
double bpp = -1.0;
if( ( pcPic->slices[0]->isIRAP() && pCfg->getForceIntraQP() ) || !pCfg->getLCULevelRC() )
{
// Forced intra QP, or CTU-level rate control disabled: keep the slice QP and leave lambda untouched.
estQP = pcSlice->getSliceQp();
}
else
{
bpp = pRateCtrl->getRCPic()->getLCUTargetBpp(pcSlice->isIRAP());
if ( pcPic->slices[0]->isIRAP())
{
estLambda = pRateCtrl->getRCPic()->getLCUEstLambdaAndQP(bpp, pcSlice->getSliceQp(), &estQP);
}
else
{
estLambda = pRateCtrl->getRCPic()->getLCUEstLambda( bpp );
estQP = pRateCtrl->getRCPic()->getLCUEstQP ( estLambda, pcSlice->getSliceQp() );
}
estQP = Clip3( -pcSlice->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, estQP );
pRdCost->setLambda(estLambda, pcSlice->getSPS()->getBitDepths());
#if RDOQ_CHROMA_LAMBDA
// set lambda for RDOQ
const double chromaLambda = estLambda / pRdCost->getChromaWeight();
const double lambdaArray[MAX_NUM_COMPONENT] = { estLambda, chromaLambda, chromaLambda };
pTrQuant->setLambdas( lambdaArray );
#else
pTrQuant->setLambda( estLambda );
#endif
}
pRateCtrl->setRCQP( estQP );
}
#if ENABLE_QPA
else if (pCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP()/*false*/)
{
// Perceptual QPA: scale lambda by 2^((adaptedQP - iQPIndex) / 3) for this CTU.
const int adaptedQP = pcPic->m_iOffsetCtu[ctuRsAddr];
const double newLambda = oldLambda * pow (2.0, double (adaptedQP - iQPIndex) / 3.0);
// The new lambda is stored per CTU in m_uEnerHpCtu.
pcPic->m_uEnerHpCtu[ctuRsAddr] = newLambda;
#if RDOQ_CHROMA_LAMBDA
pTrQuant->getLambdas (oldLambdaArray); // save the old lambdas
const double chromaLambda = newLambda / pRdCost->getChromaWeight();
const double lambdaArray[MAX_NUM_COMPONENT] = {newLambda, chromaLambda, chromaLambda};
pTrQuant->setLambdas (lambdaArray);
#else
pTrQuant->setLambda (newLambda);
#endif
pRdCost->setLambda (newLambda, pcSlice->getSPS()->getBitDepths());
currQP[0] = currQP[1] = adaptedQP;
}
#endif
// Only at the first CTU of this call, and only for B slices: reset the GBi coding order and the
// weight-index bit costs.
bool updateGbiCodingOrder = cs.slice->getSliceType() == B_SLICE && ctuTsAddr == startCtuTsAddr;
if( updateGbiCodingOrder /*false*/)
{
resetGbiCodingOrder(false, cs);
m_pcInterSearch->initWeightIdxBits();
}
#if JVET_M0427_INLOOP_RESHAPER && REUSE_CU_RESULTS
if (pcSlice->getSPS()->getUseReshaper())
{
m_pcCuEncoder->setDecCuReshaperInEncCU(m_pcLib->getReshaper(), pcSlice->getSPS()->getChromaFormatIdc());
}
#endif
// Under JVET_M0055_DEBUG_CTU the following `if` guards the compressCtu() call selected below.
#if JVET_M0055_DEBUG_CTU
if (pCfg->getSwitchPOC() != pcPic->poc || ctuRsAddr >= pCfg->getDebugCTU())
#endif
#if ENABLE_WPP_PARALLELISM
pEncLib->getCuEncoder( dataId )->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );
#else
/***************** Most important step: the mode decision for this CTU is made inside compressCtu() (annotator: partitioning and prediction parameters chosen by RD cost) ***********************/
m_pcCuEncoder->compressCtu( cs, ctuArea, ctuRsAddr, prevQP, currQP );////======== Key point 1 ========
#if PRINT//added by the author of these notes
//printf("(%d,%d) %d %d: luma bits is %lld\n", cs.area.blocks[0].x, cs.area.blocks[0].y, cs.area.blocks[0].width, cs.area.blocks[0].height, cs.fracBits);
#endif
#endif
#if K0149_BLOCK_STATISTICS
getAndStoreBlockStatistics(cs, ctuArea);
#endif
/*** Per CTU: run the CABAC estimator over the coded CTU (annotator: partitioning, prediction info, residuals and flags) to count its bits ***/
pCABACWriter->resetBits();
pCABACWriter->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true );//======== Key point 2 ========
const int numberOfWrittenBits = int( pCABACWriter->getEstFracBits() >> SCALE_BITS );//estimated number of bits for this CTU
#if LUMA_AND_CHROMA_BITS
// NOTE(review): both values below are computed from the same getEstFracBits() total as
// numberOfWrittenBits, so they are not split by component here; neither is used later in this function.
const int numberOfLumaBits = int(pCABACWriter->getEstFracBits() >> SCALE_BITS);
const int numberOfChromaBits = int(pCABACWriter->getEstFracBits() >> SCALE_BITS);
#endif
// Calculate if this CTU puts us over slice bit size.
// cannot terminate if current slice/slice-segment would be 0 Ctu in size,
const uint32_t validEndOfSliceCtuTsAddr = ctuTsAddr + (ctuTsAddr == startCtuTsAddr ? 1 : 0);
// Set slice end parameter
if(pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceBits()+numberOfWrittenBits > (pcSlice->getSliceArgument()<<3)/*false*/)
{
#if HEVC_DEPENDENT_SLICES
pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
#endif
pcSlice->setSliceCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
}
#if HEVC_DEPENDENT_SLICES
else if((!bCompressEntireSlice) && pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES && pcSlice->getSliceSegmentBits()+numberOfWrittenBits > (pcSlice->getSliceSegmentArgument()<<3))
{
pcSlice->setSliceSegmentCurEndCtuTsAddr(validEndOfSliceCtuTsAddr);
boundingCtuTsAddr=validEndOfSliceCtuTsAddr;
}
#endif
// The bound was just lowered to (or below) the current CTU: stop before accounting for its bits.
if (boundingCtuTsAddr <= ctuTsAddr)
{
break;
}
#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
#pragma omp critical
#endif
pcSlice->setSliceBits( ( uint32_t ) ( pcSlice->getSliceBits() + numberOfWrittenBits ) );
// NOTE(review): when HEVC_DEPENDENT_SLICES is 0, the pragma below attaches to whatever statement
// follows the #endif rather than to setSliceSegmentBits() - confirm this is intended.
#if ENABLE_WPP_PARALLELISM || ENABLE_SPLIT_PARALLELISM
#pragma omp critical
#endif
#if HEVC_DEPENDENT_SLICES
pcSlice->setSliceSegmentBits( pcSlice->getSliceSegmentBits() + numberOfWrittenBits );
#endif
#if HEVC_TILES_WPP
// Store probabilities of second CTU in line into buffer - used only if wavefront-parallel-processing is enabled.
if( ctuXPosInCtus == tileXPosInCtus + 1 && pEncLib->getEntropyCodingSyncEnabledFlag() )
{
pEncLib->m_entropyCodingSyncContextState = pCABACWriter->getCtx();
}
#endif
#if ENABLE_WPP_PARALLELISM
if( ctuXPosInCtus == 1 && ( pEncLib->getNumWppThreads() > 1 || pEncLib->getEnsureWppBitEqual() ) )
{
pEncLib->m_entropyCodingSyncContextStateVec[ctuYPosInCtus] = pCABACWriter->getCtx();
}
#endif
#if !ENABLE_WPP_PARALLELISM
/******************************************** Bit accounting ****************************************/
// Bits attributed to this CTU: cs.fracBits (in whole bits) minus the total accumulated so far
// in m_uiPicTotalBits.
int actualBits = int(cs.fracBits >> SCALE_BITS);
actualBits -= (int)m_uiPicTotalBits;
#endif
if ( pCfg->getUseRateCtrl()/*false*/ )
{
#if ENABLE_WPP_PARALLELISM
int actualBits = int( cs.fracBits >> SCALE_BITS );
actualBits -= (int)m_uiPicTotalBits;
#endif
int actualQP = g_RCInvalidQPValue;
double actualLambda = pRdCost->getLambda();
int numberOfEffectivePixels = 0;
// The loop stops at the first CU that is not skipped or has a root CBF, so the counter only
// serves as a "CTU contains at least one such CU" indicator, not as a full pixel count.
for( auto &cu : cs.traverseCUs( ctuArea, CH_L ) )
{
if( !cu.skip || cu.rootCbf )
{
numberOfEffectivePixels += cu.lumaSize().area();
break;
}
}
// QP reported to rate control is taken from the CU at the CTU's top-left luma position.
CodingUnit* cu = cs.getCU( ctuArea.lumaPos(), CH_L );
if ( numberOfEffectivePixels == 0 )
{
actualQP = g_RCInvalidQPValue;
}
else
{
actualQP = cu->qp;
}
// Restore the lambda that was active before this CTU, then report the CTU's outcome to rate control.
pRdCost->setLambda(oldLambda, pcSlice->getSPS()->getBitDepths());
pRateCtrl->getRCPic()->updateAfterCTU( pRateCtrl->getRCPic()->getLCUCoded(), actualBits, actualQP, actualLambda,
pcSlice->isIRAP() ? 0 : pCfg->getLCULevelRC() );
}
#if ENABLE_QPA
else if (pCfg->getUsePerceptQPA() && pcSlice->getPPS()->getUseDQP()/*false*/)
{
// Undo the per-CTU QPA lambda override applied before compressCtu().
#if RDOQ_CHROMA_LAMBDA
pTrQuant->setLambdas (oldLambdaArray);
#else
pTrQuant->setLambda (oldLambda);
#endif
pRdCost->setLambda (oldLambda, pcSlice->getSPS()->getBitDepths());
}
#endif
#if !ENABLE_WPP_PARALLELISM
// Accumulate picture-level totals (not done here in the WPP build; see the note after the loop).
m_uiPicTotalBits += actualBits;
m_uiPicDist = cs.dist;
#endif
#if ENABLE_WPP_PARALLELISM
pcPic->scheduler.setReady( ctuXPosInCtus, ctuYPosInCtus );
#endif
}
#if PRINT//added by the author of these notes
printf("the number of total bits is %lld\n\n", m_uiPicTotalBits);
#endif
// this is wpp exclusive section
// m_uiPicTotalBits += actualBits;
// m_uiPicDist = cs.dist;
}
关于上述两个重点函数(compressCtu 与 coding_tree_unit)的意义,可参考 https://blog.csdn.net/cxy19931018/article/details/80672519