从H.266/VVC代码学习3:VTM4.0中的CTU层操作(encodeCtus)我们了解到了视频最基本的操作块为CTU,现在我们进一步学习CU层的操作。有两个十分重要的函数:
compressCtu:对encodeCtus中拿到的单个CTU进行操作,是进行CU划分、预测等处理的入口。内部最多会调用两次xCompressCU:第一次处理亮度;第二次处理色度(仅当亮度和色度使用独立划分树、当前slice为IRAP且色度格式不是4:0:0时才会执行)。可见compressCtu是亮度和色度划分的起点:处理完一个CTU的亮度分量后,紧接着处理这个CTU的色度分量,之后再处理下一个CTU。
xCompressCU:主要目的是选择预测模式和进行深一层的预测操作。其中xCheckModeSplit会用递归调用xCompressCU的方式遍历全部可能的划分方式,如64x64的亮度块(32x32的色度块)都会进行帧内模式、四叉树、两种二叉树这四种尝试,如32x32的亮度块(16x16的色度块)都会进行帧内模式、四叉树、两种二叉树、两种三叉树这六种尝试。下图为basketballdrill序列的色度划分信息可供理解:
以这样的方式,一步步划分到最底层,最后计算总RD代价确定究竟使用何种划分,并根据后面的预测记录最优模式信息,完成这个CTU的划分预测以及内部深层次的变换量化重建等部分。
VTM4.0中EncCu::compressCtu代码如下:
// Encoder-side entry point for a single CTU.
//
// Runs the CU-level search (xCompressCU) on the luma channel and, when the
// chroma branch below is taken, a second time on the chroma channel. After each
// pass the winning sub-structure (bestCS) is merged back into the top-level
// coding structure `cs`.
//
// cs        : top-level coding structure that receives the best result
// area      : area of the CTU being processed
// ctuRsAddr : CTU address; address 0 triggers the one-off IBC hash-map rebuild and
//             the value indexes the rate-control LCU record
//             (presumably a raster-scan address, judging by the name - confirm)
// prevQP    : indexed by channel type; copied into tempCS/bestCS->prevQP
// currQP    : indexed by channel type; copied into tempCS/bestCS->currQP and baseQP
void EncCu::compressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[], const int currQP[] )
{
// On the first CTU only (ctuRsAddr == 0) and with IBC hash search enabled: rebuild the
// IBC hash map from the original picture. When the reshaper is active, the luma original
// is first passed through the reshaper's forward LUT and afterwards restored from the
// true original buffer.
#if !JVET_M0255_FRACMMVD_SWITCH
if (m_pcEncCfg->getIBCHashSearch() && ctuRsAddr == 0 && cs.slice->getSPS()->getIBCMode())
{
#if JVET_M0427_INLOOP_RESHAPER
if (cs.slice->getSPS()->getUseReshaper() && m_pcReshape->getCTUFlag())
cs.picture->getOrigBuf(COMPONENT_Y).rspSignal(m_pcReshape->getFwdLUT());
#endif
m_ibcHashMap.rebuildPicHashMap(cs.picture->getOrigBuf());
#if JVET_M0427_INLOOP_RESHAPER
if (cs.slice->getSPS()->getUseReshaper() && m_pcReshape->getCTUFlag())
cs.picture->getOrigBuf().copyFrom(cs.picture->getTrueOrigBuf());
#endif
}
#endif
/*************************** Initialisation ***************************/
m_modeCtrl->initCTUEncoding( *cs.slice );
#if ENABLE_SPLIT_PARALLELISM
// With more than one split thread, initialise the block-info cache of every reserved
// split job's encoder (only if its mode controller actually is a CacheBlkInfoCtrl).
if( m_pcEncCfg->getNumSplitThreads() > 1 )
{
for( int jId = 1; jId < NUM_RESERVERD_SPLIT_JOBS; jId++ )
{
EncCu* jobEncCu = m_pcEncLib->getCuEncoder( cs.picture->scheduler.getSplitDataId( jId ) );
CacheBlkInfoCtrl* cacheCtrl = dynamic_cast< CacheBlkInfoCtrl* >( jobEncCu->m_modeCtrl );
if( cacheCtrl )
{
cacheCtrl->init( *cs.slice );
}
}
}
if( auto* cacheCtrl = dynamic_cast<CacheBlkInfoCtrl*>( m_modeCtrl ) ) { cacheCtrl->tick(); }
#endif
// init the partitioning manager; the first pass works on the luma channel (CH_L)
Partitioner *partitioner = PartitionerFactory::get( *cs.slice );
partitioner->initCtu( area, CH_L, *cs.slice );
// IBC bookkeeping: reset the IBC search state at the picture's top-left CTU, reset the
// per-CTU record for every CTU, and start from the configured local search range.
if (m_pcEncCfg->getIBCMode())
{
if (area.lx() == 0 && area.ly() == 0)
{
m_pcInterSearch->resetIbcSearch();
}
m_pcInterSearch->resetCtuRecord();
m_ctuIbcSearchRangeX = m_pcEncCfg->getIBCLocalSearchRangeX();
m_ctuIbcSearchRangeY = m_pcEncCfg->getIBCLocalSearchRangeY();
}
// Adaptive IBC search range: halve the range when the hash hit ratio of this CTU is
// below 5%, and halve it again when list 0 holds enough reference pictures (the
// threshold depends on JVET_M0483_IBC).
if (m_pcEncCfg->getIBCMode() && m_pcEncCfg->getIBCHashSearch() && (m_pcEncCfg->getIBCFastMethod() & IBC_FAST_METHOD_ADAPTIVE_SEARCHRANGE))
{
const int hashHitRatio = m_ibcHashMap.getHashHitRatio(area.Y()); // in percent
if (hashHitRatio < 5) // 5%
{
m_ctuIbcSearchRangeX >>= 1;
m_ctuIbcSearchRangeY >>= 1;
}
#if JVET_M0483_IBC
if (cs.slice->getNumRefIdx(REF_PIC_LIST_0) > 0)
#else
if (cs.slice->getNumRefIdx(REF_PIC_LIST_0) > 1)
#endif
{
m_ctuIbcSearchRangeX >>= 1;
m_ctuIbcSearchRangeY >>= 1;
}
}
// init current context pointer
m_CurrCtx = m_CtxBuffer.data();
// Pick the pre-allocated working/best coding structures and motion-candidate LUT
// buffers that match the CTU's luma width and height.
CodingStructure *tempCS = m_pTempCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];
CodingStructure *bestCS = m_pBestCS[gp_sizeIdxInfo->idxFrom( area.lumaSize().width )][gp_sizeIdxInfo->idxFrom( area.lumaSize().height )];
LutMotionCand *tempMotCandLUTs = m_pTempMotLUTs[gp_sizeIdxInfo->idxFrom(area.lumaSize().width)][gp_sizeIdxInfo->idxFrom(area.lumaSize().height)];
LutMotionCand *bestMotCandLUTs = m_pBestMotLUTs[gp_sizeIdxInfo->idxFrom(area.lumaSize().width)][gp_sizeIdxInfo->idxFrom(area.lumaSize().height)];
// Seed both LUT buffers with the slice's current motion LUTs.
cs.slice->copyMotionLUTs(cs.slice->getMotionLUTs(), tempMotCandLUTs);
cs.slice->copyMotionLUTs(cs.slice->getMotionLUTs(), bestMotCandLUTs);
/******************************* Luma pass ******************************/
cs.initSubStructure( *tempCS, partitioner->chType, partitioner->currArea(), false );
cs.initSubStructure( *bestCS, partitioner->chType, partitioner->currArea(), false );
tempCS->currQP[CH_L] = bestCS->currQP[CH_L] =
tempCS->baseQP = bestCS->baseQP = currQP[CH_L];
tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L];
xCompressCU( tempCS, bestCS, *partitioner// run the CU-level search for the luma CTU
, tempMotCandLUTs
, bestMotCandLUTs
);
// all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS
#if JVET_M0427_INLOOP_RESHAPER
const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
#else
const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS;
#endif
cs.useSubStructure( *bestCS, partitioner->chType, CS::getArea( *bestCS, area, partitioner->chType ), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals );
// Write the LUTs belonging to the best luma result back to the slice.
cs.slice->copyMotionLUTs(bestMotCandLUTs, cs.slice->getMotionLUTs());
#if !REVERSAL_LUMA_AND_CHROMA
/**************************** Chroma pass ****************************/
// Only taken when the single-tree flag is off, the slice is an IRAP slice and the
// chroma format is not 4:0:0.
if (!cs.pcv->ISingleTree && cs.slice->isIRAP() && cs.pcv->chrFormat != CHROMA_400)
{
// Restore the CABAC estimator contexts saved in m_CurrCtx->start before re-partitioning.
m_CABACEstimator->getCtx() = m_CurrCtx->start;
partitioner->initCtu(area, CH_C, *cs.slice);
cs.initSubStructure(*tempCS, partitioner->chType, partitioner->currArea(), false);
cs.initSubStructure(*bestCS, partitioner->chType, partitioner->currArea(), false);
tempCS->currQP[CH_C] = bestCS->currQP[CH_C] =
tempCS->baseQP = bestCS->baseQP = currQP[CH_C];
tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C];
xCompressCU(tempCS, bestCS, *partitioner// run the CU-level search for the chroma CTU
, tempMotCandLUTs
, bestMotCandLUTs
);
#if JVET_M0427_INLOOP_RESHAPER
const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1;
#else
const bool copyUnsplitCTUSignals = bestCS->cus.size() == 1 && KEEP_PRED_AND_RESI_SIGNALS;
#endif
cs.useSubStructure(*bestCS, partitioner->chType, CS::getArea(*bestCS, area, partitioner->chType), copyUnsplitCTUSignals, false, false, copyUnsplitCTUSignals);
}
#endif
/**************************** Reset contexts and wrap up **************************/
// Rate control: store the actual MSE of this CTU, i.e. bestCS->dist divided by the
// LCU's pixel count.
// NOTE(review): bestCS holds the result of the last xCompressCU call above, so after a
// chroma pass this presumably reflects that pass only - confirm this is intended.
if (m_pcEncCfg->getUseRateCtrl())
{
(m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr)).m_actualMSE = (double)bestCS->dist / (double)m_pcRateCtrl->getRCPic()->getLCU(ctuRsAddr).m_numberOfPixel;
}
// reset context states and uninit context pointer
m_CABACEstimator->getCtx() = m_CurrCtx->start;
m_CurrCtx = 0;
// The partitioner obtained from PartitionerFactory::get() above is released here.
delete partitioner;
#if ENABLE_SPLIT_PARALLELISM && ENABLE_WPP_PARALLELISM
if( m_pcEncCfg->getNumSplitThreads() > 1 && m_pcEncCfg->getNumWppThreads() > 1 )
{
cs.picture->finishCtuPart( area );
}
#endif
// Ensure that a coding was found
// Selected mode's RD-cost must be not MAX_DOUBLE.
CHECK( bestCS->cus.empty() , "No possible encoding found" );
CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" );
#if PRINT
//printf("(%d,%d) %d %d: sum bits is %lld\n", bestCS->area.blocks[0].x, bestCS->area.blocks[0].y, bestCS->area.blocks[0].width, bestCS->area.blocks[0].height, bestCS->fracBits);
// static uint64_t sum = 0;
// sum += (bestCS->fracBits >> 15);
// printf("(%d,%d) %d %d: chroma bits is %lld\n", bestCS->area.blocks[1].x, bestCS->area.blocks[1].y, bestCS->area.blocks[1].width, bestCS->area.blocks[1].height, sum);
#endif
}
VTM4.0中EncCu::xCompressCU代码如下:
// Rate-distortion search for the block currently described by `partitioner`.
//
// Walks through every test mode handed out by m_modeCtrl (inter ME, hash inter, affine
// merge, cached result, merge/skip, triangle merge, intra, PCM, IBC, IBC merge and the
// split modes) and dispatches each one to its xCheck* routine. Split modes go through
// xCheckModeSplit, which (per the original author's note) recurses into xCompressCU.
// Once all modes are tried, the best CABAC contexts are restored, the last CU's QP is
// recorded, the motion LUTs are updated when the best result is a single inter/IBC CU
// covering the whole area, and the prediction/reconstruction signals of bestCS are
// copied to the picture buffers.
//
// tempCS          : working coding structure for the mode under test
// bestCS          : best coding structure found so far
//                   (both are reference-to-pointer, presumably so that callees can swap
//                   them - confirm against the xCheck* implementations)
// partitioner     : current position/depth/channel type inside the CTU
// tempMotCandLUTs : working motion-candidate LUTs, loaded into the slice before the search
// bestMotCandLUTs : motion-candidate LUTs belonging to the best result
void EncCu::xCompressCU( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner
  , LutMotionCand *&tempMotCandLUTs
  , LutMotionCand *&bestMotCandLUTs
)
{
  /*************************************** Initialisation ****************************************/
#if JVET_M0170_MRG_SHARELIST
  // Outside a shared merge-list region the shared boundary is simply the block itself.
  if (m_shareState == NO_SHARE)
  {
    tempCS->sharedBndPos = tempCS->area.Y().lumaPos();
    tempCS->sharedBndSize.width = tempCS->area.lwidth();
    tempCS->sharedBndSize.height = tempCS->area.lheight();
    bestCS->sharedBndPos = bestCS->area.Y().lumaPos();
    bestCS->sharedBndSize.width = bestCS->area.lwidth();
    bestCS->sharedBndSize.height = bestCS->area.lheight();
  }
#endif
#if ENABLE_SPLIT_PARALLELISM
  CHECK( m_dataId != tempCS->picture->scheduler.getDataId(), "Working in the wrong dataId!" );
  // The master job (split job id 0) hands the block over to the parallel path when the
  // mode controller decides that it should be split in parallel.
  if( m_pcEncCfg->getNumSplitThreads() != 1 && tempCS->picture->scheduler.getSplitJobId() == 0 )
  {
    if( m_modeCtrl->isParallelSplit( *tempCS, partitioner ) )
    {
      m_modeCtrl->setParallelSplit( true );
      xCompressCUParallel( tempCS, bestCS, partitioner );
      return;
    }
  }
#endif
  Slice& slice = *tempCS->slice;   // slice this block belongs to
  const PPS &pps = *tempCS->pps;   // active PPS
  const SPS &sps = *tempCS->sps;   // active SPS
  const uint32_t uiLPelX = tempCS->area.Y().lumaPos().x;   // x of the top-left luma sample
  const uint32_t uiTPelY = tempCS->area.Y().lumaPos().y;   // y of the top-left luma sample
  const unsigned wIdx = gp_sizeIdxInfo->idxFrom( partitioner.currArea().lwidth() );
  const UnitArea currCsArea = clipArea( CS::getArea( *bestCS, bestCS->area, partitioner.chType ), *tempCS->picture );
#if JVET_M0483_IBC
  if (m_pImvTempCS && (!slice.isIntra() || slice.getSPS()->getIBCFlag()))
#else
  if( m_pImvTempCS && !slice.isIntra() )
#endif
  {
    tempCS->initSubStructure( *m_pImvTempCS[wIdx], partitioner.chType, partitioner.currArea(), false );
  }
  tempCS->chType = partitioner.chType;
  bestCS->chType = partitioner.chType;
  m_modeCtrl->initCULevel( partitioner, *tempCS );
#if JVET_M0140_SBT
  // At the root of the partitioning tree (QT depth 0 and MT depth 0) of a non-intra
  // slice, reset the SBT save/load information.
  if( partitioner.currQtDepth == 0 && partitioner.currMtDepth == 0 && !tempCS->slice->isIntra() && ( sps.getUseSBT() || sps.getUseInterMTS() ) )
  {
    auto* slsSbt = dynamic_cast<SaveLoadEncInfoSbt*>( m_modeCtrl );
    // dynamic_cast yields nullptr when m_modeCtrl is not a SaveLoadEncInfoSbt; report
    // that explicitly instead of dereferencing a null pointer.
    CHECK( slsSbt == nullptr, "Mode controller does not provide SBT save/load info" );
    int maxSLSize = sps.getUseSBT() ? tempCS->slice->getSPS()->getMaxSbtSize() : MTS_INTER_MAX_CU_SIZE;
    slsSbt->resetSaveloadSbt( maxSLSize );
  }
  m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;
#endif
  // Remember the CABAC contexts at block entry.
  m_CurrCtx->start = m_CABACEstimator->getCtx();
  /************************* Chroma QP offset index ***************************/
  m_cuChromaQpOffsetIdxPlus1 = 0;
  if( slice.getUseChromaQpAdj() )
  {
    int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
      std::max<int>( 0, sps.getLog2DiffMaxMinCodingBlockSize() - int( pps.getPpsRangeExtension().getDiffCuChromaQpOffsetDepth() ) );
    m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1 );
  }
  // Nothing to test for this block: close the CU level and leave.
  if( !m_modeCtrl->anyMode() )
  {
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }
  /********************* Motion-information LUTs (luma only) ********************/
  // For the luma channel of a non-intra slice (or, with JVET_M0483_IBC, whenever IBC is
  // enabled in the SPS), load the working motion LUTs into the slice.
#if JVET_M0483_IBC
  if ((!slice.isIntra() || slice.getSPS()->getIBCFlag())
#else
  if (!slice.isIntra()
#endif
    && tempCS->chType == CHANNEL_TYPE_LUMA
    )
  {
    tempCS->slice->copyMotionLUTs(tempMotCandLUTs, tempCS->slice->getMotionLUTs());
  }
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cux", uiLPelX ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuy", uiTPelY ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuw", tempCS->area.lwidth() ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) );
  DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() );
#if JVET_M0170_MRG_SHARELIST
  // NOTE(review): nothing in the code shown here ever sets this to 1, so the reset of
  // m_shareState after the loop is unreachable as written - confirm against the full
  // VTM source whether the code that raises the flag was dropped from this excerpt.
  int startShareThisLevel = 0;
#endif
#if JVET_M0246_AFFINE_AMVR
  m_pcInterSearch->resetSavedAffineMotion();
#endif
  /*************************** Mode loop: prediction and partitioning of this block ****************************/
  // Try every mode offered by the mode controller (skip/merge, inter, intra, PCM, IBC,
  // splits, ...). The loop ends when nextMode() reports that no mode is left.
  do
  {
    EncTestMode currTestMode = m_modeCtrl->currTestMode();
#if PRINT
    //printf("(%2d,%2d) %2d %2d : mode:%2d chan:%d\n", tempCS->area.blocks[1].x, tempCS->area.blocks[1].y, tempCS->area.blocks[1].width, tempCS->area.blocks[1].height, currTestMode.type, partitioner.chType);
#endif
    // Dual-tree chroma with delta QP enabled: take the QP from the luma CU that covers
    // the centre of the chroma block (if such a CU exists).
    if (tempCS->pps->getUseDQP() && CS::isDualITree(*tempCS) && isChroma(partitioner.chType))
    {
      const Position chromaCentral(tempCS->area.Cb().chromaPos().offset(tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1));
      const Position lumaRefPos(chromaCentral.x << getComponentScaleX(COMPONENT_Cb, tempCS->area.chromaFormat), chromaCentral.y << getComponentScaleY(COMPONENT_Cb, tempCS->area.chromaFormat));
      const CodingStructure* baseCS = bestCS->picture->cs;
      const CodingUnit* colLumaCu = baseCS->getCU(lumaRefPos, CHANNEL_TYPE_LUMA);
      if (colLumaCu)
      {
        currTestMode.qp = colLumaCu->qp;
      }
    }
#if SHARP_LUMA_DELTA_QP
    if( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && partitioner.currDepth <= pps.getMaxCuDQPDepth() )
    {
#if ENABLE_SPLIT_PARALLELISM
      CHECK( tempCS->picture->scheduler.getSplitJobId() > 0, "Changing lambda is only allowed in the master thread!" );
#endif
      if (currTestMode.qp >= 0)
      {
        updateLambda(&slice, currTestMode.qp);
      }
    }
#endif
    /************ Inter modes ************/
    if( currTestMode.type == ETM_INTER_ME )   // 1. inter motion estimation
    {
      if( ( currTestMode.opts & ETO_IMV ) != 0 )
      {
#if JVET_M0246_AFFINE_AMVR
        // Expose the current best CS to the callee for the duration of the call only.
        tempCS->bestCS = bestCS;
        xCheckRDCostInterIMV( tempCS, bestCS, partitioner, currTestMode );
        tempCS->bestCS = nullptr;
#else
        xCheckRDCostInterIMV(tempCS, bestCS, partitioner, currTestMode);
#endif
      }
      else
      {
#if JVET_M0246_AFFINE_AMVR
        tempCS->bestCS = bestCS;
        xCheckRDCostInter( tempCS, bestCS, partitioner, currTestMode );
        tempCS->bestCS = nullptr;
#else
        xCheckRDCostInter( tempCS, bestCS, partitioner, currTestMode );
#endif
      }
    }
#if JVET_M0253_HASH_ME
    else if (currTestMode.type == ETM_HASH_INTER)   // 2. inter: hash-based ME
    {
      xCheckRDCostHashInter( tempCS, bestCS, partitioner, currTestMode );
    }
#endif
    else if( currTestMode.type == ETM_AFFINE )   // 3. inter: affine merge
    {
      xCheckRDCostAffineMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
    }
#if REUSE_CU_RESULTS
    else if( currTestMode.type == ETM_RECO_CACHED )   // 4. reuse a cached result
    {
      xReuseCachedResult( tempCS, bestCS, partitioner );
    }
#endif
    else if( currTestMode.type == ETM_MERGE_SKIP )   // 5. inter: merge / skip
    {
      xCheckRDCostMerge2Nx2N( tempCS, bestCS, partitioner, currTestMode );
      CodingUnit* cu = bestCS->getCU(partitioner.chType);
      // mmvdSkip may only stay set on a CU that is actually skipped.
      if (cu)
        cu->mmvdSkip = cu->skip == false ? false : cu->mmvdSkip;
    }
    else if( currTestMode.type == ETM_MERGE_TRIANGLE )   // 6. inter: triangle merge
    {
      xCheckRDCostMergeTriangle2Nx2N( tempCS, bestCS, partitioner, currTestMode );
    }
    /************ Intra mode ***********/
    else if( currTestMode.type == ETM_INTRA )   // 7. intra
    {
      xCheckRDCostIntra( tempCS, bestCS, partitioner, currTestMode );
    }
    /************* Special modes *************/
    else if( currTestMode.type == ETM_IPCM )   // PCM
    {
      xCheckIntraPCM( tempCS, bestCS, partitioner, currTestMode );
    }
    else if (currTestMode.type == ETM_IBC)   // IBC (intra block copy)
    {
      xCheckRDCostIBCMode(tempCS, bestCS, partitioner, currTestMode);
    }
    else if (currTestMode.type == ETM_IBC_MERGE)   // IBC merge
    {
      xCheckRDCostIBCModeMerge2Nx2N(tempCS, bestCS, partitioner, currTestMode);
    }
    /******** Split modes (recursion) *********/
    else if( isModeSplit( currTestMode ) )
    {
      xCheckModeSplit( tempCS, bestCS, partitioner, currTestMode
        , tempMotCandLUTs
        , bestMotCandLUTs
        , partitioner.currArea()
      );
    }
    else
    {
      THROW( "Don't know how to handle mode: type = " << currTestMode.type << ", options = " << currTestMode.opts );
    }
  } while( m_modeCtrl->nextMode( *tempCS, partitioner ) );
#if JVET_M0170_MRG_SHARELIST
  if(startShareThisLevel == 1)
  {
    m_shareState = NO_SHARE;
    m_pcInterSearch->setShareState(m_shareState);
    setShareStateDec(m_shareState);
  }
#endif
  /********************************** All modes tested: record the best result **********************************/
  //////////////////////////////////////////////////////////////////////////
  // Finishing CU
#if ENABLE_SPLIT_PARALLELISM
  // A non-master split job may legitimately end up without a result.
  if( bestCS->cus.empty() )
  {
    CHECK( bestCS->cost != MAX_DOUBLE, "Cost should be maximal if no encoding found" );
    CHECK( bestCS->picture->scheduler.getSplitJobId() == 0, "Should always get a result in serial case" );
    m_modeCtrl->finishCULevel( partitioner );
    return;
  }
#endif
  // bestCS->cus.back() is dereferenced several times below. Fail with a diagnostic here
  // rather than running into undefined behaviour when no mode produced a CU.
  CHECK( bestCS->cus.empty(), "No possible encoding found" );
  // set context states
  m_CABACEstimator->getCtx() = m_CurrCtx->best;
  // QP from last processed CU for further processing
  bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp;
  // Update the motion LUTs only when the best result is one single inter (or, with
  // JVET_M0483_IBC, IBC) CU that covers the whole luma area of bestCS.
#if JVET_M0483_IBC
  if ((!slice.isIntra() || slice.getSPS()->getIBCFlag())
#else
  if (!slice.isIntra()
#endif
    && bestCS->chType == CHANNEL_TYPE_LUMA
#if JVET_M0483_IBC
    && bestCS->cus.size() == 1 && (bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC)
#else
    && bestCS->cus.size() == 1 && bestCS->cus.back()->predMode == MODE_INTER
#endif
    && bestCS->area.Y() == (*bestCS->cus.back()).Y()
    )
  {
    bestCS->slice->updateMotionLUTs(bestMotCandLUTs, (*bestCS->cus.back()));
  }
  // Publish the prediction (reshaper builds only) and reconstruction of the best result
  // to the picture buffers.
#if JVET_M0427_INLOOP_RESHAPER
  bestCS->picture->getPredBuf(currCsArea).copyFrom(bestCS->getPredBuf(currCsArea));
#endif
  bestCS->picture->getRecoBuf( currCsArea ).copyFrom( bestCS->getRecoBuf( currCsArea ) );
  m_modeCtrl->finishCULevel( partitioner );
#if ENABLE_SPLIT_PARALLELISM
  if( tempCS->picture->scheduler.getSplitJobId() == 0 && m_pcEncCfg->getNumSplitThreads() != 1 )
  {
    tempCS->picture->finishParallelPart( currCsArea );
  }
#endif
  // Assert if Best prediction mode is NONE
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus.empty() , "No possible encoding found" );
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost == MAX_DOUBLE , "No possible encoding found" );
}
笔者也只是对帧内预测有一定的了解,对于帧间等部分只有初步了解,尚未系统的学习。如果想了解帧内预测(xCheckRDCostIntra)的其他相关知识,请点击:H.266/VVC代码学习1:帧内预测框架,欢迎查看或指教,笔者会不断更新博客并对之前理解不深处进行勘误,以防老年痴呆,嘿嘿。