H.265/HEVC的CU划分只有四叉树划分方式,如下图所示:
在HEVC的参考软件HM中,CU的划分是通过调用xCompressCU函数实现的,通过递归调用自身实现CU的划分以及编码,递归划分的流程比较简单,首先是对当前CU进行编码,获得当前整个CU的RD Cost,然后将当前CU进行四叉树划分,分别计算每一个子CU的RD Cost,最后比较划分后的四种子CU的RD Cost之和,从而决定当前CU是否进行划分。由于HM中xCompressCU函数是通过递归实现四叉树划分的,所以考虑划分时应该从下往上计算RD Cost。
以下16x16划分8x8为例,当块划分到16x16时,先首先调用一次xCheckRDCostIntra函数,以16x16大小的CU进行预测等操作,计算出RD Cost和失真,此时将计算得到的RD Cost存入到rpcBestCU中,在xCheckRDCostIntra中调用xCheckBestMode函数,将重建的16x16CU块存入到m_ppcRecoYuvBest[depth](这里的depth是2)中;然后进入for循环中,调用四次xCompressCU函数,计算出四个子CU的RD Cost与失真之和,将RD Cost存入到rpcTempCU中,并通过xCopyYuv2Tmp函数,将重建YUV存入到m_ppcRecoYuvTemp[depth](这里的depth是2)中。在最后,再调用xCheckBestMode函数,决定比较rpcBestCU, rpcTempCU的RD Cost决定是否对16x16CU进行进一步的划分,如果需要进一步划分,则将m_ppcRecoYuvTemp[depth](这里的depth是2)中的重建YUV拷贝到m_ppcRecoYuvBest[depth](这里的depth是2)中。
在进行CU划分时,注意每一个划分深度的CU的重建值、预测值以及残差值都是可以获取到的,需要注意其使用的位置。
TComDataCU** m_ppcBestCU; ///< Best CUs in each depth
TComDataCU** m_ppcTempCU; ///< Temporary CUs in each depth
UChar m_uhTotalDepth;
TComYuv** m_ppcPredYuvBest; ///< Best Prediction Yuv for each depth
TComYuv** m_ppcResiYuvBest; ///< Best Residual Yuv for each depth
TComYuv** m_ppcRecoYuvBest; ///< Best Reconstruction Yuv for each depth
TComYuv** m_ppcPredYuvTemp; ///< Temporary Prediction Yuv for each depth
TComYuv** m_ppcResiYuvTemp; ///< Temporary Residual Yuv for each depth
TComYuv** m_ppcRecoYuvTemp; ///< Temporary Reconstruction Yuv for each depth
TComYuv** m_ppcOrigYuv; ///< Original Yuv for each depth
代码及注释如下:
// ====================================================================================================================
// Protected member functions
// ====================================================================================================================
/** Compress a CU block recursively with enabling sub-CTU-level delta QP
* - for loop of QP value to compress the current CU with all possible QP
* 通过启用子CTU级增量QP,递归压缩CU块
* 循环所有可能的QP值,用所有可能的QP压缩当前CU
*/
#if AMP_ENC_SPEEDUP
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth DEBUG_STRING_FN_DECLARE(sDebug_), PartSize eParentPartSize )
#else
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, const UInt uiDepth )
#endif
{
TComPic* pcPic = rpcBestCU->getPic();
DEBUG_STRING_NEW(sDebug)
const TComPPS &pps=*(rpcTempCU->getSlice()->getPPS());
const TComSPS &sps=*(rpcTempCU->getSlice()->getSPS());
// These are only used if getFastDeltaQp() is true
const UInt fastDeltaQPCuMaxSize = Clip3(sps.getMaxCUHeight()>>sps.getLog2DiffMaxMinCodingBlockSize(), sps.getMaxCUHeight(), 32u);
// get Original YUV data from picture
// 获得原始YUV
m_ppcOrigYuv[uiDepth]->copyFromPicYuv( pcPic->getPicYuvOrg(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu() );
// variable for Cbf fast mode PU decision
Bool doNotBlockPu = true;
Bool earlyDetectionSkipMode = false;
const UInt uiLPelX = rpcBestCU->getCUPelX(); //CU左侧位置X
const UInt uiRPelX = uiLPelX + rpcBestCU->getWidth(0) - 1;//CU右侧位置X
const UInt uiTPelY = rpcBestCU->getCUPelY();//CU上侧位置Y
const UInt uiBPelY = uiTPelY + rpcBestCU->getHeight(0) - 1;//CU下侧位置Y
const UInt uiWidth = rpcBestCU->getWidth(0);
Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );
Int iMinQP;
Int iMaxQP;
Bool isAddLowestQP = false;
const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents();
if( uiDepth <= pps.getMaxCuDQPDepth() )
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
}
else
{
iMinQP = rpcTempCU->getQP(0);
iMaxQP = rpcTempCU->getQP(0);
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
{
if ( uiDepth <= pps.getMaxCuDQPDepth() )
{
// keep using the same m_QP_LUMA_OFFSET in the same CTU
m_lumaQPOffset = calculateLumaDQP(rpcTempCU, 0, m_ppcOrigYuv[uiDepth]);
}
iMinQP = Clip3(-sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP - m_lumaQPOffset);
iMaxQP = iMinQP; // force encode choose the modified QO
}
if ( m_pcEncCfg->getUseRateCtrl() )
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
}
// transquant-bypass (TQB) processing loop variable initialisation ---
const Int lowestQP = iMinQP; // For TQB, use this QP which is the lowest non TQB QP tested (rather than QP'=0) - that way delta QPs are smaller, and TQB can be tested at all CU levels.
if ( (pps.getTransquantBypassEnabledFlag()) )
{
isAddLowestQP = true; // mark that the first iteration is to cost TQB mode.
iMinQP = iMinQP - 1; // increase loop variable range by 1, to allow testing of TQB mode along with other QPs
if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
{
iMaxQP = iMinQP;
}
}
TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
// 是否在边界
const Bool bBoundary = !( uiRPelX < sps.getPicWidthInLumaSamples() && uiBPelY < sps.getPicHeightInLumaSamples() );
if ( !bBoundary )
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
if (bIsLosslessMode)
{
iQP = lowestQP;
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && uiDepth <= pps.getMaxCuDQPDepth() )
{
getSliceEncoder()->updateLambda(pcSlice, iQP);
}
m_cuChromaQpOffsetIdxPlus1 = 0;
if (pcSlice->getUseChromaQpAdj())
{
/* Pre-estimation of chroma QP based on input block activity may be performed
* here, using for example m_ppcOrigYuv[uiDepth] */
/* To exercise the current code, the index used for adjustment is based on
* block position
*/
Int lgMinCuSize = sps.getLog2MinCodingBlockSize() +
std::max(0, sps.getLog2DiffMaxMinCodingBlockSize()-Int(pps.getPpsRangeExtension().getDiffCuChromaQpOffsetDepth()));
m_cuChromaQpOffsetIdxPlus1 = ((uiLPelX >> lgMinCuSize) + (uiTPelY >> lgMinCuSize)) % (pps.getPpsRangeExtension().getChromaQpOffsetListLen() + 1);
}
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
// do inter modes, SKIP and 2Nx2N 帧间模式遍历 SKIP 和 2Nx2N
if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
{
// 2Nx2N
if(m_pcEncCfg->getUseEarlySkipDetection())
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );//by Competition for inter_2Nx2N
}
// SKIP
xCheckRDCostMerge2Nx2N( rpcBestCU, rpcTempCU DEBUG_STRING_PASS_INTO(sDebug), &earlyDetectionSkipMode );//by Merge for inter_2Nx2N
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(!m_pcEncCfg->getUseEarlySkipDetection())
{
// 2Nx2N, NxN
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode())
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
}
if (bIsLosslessMode) // Restore loop variable if lossless mode was searched.
{
iQP = iMinQP;
}
} //for (iQP)
if(!earlyDetectionSkipMode)
{
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
{
const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP); // If lossless, then iQP is irrelevant for subsequent modules.
if (bIsLosslessMode)
{
iQP = lowestQP;
}
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
// do inter modes, NxN, 2NxN, and Nx2N 帧间模式遍历 NxN, 2NxN, Nx2N
if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
{
// 2Nx2N, NxN
if(!( (rpcBestCU->getWidth(0)==8) && (rpcBestCU->getHeight(0)==8) ))
{
if( uiDepth == sps.getLog2DiffMaxMinCodingBlockSize() && doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_NxN DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
}
}
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_Nx2N )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
if(doNotBlockPu)
{
xCheckRDCostInter ( rpcBestCU, rpcTempCU, SIZE_2NxN DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxN)
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
//! Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
if(sps.getUseAMP() && uiDepth < sps.getLog2DiffMaxMinCodingBlockSize() )
{
#if AMP_ENC_SPEEDUP
Bool bTestAMP_Hor = false, bTestAMP_Ver = false;
#if AMP_MRG
Bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
#else
deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver);
#endif
//! Do horizontal AMP
if ( bTestAMP_Hor )
{
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
}
#if AMP_MRG
else if ( bTestMergeAMP_Hor )
{
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU DEBUG_STRING_PASS_INTO(sDebug), true );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD DEBUG_STRING_PASS_INTO(sDebug), true );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
}
#endif
//! Do horizontal AMP
if ( bTestAMP_Ver )
{
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
}
}
#if AMP_MRG
else if ( bTestMergeAMP_Ver )
{
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N DEBUG_STRING_PASS_INTO(sDebug), true );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
{
doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
}
}
if(doNotBlockPu)
{
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N DEBUG_STRING_PASS_INTO(sDebug), true );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode ); //初始化tempCU相关RD数据
}
}
#endif
#else
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
#endif
}
}
// do normal intra modes 帧内模式
// speedup for inter frames
#if MCTS_ENC_CHECK
if ( m_pcEncCfg->getTMCTSSEITileConstraint() || (rpcBestCU->getSlice()->getSliceType() == I_SLICE) ||
((!m_pcEncCfg->getDisableIntraPUsInInterSlices()) && (
(rpcBestCU->getCbf(0, COMPONENT_Y) != 0) ||
((rpcBestCU->getCbf(0, COMPONENT_Cb) != 0) && (numberValidComponents > COMPONENT_Cb)) ||
((rpcBestCU->getCbf(0, COMPONENT_Cr) != 0) && (numberValidComponents > COMPONENT_Cr)) // avoid very complex intra if it is unlikely
)))
{
#else
if((rpcBestCU->getSlice()->getSliceType() == I_SLICE) ||
((!m_pcEncCfg->getDisableIntraPUsInInterSlices()) && (
(rpcBestCU->getCbf( 0, COMPONENT_Y ) != 0) ||
((rpcBestCU->getCbf( 0, COMPONENT_Cb ) != 0) && (numberValidComponents > COMPONENT_Cb)) ||
((rpcBestCU->getCbf( 0, COMPONENT_Cr ) != 0) && (numberValidComponents > COMPONENT_Cr)) // avoid very complex intra if it is unlikely
)))
{
#endif
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
if( uiDepth == sps.getLog2DiffMaxMinCodingBlockSize() )
{
if( rpcTempCU->getWidth(0) > ( 1 << sps.getQuadtreeTULog2MinSize() ) )
{
xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN DEBUG_STRING_PASS_INTO(sDebug) );
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
}
}
}
// test PCM
if(sps.getUsePCM()
&& rpcTempCU->getWidth(0) <= (1<getWidth(0) >= (1<getWidth(0), rpcBestCU->getHeight(0), rpcBestCU->getPic()->getChromaFormat(), sps.getBitDepths().recon);
UInt uiBestBits = rpcBestCU->getTotalBits();
if((uiBestBits > uiRawBits) || (rpcBestCU->getTotalCost() > m_pcRdCost->calcRdCost(uiRawBits, 0)))
{
xCheckIntraPCM (rpcBestCU, rpcTempCU);
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
}
}
if (bIsLosslessMode) // Restore loop variable if lossless mode was searched.
{
iQP = iMinQP;
}
}
}
if( rpcBestCU->getTotalCost()!=MAX_DOUBLE )
{
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_NEXT_BEST]);
m_pcEntropyCoder->resetBits();
m_pcEntropyCoder->encodeSplitFlag( rpcBestCU, 0, uiDepth, true );
rpcBestCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
rpcBestCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
rpcBestCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcBestCU->getTotalBits(), rpcBestCU->getTotalDistortion() );
m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_NEXT_BEST]);
}
} //if ( !bBoundary )
// copy original YUV samples to PCM buffer
if( rpcBestCU->getTotalCost()!=MAX_DOUBLE && rpcBestCU->isLosslessCoded(0) && (rpcBestCU->getIPCMFlag(0) == false))
{
xFillPCMBuffer(rpcBestCU, m_ppcOrigYuv[uiDepth]);
}
if( uiDepth == pps.getMaxCuDQPDepth() )
{
Int idQP = m_pcEncCfg->getMaxDeltaQP();
iMinQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
iMaxQP = Clip3( -sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
}
else if( uiDepth < pps.getMaxCuDQPDepth() )
{
iMinQP = iBaseQP;
iMaxQP = iBaseQP;
}
else
{
const Int iStartQP = rpcTempCU->getQP(0);
iMinQP = iStartQP;
iMaxQP = iStartQP;
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() )
{
iMinQP = Clip3(-sps.getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP - m_lumaQPOffset);
iMaxQP = iMinQP;
}
if ( m_pcEncCfg->getUseRateCtrl() )
{
iMinQP = m_pcRateCtrl->getRCQP();
iMaxQP = m_pcRateCtrl->getRCQP();
}
if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
{
iMaxQP = iMinQP; // If all TUs are forced into using transquant bypass, do not loop here.
}
const Bool bSubBranch = bBoundary || !( m_pcEncCfg->getUseEarlyCU() && rpcBestCU->getTotalCost()!=MAX_DOUBLE && rpcBestCU->isSkipped(0) );
if( bSubBranch && uiDepth < sps.getLog2DiffMaxMinCodingBlockSize() && (!getFastDeltaQp() || uiWidth > fastDeltaQPCuMaxSize || bBoundary))
{
// further split 进行进一步划分
Double splitTotalCost = 0;
for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
{
const Bool bIsLosslessMode = false; // False at this level. Next level down may set it to true.
rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
UChar uhNextDepth = uiDepth+1;
TComDataCU* pcSubBestPartCU = m_ppcBestCU[uhNextDepth];
TComDataCU* pcSubTempPartCU = m_ppcTempCU[uhNextDepth];
DEBUG_STRING_NEW(sTempDebug)
// 遍历划分后的四个子区域
for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++ )
{
pcSubBestPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init.
pcSubTempPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init.
if( ( pcSubBestPartCU->getCUPelX() < sps.getPicWidthInLumaSamples() ) && ( pcSubBestPartCU->getCUPelY() < sps.getPicHeightInLumaSamples() ) )
{
if ( 0 == uiPartUnitIdx) //initialize RD with previous depth buffer
{
m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
}
else
{
m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
}
#if AMP_ENC_SPEEDUP
DEBUG_STRING_NEW(sChild)
if ( !(rpcBestCU->getTotalCost()!=MAX_DOUBLE && rpcBestCU->isInter(0)) )
{
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), NUMBER_OF_PART_SIZES );
}
else
{
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), rpcBestCU->getPartitionSize(0) );
}
DEBUG_STRING_APPEND(sTempDebug, sChild)
#else
xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth );
#endif
//将最佳部分数据保留为当前临时数据。
rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth );// Keep best part data to current temporary data.
// 将下一深度的最佳m_ppcRecoYuvBest拷贝到当前深度的m_ppcRecoYuvTemp部分区域中
xCopyYuv2Tmp( pcSubBestPartCU->getTotalNumPart()*uiPartUnitIdx, uhNextDepth );
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && pps.getMaxCuDQPDepth() >= 1 )
{
splitTotalCost += pcSubBestPartCU->getTotalCost();
}
}
else
{
pcSubBestPartCU->copyToPic( uhNextDepth );
rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth );
}
}
m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
if( !bBoundary )
{
m_pcEntropyCoder->resetBits();
m_pcEntropyCoder->encodeSplitFlag( rpcTempCU, 0, uiDepth, true );
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && pps.getMaxCuDQPDepth() >= 1 )
{
Int splitBits = m_pcEntropyCoder->getNumberOfWrittenBits();
Double splitBitCost = m_pcRdCost->calcRdCost( splitBits, 0 );
splitTotalCost += splitBitCost;
}
rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
}
if ( m_pcEncCfg->getLumaLevelToDeltaQPMapping().isEnabled() && pps.getMaxCuDQPDepth() >= 1 )
{
rpcTempCU->getTotalCost() = splitTotalCost;
}
else
{
rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
}
if( uiDepth == pps.getMaxCuDQPDepth() && pps.getUseDQP())
{
Bool hasResidual = false;
for( UInt uiBlkIdx = 0; uiBlkIdx < rpcTempCU->getTotalNumPart(); uiBlkIdx ++)
{
if( ( rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Y)
|| (rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Cb) && (numberValidComponents > COMPONENT_Cb))
|| (rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Cr) && (numberValidComponents > COMPONENT_Cr)) ) )
{
hasResidual = true;
break;
}
}
if ( hasResidual )
{
m_pcEntropyCoder->resetBits();
m_pcEntropyCoder->encodeQP( rpcTempCU, 0, false );
rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // dQP bits
rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
Bool foundNonZeroCbf = false;
rpcTempCU->setQPSubCUs( rpcTempCU->getRefQP( 0 ), 0, uiDepth, foundNonZeroCbf );
assert( foundNonZeroCbf );
}
else
{
rpcTempCU->setQPSubParts( rpcTempCU->getRefQP( 0 ), 0, uiDepth ); // set QP to default QP
}
}
m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
// If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then
// a proper RD evaluation cannot be performed. Therefore, termination of the
// slice/slice-segment must be made prior to this CTU.
// This can be achieved by forcing the decision to be that of the rpcTempCU.
// The exception is each slice / slice-segment must have at least one CTU.
if (rpcBestCU->getTotalCost()!=MAX_DOUBLE)
{
const Bool isEndOfSlice = pcSlice->getSliceMode()==FIXED_NUMBER_OF_BYTES
&& ((pcSlice->getSliceBits()+rpcBestCU->getTotalBits())>pcSlice->getSliceArgument()<<3)
&& rpcBestCU->getCtuRsAddr() != pcPic->getPicSym()->getCtuTsToRsAddrMap(pcSlice->getSliceCurStartCtuTsAddr())
&& rpcBestCU->getCtuRsAddr() != pcPic->getPicSym()->getCtuTsToRsAddrMap(pcSlice->getSliceSegmentCurStartCtuTsAddr());
const Bool isEndOfSliceSegment = pcSlice->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES
&& ((pcSlice->getSliceSegmentBits()+rpcBestCU->getTotalBits()) > pcSlice->getSliceSegmentArgument()<<3)
&& rpcBestCU->getCtuRsAddr() != pcPic->getPicSym()->getCtuTsToRsAddrMap(pcSlice->getSliceSegmentCurStartCtuTsAddr());
// Do not need to check slice condition for slice-segment since a slice-segment is a subset of a slice.
if(isEndOfSlice||isEndOfSliceSegment)
{
rpcBestCU->getTotalCost()=MAX_DOUBLE;
}
}
// 检查最优模式,如果rpcTempCU的代价小于rpcBestCU存的代价,则将temp中的数据拷贝到best中
xCheckBestMode( rpcBestCU, rpcTempCU, uiDepth DEBUG_STRING_PASS_INTO(sDebug) DEBUG_STRING_PASS_INTO(sTempDebug) DEBUG_STRING_PASS_INTO(false) ); // RD compare current larger prediction // with sub partitioned prediction.
}
}
DEBUG_STRING_APPEND(sDebug_, sDebug);
// 将最佳数据复制到图片以进行下一分区预测。
rpcBestCU->copyToPic(uiDepth);// Copy Best data to Picture for next partition prediction.
// 将Yuv数据复制到图片Yuv
xCopyYuv2Pic( rpcBestCU->getPic(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu(), uiDepth, uiDepth ); // Copy Yuv data to picture Yuv
if (bBoundary)
{
return;
}
// Assert if Best prediction mode is NONE
// Selected mode's RD-cost must be not MAX_DOUBLE.
assert( rpcBestCU->getPartitionSize ( 0 ) != NUMBER_OF_PART_SIZES );
assert( rpcBestCU->getPredictionMode( 0 ) != NUMBER_OF_PREDICTION_MODES );
assert( rpcBestCU->getTotalCost ( ) != MAX_DOUBLE );
}