1、帧间预测分为两种:merge模式(skip模式是一种特殊的merge模式)和AMVP模式(即普通的帧间预测模式);
2、merge模式只支持2Nx2N的划分;
3、帧内预测只支持2Nx2N和NxN的划分模式。
iMinQP和iMaxQP这两个值可以从码率控制对象中得到,也可以自定义。
那么处理帧内预测的两种模式:2Nx2N和NxN
注意满足一定条件才会尝试PCM模式。
同样通过一个循环遍历iMinQP到iMaxQP之间的所有QP,对于每一个QP,递归调用xCompressCU。
总结:xCompressCU的作用就是从LCU开始按深度递归遍历,计算每一个depth上的最优模式,再综合比较各个depth上的最优模式,选出最终的最优模式。
为了便于理解把xCompressCU的一些无关代码删除,下面是精简版的xCompressCU
/*
** Internal CU compression routine (abridged listing).
**
** NOTE: this is a shortened version of the function. Code that the original
** author considered irrelevant to understanding prediction was removed; those
** places are marked "omitted". Several names used below (bSliceEnd,
** bSliceStart, bInsidePicture, isAddLowestQP, lowestQP, earlyDetectionSkipMode,
** doNotBlockPu, bSubBranch, bBoundary, pcPic, pcSlice) are not declared in the
** visible part and are presumably declared in the omitted code.
**
** What the visible code does for the CU at depth uiDepth:
**   1. For every QP in [iMinQP, iMaxQP] it tries merge/skip and inter 2Nx2N,
**      then the remaining inter partitions (NxN, Nx2N, 2NxN and the AMP
**      shapes), then intra 2Nx2N / NxN, then PCM. Each xCheck* call receives
**      both rpcBestCU and rpcTempCU, and rpcTempCU is re-initialised after it.
**   2. It adds the split-flag bits to rpcBestCU and recomputes its RD cost.
**   3. If further splitting is allowed, it recurses into the four sub-CUs at
**      uiDepth+1, accumulates their results in rpcTempCU and lets
**      xCheckBestMode compare the split result against the unsplit best.
**
** rpcBestCU        in/out: best candidate found so far at this depth.
** rpcTempCU        in/out: scratch candidate used for each trial.
** uiDepth          depth of the current CU in the coding quadtree.
** eParentPartSize  (AMP_ENC_SPEEDUP only) partition size of the parent CU,
**                  forwarded to deriveTestModeAMP.
*/
#if AMP_ENC_SPEEDUP // encoder speed-up macro
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth, PartSize eParentPartSize )
#else
Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth )
#endif
{
  Int iBaseQP = xComputeQP( rpcBestCU, uiDepth ); // base QP (author's note: 32 in their run)
  // Lower bound of the QP range to test.
  Int iMinQP;
  // Upper bound of the QP range to test.
  Int iMaxQP;
  // Rate control enabled: both bounds are taken from the QP computed by the
  // rate-control object, so exactly one QP is tested. This is how the rate
  // controller steers the encoder's bit rate.
  // NOTE(review): in this abridged listing the bounds are assigned only on this
  // path; the omitted code presumably initialises them otherwise - confirm.
  if ( m_pcEncCfg->getUseRateCtrl() )
  {
    iMinQP = m_pcRateCtrl->getRCQP();
    iMaxQP = m_pcRateCtrl->getRCQP();
  }
  // ... omitted: code not needed for understanding prediction ...
  /*
  ** Core part
  */
  if(!bSliceEnd && !bSliceStart && bInsidePicture )
  {
    // Try every QP in the range; the RD cost of each trial is evaluated by the
    // xCheck* helpers.
    for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++) // a single iteration when iMinQP == iMaxQP
    {
      // Lossless trial? (author's note: false in their run)
      const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
      if (bIsLosslessMode)
      {
        // Temporarily switch to lowestQP; the loop variable is restored at the
        // end of the iteration.
        iQP = lowestQP;
      }
      // Reset the scratch CU for this QP.
      rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
      // do inter modes, SKIP and 2Nx2N
      /*
      ** Before any other mode, handle inter skip/merge and inter 2Nx2N.
      ** The 2Nx2N partition is evaluated twice:
      **   1. as merge mode       - xCheckRDCostMerge2Nx2N
      **   2. as regular inter    - xCheckRDCostInter (AMVP, per the author)
      */
      if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
      {
        // With early-skip detection enabled, inter 2Nx2N is evaluated before merge.
        if(m_pcEncCfg->getUseEarlySkipDetection())
        {
          xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N );
          rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );//by Competition for inter_2Nx2N
        }
        // Merge mode.
        xCheckRDCostMerge2Nx2N( rpcBestCU, rpcTempCU, &earlyDetectionSkipMode );//by Merge for inter_2Nx2N
        rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
        // Without early-skip detection, inter 2Nx2N is evaluated after merge.
        if(!m_pcEncCfg->getUseEarlySkipDetection())
        {
          xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N );
          rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
          if(m_pcEncCfg->getUseCbfFastMode())
          {
            // CBF fast mode: a zero root CBF on the current best blocks the
            // remaining PU shapes below.
            doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
          }
        }
      }
      if (bIsLosslessMode)
      {
        // Restore the loop variable changed for the lossless trial.
        iQP = iMinQP;
      }
    }
    if(!earlyDetectionSkipMode)
    {
      // Author's note: in practice the LCU is handled in units of 4x4 blocks
      // visited in Z-scan order, which is convenient for the recursion.
      // Try every QP in the range.
      for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
      {
        const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
        if (bIsLosslessMode)
        {
          iQP = lowestQP;
        }
        rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
        // do inter modes, NxN, 2NxN, and Nx2N
        /*
        ** Regular inter prediction for the remaining partitions.
        ** Merge and inter 2Nx2N are not repeated here: 2Nx2N was already
        ** handled above, and (per the author) merge is only tried for 2Nx2N.
        */
        if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
        {
          // Inter NxN: skipped for 8x8 CUs and only tried at the deepest CU depth.
          if(!( (rpcBestCU->getWidth(0)==8) && (rpcBestCU->getHeight(0)==8) ))
          {
            if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth && doNotBlockPu)
            {
              xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_NxN );
              rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            }
          }
          // Inter Nx2N.
          if(doNotBlockPu)
          {
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_Nx2N );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_Nx2N )
            {
              doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
            }
          }
          // Inter 2NxN.
          if(doNotBlockPu)
          {
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxN );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxN)
            {
              doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
            }
          }
#if 1
          //! Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
          /*
          ** Asymmetric partitions (2NxnU, 2NxnD, nLx2N, nRx2N):
          ** 1. With AMP_ENC_SPEEDUP enabled:
          **    (1) if bTestAMP_Hor / bTestAMP_Ver is true, the corresponding
          **        pair of shapes is tested normally;
          **    (2) otherwise, if AMP_MRG is enabled and bTestMergeAMP_Hor /
          **        bTestMergeAMP_Ver is true, the pair is still tested, with an
          **        extra `true` argument passed to xCheckRDCostInter;
          **    (3) otherwise the pair is not tested, which saves time.
          ** 2. With AMP_ENC_SPEEDUP disabled all four shapes are always tested,
          **    which is slower.
          */
          if( pcPic->getSlice(0)->getSPS()->getAMPAcc(uiDepth) )
          {
#if AMP_ENC_SPEEDUP
            Bool bTestAMP_Hor = false, bTestAMP_Ver = false;
#if AMP_MRG
            Bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
            // Derive which AMP directions should be tested.
            deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
#else // else of AMP_MRG
            deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver);
#endif // end of AMP_MRG
            //! Do horizontal AMP
            // bTestAMP_Hor enables the 2NxnU and 2NxnD partitions.
            if ( bTestAMP_Hor )
            {
              // 2NxnU
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
              // 2NxnD
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
            }
#if AMP_MRG
            // bTestMergeAMP_Hor also enables 2NxnU and 2NxnD (merge-flagged call).
            else if ( bTestMergeAMP_Hor )
            {
              // 2NxnU
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU, true );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
              // 2NxnD
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD, true );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
            }
#endif // end of AMP_MRG
            //! Do vertical AMP
            // bTestAMP_Ver enables the nLx2N and nRx2N partitions.
            if ( bTestAMP_Ver )
            {
              // nLx2N
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
              // nRx2N
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
              }
            }
#if AMP_MRG
            // bTestMergeAMP_Ver also enables nLx2N and nRx2N (merge-flagged call).
            else if ( bTestMergeAMP_Ver )
            {
              // nLx2N
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N, true );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
                if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
                {
                  doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
                }
              }
              // nRx2N
              if(doNotBlockPu)
              {
                xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N, true );
                rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
              }
            }
#endif // end of AMP_MRG
#else // else of AMP_ENC_SPEEDUP
            // No speed-up: test all four asymmetric shapes unconditionally.
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N );
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
#endif // end of AMP_ENC_SPEEDUP
          }
#endif
        } // end of inter prediction
        // do normal intra modes
        // speedup for inter frames
        // Intra prediction: only the 2Nx2N and NxN partitions are tried. In
        // non-I slices it is tried only if the current best has a non-zero CBF.
        if( rpcBestCU->getSlice()->getSliceType() == I_SLICE ||
            rpcBestCU->getCbf( 0, TEXT_LUMA ) != 0 ||
            rpcBestCU->getCbf( 0, TEXT_CHROMA_U ) != 0 ||
            rpcBestCU->getCbf( 0, TEXT_CHROMA_V ) != 0 ) // avoid very complex intra if it is unlikely
        {
          // Intra 2Nx2N.
          xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_2Nx2N );
          rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
          // Intra NxN: only at the deepest CU depth
          // (author's note: g_uiMaxCUDepth=4, g_uiAddCUDepth=1 in their run).
          if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth )
          {
            if( rpcTempCU->getWidth(0) > ( 1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) )
            {
              xCheckRDCostIntra( rpcBestCU, rpcTempCU, SIZE_NxN );
              rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
            }
          }
        } // end of intra prediction
        // test PCM
        // PCM is tried only when enabled in the SPS and the CU width lies within
        // [1 << PCMLog2MinSize, 1 << PCMLog2MaxSize].
        if(pcPic->getSlice(0)->getSPS()->getUsePCM()
          && rpcTempCU->getWidth(0) <= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MaxSize())
          && rpcTempCU->getWidth(0) >= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MinSize()) )
        {
          UInt uiRawBits = (2 * g_bitDepthY + g_bitDepthC) * rpcBestCU->getWidth(0) * rpcBestCU->getHeight(0) / 2;
          UInt uiBestBits = rpcBestCU->getTotalBits();
          // Only worth trying when the best mode so far needs more bits than
          // uiRawBits, or costs more than a distortion-free uiRawBits coding.
          if((uiBestBits > uiRawBits) || (rpcBestCU->getTotalCost() > m_pcRdCost->calcRdCost(uiRawBits, 0)))
          {
            xCheckIntraPCM (rpcBestCU, rpcTempCU);
            rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
          }
        }
        if (bIsLosslessMode)
        {
          // Restore the loop variable changed for the lossless trial.
          iQP = iMinQP;
        }
      }
    }
    // Reset the bit counter.
    m_pcEntropyCoder->resetBits();
    // Encode the split flag of the best (unsplit) candidate.
    m_pcEntropyCoder->encodeSplitFlag( rpcBestCU, 0, uiDepth, true );
    // Account for the bits/bins just written.
    rpcBestCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
    rpcBestCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
    // Recompute the total RD cost including the split flag.
    rpcBestCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcBestCU->getTotalBits(), rpcBestCU->getTotalDistortion() );
    // Early CU determination: do not descend further when the best mode is skip.
    // (author's note: not enabled in the HM15.0 configuration they used)
    if( m_pcEncCfg->getUseEarlyCU() && rpcBestCU->isSkipped(0) )
    {
      bSubBranch = false;
    }
    else
    {
      bSubBranch = true;
    }
  }//if(!bSliceEnd && !bSliceStart && bInsidePicture )
  else if(!(bSliceEnd && bInsidePicture))
  {
    bBoundary = true;
  }
  // ... omitted: code not needed for understanding prediction ...
  // For every QP in the range, recurse into the sub-CUs and then compare the
  // split result against the unsplit best.
  for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
  {
    const Bool bIsLosslessMode = false; // False at this level. Next level down may set it to true.
    rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
    // further split
    if( bSubBranch && uiDepth < g_uiMaxCUDepth - g_uiAddCUDepth )
    {
      UChar uhNextDepth = uiDepth+1;
      TComDataCU* pcSubBestPartCU = m_ppcBestCU[uhNextDepth];
      TComDataCU* pcSubTempPartCU = m_ppcTempCU[uhNextDepth];
      // The current CU is split into four sub-CUs.
      for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++ )
      {
        pcSubBestPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init.
        pcSubTempPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP ); // clear sub partition datas or init.
        // The sub-CU's address range must overlap the current slice segment.
        Bool bInSlice = pcSubBestPartCU->getSCUAddr()+pcSubBestPartCU->getTotalNumPart()>pcSlice->getSliceSegmentCurStartCUAddr()&&pcSubBestPartCU->getSCUAddr()<pcSlice->getSliceSegmentCurEndCUAddr();
        if(bInSlice && ( pcSubBestPartCU->getCUPelX() < pcSlice->getSPS()->getPicWidthInLumaSamples() ) && ( pcSubBestPartCU->getCUPelY() < pcSlice->getSPS()->getPicHeightInLumaSamples() ) )
        {
          if ( 0 == uiPartUnitIdx) //initialize RD with previous depth buffer
          {
            m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
          }
          else
          {
            // Later sub-CUs load the CI_NEXT_BEST coder of the same depth.
            m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
          }
#if AMP_ENC_SPEEDUP // encoder speed-up enabled
          // Pass the parent's partition size down (SIZE_NONE for an intra parent).
          if ( rpcBestCU->isIntra(0) )
          {
            xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth, SIZE_NONE );
          }
          else
          {
            xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth, rpcBestCU->getPartitionSize(0) );
          }
#else // encoder speed-up disabled
          // Recurse into the sub-CU.
          xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth );
#endif
          rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth ); // Keep best part data to current temporary data.
          xCopyYuv2Tmp( pcSubBestPartCU->getTotalNumPart()*uiPartUnitIdx, uhNextDepth );
        }
        else if (bInSlice)
        {
          // In the slice but starting beyond the picture width/height: not compressed.
          pcSubBestPartCU->copyToPic( uhNextDepth );
          rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth );
        }
      }
      // Compute and update the best cost - begin
      if( !bBoundary )
      {
        m_pcEntropyCoder->resetBits();
        m_pcEntropyCoder->encodeSplitFlag( rpcTempCU, 0, uiDepth, true );
        rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
        rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
      }
      // RD cost of the split candidate.
      rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
      // Delta-QP handling when this CU size equals the minimum dQP CU size.
      if( (g_uiMaxCUWidth>>uiDepth) == rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() && rpcTempCU->getSlice()->getPPS()->getUseDQP())
      {
        // Look for a part in the current slice segment with a non-zero CBF.
        Bool hasResidual = false;
        for( UInt uiBlkIdx = 0; uiBlkIdx < rpcTempCU->getTotalNumPart(); uiBlkIdx ++)
        {
          if( ( pcPic->getCU( rpcTempCU->getAddr() )->getSliceSegmentStartCU(uiBlkIdx+rpcTempCU->getZorderIdxInCU()) == rpcTempCU->getSlice()->getSliceSegmentCurStartCUAddr() ) &&
              ( rpcTempCU->getCbf( uiBlkIdx, TEXT_LUMA ) || rpcTempCU->getCbf( uiBlkIdx, TEXT_CHROMA_U ) || rpcTempCU->getCbf( uiBlkIdx, TEXT_CHROMA_V ) ) )
          {
            hasResidual = true;
            break;
          }
        }
        // Part index used as the QP reference: offset of the slice-segment start
        // inside this CU when the CU's first part is in another segment, else 0.
        UInt uiTargetPartIdx;
        if ( pcPic->getCU( rpcTempCU->getAddr() )->getSliceSegmentStartCU(rpcTempCU->getZorderIdxInCU()) != pcSlice->getSliceSegmentCurStartCUAddr() )
        {
          uiTargetPartIdx = pcSlice->getSliceSegmentCurStartCUAddr() % pcPic->getNumPartInCU() - rpcTempCU->getZorderIdxInCU();
        }
        else
        {
          uiTargetPartIdx = 0;
        }
        if ( hasResidual )
        {
#if !RDO_WITHOUT_DQP_BITS
          // Include the dQP bits in the RD cost.
          m_pcEntropyCoder->resetBits();
          m_pcEntropyCoder->encodeQP( rpcTempCU, uiTargetPartIdx, false );
          rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // dQP bits
          rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
          rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
#endif
          Bool foundNonZeroCbf = false;
          rpcTempCU->setQPSubCUs( rpcTempCU->getRefQP( uiTargetPartIdx ), rpcTempCU, 0, uiDepth, foundNonZeroCbf );
          assert( foundNonZeroCbf );
        }
        else
        {
          rpcTempCU->setQPSubParts( rpcTempCU->getRefQP( uiTargetPartIdx ), 0, uiDepth ); // set QP to default QP
        }
      }
      m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
      // Byte-limited slice / slice segment: check whether the unsplit best
      // exceeds the configured size (argument in bytes, hence << 3 for bits).
      Bool isEndOfSlice = rpcBestCU->getSlice()->getSliceMode()==FIXED_NUMBER_OF_BYTES
        && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceArgument()<<3);
      Bool isEndOfSliceSegment = rpcBestCU->getSlice()->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES
        && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceSegmentArgument()<<3);
      if(isEndOfSlice||isEndOfSliceSegment)
      {
        // Make the unsplit best cost exceed the split cost before the comparison.
        rpcBestCU->getTotalCost()=rpcTempCU->getTotalCost()+1;
      }
      // Compare the split candidate against the current best.
      xCheckBestMode( rpcBestCU, rpcTempCU, uiDepth); // RD compare current larger prediction
      // Compute and update the best cost - end
    } // with sub partitioned prediction.
  }
  // ... omitted: code not needed for understanding prediction ...
}