HEVC代码学习42:estIntraPredLumaQT函数

在之前的 HEVC代码学习37:帧内预测代码整体学习 中已经提到,estIntraPredLumaQT是亮度帧内预测的入口函数,下面将对该函数进行详细学习。

estIntraPredLumaQT中完成了亮度分量的帧内预测,其主要流程如下:
一、初始化各种参数。
二、为了减少率失真优化的次数,HM(HEVC参考软件)默认使用帧内快速搜索算法,分为粗选和细选两个阶段进行。如果不使用快速搜索,则对所有帧内预测模式进行率失真优化。
1.粗选阶段:遍历35种帧内预测模式,使用predIntraAng计算预测值,计算并比较失真代价,调用xUpdateCandList构建全率失真优化候选列表。全率失真优化候选列表的长度numModesForFullRD由块尺寸索引(getIntraSizeIdx)以及是否启用MPM决定。注意粗选阶段为了提高速度,使用哈达玛变换计算失真(当CU启用transquant bypass时bUseHadamard为false,不使用哈达玛变换)。
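2.细选阶段:如果getFastUDIUseMPMEnabled()为真,调用getIntraDirPredictor构建MPM列表,并把其中尚未包含的模式加入全率失真优化候选列表中。遍历全率失真优化候选列表,调用xRecurIntraCodingLumaQT进行变换量化重构,计算率失真代价。注意在这里该函数的第六个参数bCheckFirst=true(仅在启用HHI_RQT_INTRA_SPEEDUP时存在),作者认为这表示会按照四叉树的方式继续向下划分(bCheckFirst的确切含义请以xRecurIntraCodingLumaQT的实现为准)。比较率失真代价,找到最优模式。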
三、(仅在启用HHI_RQT_INTRA_SPEEDUP时)对获得的最优模式再次调用xRecurIntraCodingLumaQT,此时第六个参数bCheckFirst设置为false;比较同一模式在bCheckFirst为true和false两种情况下的率失真代价,保留代价较小的结果作为最终的亮度帧内预测结果。
四、收尾工作,记录信息:设置重建块、Cbf、上下文模型、总失真等。

其中调用了重要函数:predIntraAng(计算帧内预测模式的预测值)、getIntraDirPredictor(利用临近PU构建MPM列表)、xRecurIntraCodingLumaQT(进行帧内亮度分量的变换量化重构,计算率失真代价)。

代码分析:

// Luma intra prediction mode search for the PU(s) of one CU.
//
// For every luma section delivered by tuRecurseWithPU the function:
//   1. when fewer than all 35 modes go to full RD ("fast search"), predicts every
//      mode with predIntraAng, computes a rough cost (distortion + mode bits *
//      sqrt(lambda)) and keeps the numModesForFullRD cheapest modes via
//      xUpdateCandList; if getFastUDIUseMPMEnabled() is set, the entries returned
//      by getIntraDirPredictor that are not yet in the list are appended;
//   2. calls xRecurIntraCodingLumaQT for each listed mode and keeps the cheapest;
//   3. (HHI_RQT_INTRA_SPEEDUP only) calls xRecurIntraCodingLumaQT again for the
//      best mode (and the second best with HHI_RQT_INTRA_SPEEDUP_MOD) with
//      bCheckFirst == false, keeping the result if it is cheaper;
//   4. writes the saved transform index / cbf / transform-skip flags back to the
//      CU, copies the reconstruction into the picture's reconstruction buffer
//      for every section but the last, and stores the best mode in the CU.
// Afterwards the cbf flags are combined when the CU has more than one PU, the
// SBAC state CI_CURR_BEST is reloaded and the summed luma distortion is stored
// in pcCU->getTotalDistortion().
//
// pcCU      : CU being coded; its QP, intra direction, transform index, cbf,
//             transform-skip flags and total distortion are written here.
// pcOrgYuv  : buffer the original luma samples are read from.
// pcPredYuv : buffer predIntraAng writes the luma prediction into.
// pcResiYuv : passed through to xRecurIntraCodingLumaQT.
// pcRecoYuv : receives the result of xSetIntraResultLumaQT for the best mode.
// resiLuma  : receives the stored residuals of the best mode when
//             cross-component prediction is enabled in the PPS.
// sDebug    : debug string parameter, present only if DEBUG_STRING_FN_DECLARE
//             expands to one (macro defined elsewhere).
Void
    TEncSearch::estIntraPredLumaQT(TComDataCU* pcCU,
    TComYuv*    pcOrgYuv,
    TComYuv*    pcPredYuv,
    TComYuv*    pcResiYuv,
    TComYuv*    pcRecoYuv,
    Pel         resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE]
DEBUG_STRING_FN_DECLARE(sDebug))
{
    const UInt         uiDepth               = pcCU->getDepth(0);       // depth of the CU
    const UInt         uiInitTrDepth         = pcCU->getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1;     // initial transform depth: 0 for SIZE_2Nx2N, otherwise 1
    const UInt         uiNumPU               = 1<<(2*uiInitTrDepth);        // number of PUs: 1 or 4
    const UInt         uiQNumParts           = pcCU->getTotalNumPart() >> 2;        // a quarter of the CU's partitions
    const UInt         uiWidthBit            = pcCU->getIntraSizeIdx(0);
    const ChromaFormat chFmt                 = pcCU->getPic()->getChromaFormat();       // chroma format
    const UInt         numberValidComponents = getNumberValidComponents(chFmt);
    const TComSPS     &sps                   = *(pcCU->getSlice()->getSPS());           // SPS
    const TComPPS     &pps                   = *(pcCU->getSlice()->getPPS());           // PPS
    Distortion   uiOverallDistY        = 0;
    UInt         CandNum;           // number of candidates (sum of the xUpdateCandList return values)
    Double       CandCostList[ FAST_UDI_MAX_RDMODE_NUM ];       // costs of the candidate modes
    Pel          resiLumaPU[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];        // luma residual of the PU under test

    Bool    bMaintainResidual[NUMBER_OF_STORED_RESIDUAL_TYPES];
    for (UInt residualTypeIndex = 0; residualTypeIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; residualTypeIndex++)
    {
        bMaintainResidual[residualTypeIndex] = true; //assume true unless specified otherwise
    }

    bMaintainResidual[RESIDUAL_ENCODER_SIDE] = !(m_pcEncCfg->getUseReconBasedCrossCPredictionEstimate());

    // Lambda calculation at equivalent Qp of 4 is recommended because at that Qp, the quantisation divisor is 1.
#if FULL_NBIT
    const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
        sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0)))
        : m_pcRdCost->getSqrtLambda();
#else
    // sqrt(lambda) used for the rough cost of the first pass
    const Double sqrtLambdaForFirstPass= (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING && pcCU->getCUTransquantBypass(0)) ?
        sqrt(0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (sps.getBitDepth(CHANNEL_TYPE_LUMA) - 8)) / 3.0)))
        : m_pcRdCost->getSqrtLambda();
#endif

    //===== set QP and clear Cbf =====
    // the QP is taken from the CU when DQP is enabled in the PPS, otherwise from the slice
    if ( pps.getUseDQP() == true)
    {
        pcCU->setQPSubParts( pcCU->getQP(0), 0, uiDepth );
    }
    else
    {
        pcCU->setQPSubParts( pcCU->getSlice()->getSliceQp(), 0, uiDepth );
    }

    //===== loop over partitions =====
    // iterate over the partitions: no split when uiInitTrDepth is 0, quad split otherwise
    TComTURecurse tuRecurseCU(pcCU, 0);
    TComTURecurse tuRecurseWithPU(tuRecurseCU, false, (uiInitTrDepth==0)?TComTU::DONT_SPLIT : TComTU::QUAD_SPLIT);

    do      // loop over all PUs of the CU
    {
        const UInt uiPartOffset=tuRecurseWithPU.GetAbsPartIdxTU();      // partition offset of the current PU
        //  for( UInt uiPU = 0, uiPartOffset=0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
        //{
        //===== init pattern for luma prediction =====
        DEBUG_STRING_NEW(sTemp2)

            //===== determine set of modes to be tested (using prediction signal only) =====
            Int numModesAvailable     = 35; //total number of Intra modes
        UInt uiRdModeList[FAST_UDI_MAX_RDMODE_NUM];     // modes selected for full RD
        // number of modes for full RD: looked up by size index, from the table matching the MPM setting
        Int numModesForFullRD = m_pcEncCfg->getFastUDIUseMPMEnabled()?g_aucIntraModeNumFast_UseMPM[ uiWidthBit ] : g_aucIntraModeNumFast_NotUseMPM[ uiWidthBit ];

        // this should always be true
        assert (tuRecurseWithPU.ProcessComponentSection(COMPONENT_Y));
        initIntraPatternChType( tuRecurseWithPU, COMPONENT_Y, true DEBUG_STRING_PASS_INTO(sTemp2) );        // initialise the intra pattern for luma

        /************************************************** fast search: rough pre-selection stage **************************************************/
        Bool doFastSearch = (numModesForFullRD != numModesAvailable);       // fast search whenever not all modes go to full RD
        if (doFastSearch)           // fast search
        {
            assert(numModesForFullRD < numModesAvailable);

            for( Int i=0; i < numModesForFullRD; i++ )      // initialise the cost of the numModesForFullRD candidate slots to the maximum
            {
                CandCostList[ i ] = MAX_DOUBLE;
            }
            CandNum = 0;

            const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);       // luma rectangle of the PU
            const UInt uiAbsPartIdx=tuRecurseWithPU.GetAbsPartIdxTU();          // partition index of the PU

            Pel* piOrg         = pcOrgYuv ->getAddr( COMPONENT_Y, uiAbsPartIdx );       // original samples
            Pel* piPred        = pcPredYuv->getAddr( COMPONENT_Y, uiAbsPartIdx );       // prediction samples
            UInt uiStride      = pcPredYuv->getStride( COMPONENT_Y );           // stride (used for both buffers below)
            DistParam distParam;        // distortion parameters
            const Bool bUseHadamard=pcCU->getCUTransquantBypass(0) == 0;        // Hadamard is used unless the CU is transquant-bypass
            m_pcRdCost->setDistParam(distParam, sps.getBitDepth(CHANNEL_TYPE_LUMA), piOrg, uiStride, piPred, uiStride, puRect.width, puRect.height, bUseHadamard);      // set up the distortion measure between piOrg and piPred
            distParam.bApplyWeight = false;
            for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ )      // loop over all 35 intra prediction modes
            {
                UInt       uiMode = modeIdx;        // current mode
                Distortion uiSad  = 0;      // distortion

                // decide whether the reference samples are filtered for this mode
                const Bool bUseFilter=TComPrediction::filteringIntraReferenceSamples(COMPONENT_Y, uiMode, puRect.width, puRect.height, chFmt, sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag());

                // compute the prediction for this mode into piPred
                predIntraAng( COMPONENT_Y, uiMode, piOrg, uiStride, piPred, uiStride, tuRecurseWithPU, bUseFilter, TComPrediction::UseDPCMForFirstPassIntraEstimation(tuRecurseWithPU, uiMode) );

                // use hadamard transform here
                uiSad+=distParam.DistFunc(&distParam);      // distortion of the prediction (Hadamard-based when bUseHadamard is set)

                UInt   iModeBits = 0;           // number of bits

                // NB xModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
                iModeBits+=xModeBitsIntra( pcCU, uiMode, uiPartOffset, uiDepth, CHANNEL_TYPE_LUMA );        // bits for the mode

                Double cost      = (Double)uiSad + (Double)iModeBits * sqrtLambdaForFirstPass;      // rough cost: distortion + bits * sqrt(lambda)

#if DEBUG_INTRA_SEARCH_COSTS
                std::cout << "1st pass mode " << uiMode << " SAD = " << uiSad << ", mode bits = " << iModeBits << ", cost = " << cost << "\n";
#endif
                // update the candidate list with this mode and its cost
                CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
            }

            /************************************************** fast search: add the most probable modes **************************************************/
            if (m_pcEncCfg->getFastUDIUseMPMEnabled())          
            {
                Int uiPreds[NUM_MOST_PROBABLE_MODES] = {-1, -1, -1};        // MPM list, initialised to -1

                Int iMode = -1;
                // fill uiPreds (and iMode) from the CU's intra direction predictor
                // (presumably derived from neighbouring PUs; confirm in getIntraDirPredictor)
                pcCU->getIntraDirPredictor( uiPartOffset, uiPreds, COMPONENT_Y, &iMode );

                // iMode is used as the number of entries to consider when it is non-negative,
                // otherwise all NUM_MOST_PROBABLE_MODES entries are considered
                const Int numCand = ( iMode >= 0 ) ? iMode : Int(NUM_MOST_PROBABLE_MODES);

                // append every MPM that is not already in the full-RD list
                for( Int j=0; j < numCand; j++)
                {
                    Bool mostProbableModeIncluded = false;
                    Int mostProbableMode = uiPreds[j];

                    for( Int i=0; i < numModesForFullRD; i++)
                    {
                        mostProbableModeIncluded |= (mostProbableMode == uiRdModeList[i]);
                    }
                    if (!mostProbableModeIncluded)
                    {
                        uiRdModeList[numModesForFullRD++] = mostProbableMode;
                    }
                }
            }
        }
        else        // no fast search: put every mode into the full-RD list
        {
            for( Int i=0; i < numModesForFullRD; i++)
            {
                uiRdModeList[i] = i;
            }
        }

        //===== check modes (using r-d costs) =====
#if HHI_RQT_INTRA_SPEEDUP_MOD
        UInt   uiSecondBestMode  = MAX_UINT;
        Double dSecondBestPUCost = MAX_DOUBLE;
#endif
        DEBUG_STRING_NEW(sPU)
            UInt       uiBestPUMode  = 0;       // best mode so far
        Distortion uiBestPUDistY = 0;           // distortion of the best mode
        Double     dBestPUCost   = MAX_DOUBLE;      // cost of the best mode

#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
        UInt max=numModesForFullRD;

        if (DebugOptionList::ForceLumaMode.isSet())
        {
            max=0;  // we are forcing a direction, so don't bother with mode check
        }
        for ( UInt uiMode = 0; uiMode < max; uiMode++)
#else
        // loop over the full-RD candidate list
        for( UInt uiMode = 0; uiMode < numModesForFullRD; uiMode++ )
#endif
        {
            // set luma prediction mode
            UInt uiOrgMode = uiRdModeList[uiMode];      // mode under test

            // store the intra direction on the sub-parts of the PU
            pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );

            DEBUG_STRING_NEW(sMode)
                // set context models
                // (reload the SBAC state stored as CI_CURR_BEST for this depth)
                m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );

            // determine residual for partition
            Distortion uiPUDistY = 0;       // distortion of the current mode
            Double     dPUCost   = 0.0; // cost of the current mode
#if HHI_RQT_INTRA_SPEEDUP
            // code the luma PU with this mode; uiPUDistY and dPUCost receive distortion and RD cost
            // bCheckFirst (6th argument) is true here. NOTE(review): its effect is defined inside
            // xRecurIntraCodingLumaQT, which is not visible here - confirm there.
            xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, true, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#else
            xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sMode) );
#endif

#if DEBUG_INTRA_SEARCH_COSTS
            std::cout << "2nd pass [luma,chroma] mode [" << Int(pcCU->getIntraDir(CHANNEL_TYPE_LUMA, uiPartOffset)) << "," << Int(pcCU->getIntraDir(CHANNEL_TYPE_CHROMA, uiPartOffset)) << "] cost = " << dPUCost << "\n";
#endif

            // check r-d cost
            // the current mode becomes the best mode if it is cheaper than the best so far
            if( dPUCost < dBestPUCost )
            {
                DEBUG_STRING_SWAP(sPU, sMode)
#if HHI_RQT_INTRA_SPEEDUP_MOD
                    uiSecondBestMode  = uiBestPUMode;
                dSecondBestPUCost = dBestPUCost;
#endif
                uiBestPUMode  = uiOrgMode;
                uiBestPUDistY = uiPUDistY;
                dBestPUCost   = dPUCost;

                xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );

                // keep the residuals of the best mode when cross-component prediction is enabled
                if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
                {
                    const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
                    const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
                    for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
                    {
                        if (bMaintainResidual[storedResidualIndex])
                        {
                            xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
                        }
                    }
                }

                UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();

                // save transform index, cbf and transform-skip flags of the best mode in the temporary buffers
                ::memcpy( m_puhQTTempTrIdx,  pcCU->getTransformIdx()       + uiPartOffset, uiQPartNum * sizeof( UChar ) );
                for (UInt component = 0; component < numberValidComponents; component++)
                {
                    const ComponentID compID = ComponentID(component);
                    ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID  ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
                    ::memcpy( m_puhQTTempTransformSkipFlag[compID],  pcCU->getTransformSkip(compID)  + uiPartOffset, uiQPartNum * sizeof( UChar ) );
                }
            }
#if HHI_RQT_INTRA_SPEEDUP_MOD
            else if( dPUCost < dSecondBestPUCost )
            {
                uiSecondBestMode  = uiOrgMode;
                dSecondBestPUCost = dPUCost;
            }
#endif
        } // Mode loop

#if HHI_RQT_INTRA_SPEEDUP
#if HHI_RQT_INTRA_SPEEDUP_MOD
        for( UInt ui =0; ui < 2; ++ui )
#endif
        {
#if HHI_RQT_INTRA_SPEEDUP_MOD
            UInt uiOrgMode   = ui ? uiSecondBestMode  : uiBestPUMode;
            if( uiOrgMode == MAX_UINT )
            {
                break;
            }
#else
            /**************************************************** best mode found: code it once more with bCheckFirst == false and compare the RD cost *********************************************************/
            // take the best mode
            UInt uiOrgMode = uiBestPUMode;
#endif

#if ENVIRONMENT_VARIABLE_DEBUG_AND_TEST
            if (DebugOptionList::ForceLumaMode.isSet())
            {
                uiOrgMode = DebugOptionList::ForceLumaMode.getInt();
            }
#endif
            // store the intra direction on the sub-parts of the PU
            pcCU->setIntraDirSubParts ( CHANNEL_TYPE_LUMA, uiOrgMode, uiPartOffset, uiDepth + uiInitTrDepth );
            DEBUG_STRING_NEW(sModeTree)
                // reload the SBAC state stored as CI_CURR_BEST for this depth
                // set context models
                m_pcRDGoOnSbacCoder->load( m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST] );

            // determine residual for partition
            Distortion uiPUDistY = 0;
            Double     dPUCost   = 0.0;

            // code the luma PU again with the selected mode
            // bCheckFirst (6th argument) is false here. NOTE(review): its effect is defined inside xRecurIntraCodingLumaQT, which is not visible here - confirm there.
            xRecurIntraCodingLumaQT( pcOrgYuv, pcPredYuv, pcResiYuv, resiLumaPU, uiPUDistY, false, dPUCost, tuRecurseWithPU DEBUG_STRING_PASS_INTO(sModeTree));

            // check r-d cost
            // keep this result only if it is cheaper than the best cost found with bCheckFirst == true
            if( dPUCost < dBestPUCost )
            {
                DEBUG_STRING_SWAP(sPU, sModeTree)
                    uiBestPUMode  = uiOrgMode;
                uiBestPUDistY = uiPUDistY;
                dBestPUCost   = dPUCost;

                xSetIntraResultLumaQT( pcRecoYuv, tuRecurseWithPU );

                // keep the residuals of the best mode when cross-component prediction is enabled
                if (pps.getPpsRangeExtension().getCrossComponentPredictionEnabledFlag())
                {
                    const Int xOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).x0;
                    const Int yOffset = tuRecurseWithPU.getRect( COMPONENT_Y ).y0;
                    for (UInt storedResidualIndex = 0; storedResidualIndex < NUMBER_OF_STORED_RESIDUAL_TYPES; storedResidualIndex++)
                    {
                        if (bMaintainResidual[storedResidualIndex])
                        {
                            xStoreCrossComponentPredictionResult(resiLuma[storedResidualIndex], resiLumaPU[storedResidualIndex], tuRecurseWithPU, xOffset, yOffset, MAX_CU_SIZE, MAX_CU_SIZE );
                        }
                    }
                }

                // save transform index, cbf and transform-skip flags of the best mode in the temporary buffers
                const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
                ::memcpy( m_puhQTTempTrIdx,  pcCU->getTransformIdx()       + uiPartOffset, uiQPartNum * sizeof( UChar ) );

                for (UInt component = 0; component < numberValidComponents; component++)
                {
                    const ComponentID compID = ComponentID(component);
                    ::memcpy( m_puhQTTempCbf[compID], pcCU->getCbf( compID  ) + uiPartOffset, uiQPartNum * sizeof( UChar ) );
                    ::memcpy( m_puhQTTempTransformSkipFlag[compID],  pcCU->getTransformSkip(compID)  + uiPartOffset, uiQPartNum * sizeof( UChar ) );
                }
            }
        } // Mode loop
#endif

        DEBUG_STRING_APPEND(sDebug, sPU)

            //--- update overall distortion ---
            uiOverallDistY += uiBestPUDistY;

        //--- update transform index and cbf ---
        // copy the values saved for the best mode from the temporary buffers back into the CU
        const UInt uiQPartNum = tuRecurseWithPU.GetAbsPartIdxNumParts();
        ::memcpy( pcCU->getTransformIdx()       + uiPartOffset, m_puhQTTempTrIdx,  uiQPartNum * sizeof( UChar ) );
        for (UInt component = 0; component < numberValidComponents; component++)
        {
            const ComponentID compID = ComponentID(component);
            ::memcpy( pcCU->getCbf( compID  ) + uiPartOffset, m_puhQTTempCbf[compID], uiQPartNum * sizeof( UChar ) );
            ::memcpy( pcCU->getTransformSkip( compID  ) + uiPartOffset, m_puhQTTempTransformSkipFlag[compID ], uiQPartNum * sizeof( UChar ) );
        }

        //--- set reconstruction for next intra prediction blocks ---
        // copy the reconstructed luma block into the picture's reconstruction buffer (skipped for the last section)
        if( !tuRecurseWithPU.IsLastSection() )
        {
            const TComRectangle &puRect=tuRecurseWithPU.getRect(COMPONENT_Y);
            const UInt  uiCompWidth   = puRect.width;
            const UInt  uiCompHeight  = puRect.height;

            const UInt  uiZOrder      = pcCU->getZorderIdxInCtu() + uiPartOffset;
            Pel*  piDes         = pcCU->getPic()->getPicYuvRec()->getAddr( COMPONENT_Y, pcCU->getCtuRsAddr(), uiZOrder );
            const UInt  uiDesStride   = pcCU->getPic()->getPicYuvRec()->getStride( COMPONENT_Y);
            const Pel*  piSrc         = pcRecoYuv->getAddr( COMPONENT_Y, uiPartOffset );
            const UInt  uiSrcStride   = pcRecoYuv->getStride( COMPONENT_Y);

            for( UInt uiY = 0; uiY < uiCompHeight; uiY++, piSrc += uiSrcStride, piDes += uiDesStride )
            {
                for( UInt uiX = 0; uiX < uiCompWidth; uiX++ )
                {
                    piDes[ uiX ] = piSrc[ uiX ];
                }
            }
        }

        //=== update PU data ====
        pcCU->setIntraDirSubParts     ( CHANNEL_TYPE_LUMA, uiBestPUMode, uiPartOffset, uiDepth + uiInitTrDepth );
    } while (tuRecurseWithPU.nextSection(tuRecurseCU));

    // combine the cbf flags: OR the four quarter values together and OR the result into every partition
    if( uiNumPU > 1 )
    { // set Cbf for all blocks
        UInt uiCombCbfY = 0;
        UInt uiCombCbfU = 0;
        UInt uiCombCbfV = 0;
        UInt uiPartIdx  = 0;
        for( UInt uiPart = 0; uiPart < 4; uiPart++, uiPartIdx += uiQNumParts )
        {
            uiCombCbfY |= pcCU->getCbf( uiPartIdx, COMPONENT_Y,  1 );
            uiCombCbfU |= pcCU->getCbf( uiPartIdx, COMPONENT_Cb, 1 );
            uiCombCbfV |= pcCU->getCbf( uiPartIdx, COMPONENT_Cr, 1 );
        }
        for( UInt uiOffs = 0; uiOffs < 4 * uiQNumParts; uiOffs++ )
        {
            pcCU->getCbf( COMPONENT_Y  )[ uiOffs ] |= uiCombCbfY;
            pcCU->getCbf( COMPONENT_Cb )[ uiOffs ] |= uiCombCbfU;
            pcCU->getCbf( COMPONENT_Cr )[ uiOffs ] |= uiCombCbfV;
        }
    }

    //===== reset context models =====
    // reload the SBAC state stored as CI_CURR_BEST for this depth
    m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);

    //===== set distortion (rate and r-d costs are determined later) =====
    // store the summed luma distortion of all PUs in the CU
    pcCU->getTotalDistortion() = uiOverallDistY;
}

你可能感兴趣的:(HM,HEVC,HEVC代码学习)