Search::predInterSearch()

/* find the best inter prediction for each PU of specified mode 
   为CU中的每一个PU找到最佳inter pred,并累加上其ME cost
   最佳inter pred指的是mv,mvd,mvpIdx,dir,list等

   过程:
	1.遍历每一个PU,以PU为计算单位
		1.为运动估计加载PU的YUV,初始化失真函数指针,设置运动估计方法及下采样等级
		2.进行merge估计,得到最优merge备选MV及其cost
		3.初始化前后向运动估计bestME[dir]开销为MAX,用来存储当前运动方向上的最优预测
		4.得到当前block模式的bits开销
		5.加载当前PU相邻PU的MV
		
		6.初始化前后向运动估计的ref,加载其refMask
		7.对当前PU,针对其每个预测方向(前向/后向)遍历,找到当前预测方向上的最优预测bestME[dir]
			1.对当前PU,当前预测方向,遍历参考帧列表中每一帧
				1.若refMask表示当前参考帧不可用,则跳过该参考帧分析
				2.累计bits = block模式bits + mvp_idx_bits + refIdx_bits
				3.构造AMVP备选集
				4.基于sad,选择AMVP备选集中最优MVP
				5.根据最优MVP及searchRange设置运动估计搜索窗口
				6.进行运动估计,得到最优MV及其satd
				
				7.累计bits+=AMVP最优MVP与运动估计最优MV之差的bits
				8.计算运动估计最优MV的mvcost
				9.得到最优MV的cost
				10.若当前cost小于bestME[dir]的cost,则更新
		8.若是Bslice,且没有双向预测限制,且PUsize!=2Nx2N,且前后向都又bestME,则进行双向预测计算
			1.取前后向的bestME为双向预测的前后向MV
			2.计算satd
				·若bChromaSATD,则
					1.拷贝前后向的MV及其refIdx
					2.进行运动估计
					3.累加Luma和Chroma的satd到satd
				·若非bChromaSATD,则
					1.取前后向参考帧的YUV
					2.对前后向参考像素进行均值计算,结果即为双向参考预测像素
					3.计算其satd
			3.计算双向预测的bits及cost
			4.若前后向运动估计最优MV存在非0向量,则以0向量为中心重新设置搜索窗口,并检查mvp是否超出窗口
			5.若前后向运动估计最优MV存在非0向量,且mvp都没有超出搜索窗口,则
				·若bChromaSATD
					1.则设置mv为0向量
					2.进行运动补偿
					3.并计算重新累计luma和chroma的satd
				·若非bChromaSATD
					1.取前后向参考帧co-located像素值(其实也就是0向量)
					2.对前后向参考帧co-located像素值进行均值计算,得到0向量的预测像素
					3.进行satd计算
			6.以0向量为运动估计最优MV,重新计算MVP,得到其mvp, mvpidx, bits, cost
			7.若MV为0向量且重新计算后的mvp的cost小于最初计算的双向预测cost,则将其更新为双向预测最优
		9.为当前PU选择最优的预测模式
			·merge最优,写mergeFlag为true、记录mergeIdx、将merge前后向的MV和refIdx记录到PU
			·双向最优,设置PU预测方向为双向,写mergeFlag为false、将双向预测前后向的MV和refIdx记录到PU,记录双向mvpIdx和mvd
			·前向最优,设置PU预测方向为前向,写mergeFlag为false、将前向预测的MV和refIdx记录到PU,记录前向mvpIdx和mvd
			·后向最优,设置PU预测方向为后向,写mergeFlag为false、将后向预测的MV和refIdx记录到PU,记录后向mvpIdx和mvd
		10.对当前PU的最优预测模式进行运动补偿
	2.累加上运动估计的bits到当前interMode的bits上,并返回
*/
void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
{
    ProfileCUScope(interMode.cu, motionEstimationElapsedTime, countMotionEstimate);

	//取interMode的CUData
    CUData& cu = interMode.cu;
	//取interMode的predYUV
    Yuv* predYuv = &interMode.predYuv;

    /* 12 mv candidates including lowresMV
		低分辨率lowresMV为之前进行lookahead时候对1/2下采样后的帧进行运动估计的MV*/
	MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];

    const Slice *slice = m_slice;
    int numPart     = cu.getNumPartInter(0);
	//单向预测?双向预测?
    int numPredDir  = slice->isInterP() ? 1 : 2;
	//参考帧列表中帧数量
    const int* numRefIdx = slice->m_numRefIdx;
	//lastMode记录上一个PU的dir,前向0后向1双向2
    uint32_t lastMode = 0;
    int      totalmebits = 0;
    MV       mvzero(0, 0);
    Yuv&     tmpPredYuv = m_rqt[cuGeom.depth].tmpPredYuv;
    MergeData merge;
    memset(&merge, 0, sizeof(merge));
    bool useAsMVP = false;

	//遍历每个PU
    for (int puIdx = 0; puIdx < numPart; puIdx++)
    {
		//取当前PU的bestME
        MotionData* bestME = interMode.bestME[puIdx];
		//构造PU
        PredictionUnit pu(cu, cuGeom, puIdx);

		//为m_me加载PU的YUV,并初始化失真计算函数指针,设置运动估计方法及下采样等级
        m_me.setSourcePU(*interMode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, bChromaMC);
        //初始化useAsMVP为false
		useAsMVP = false;
		//定义初始化x265_analysis_inter_data
        x265_analysis_inter_data* interDataCTU = NULL;
		//定义并计算当前CU的索引
        int cuIdx;
        cuIdx = (interMode.cu.m_cuAddr * m_param->num4x4Partitions) + cuGeom.absPartIdx;
		//analysisReuseLevel = 0 且 interRefine>1
        if (m_param->analysisReuseLevel == 10 && m_param->interRefine > 1)
        {
			//加载API外加载的编码分析数据
            interDataCTU = m_frame->m_analysisData.interData;

			/*
				若interMode的predMode = 加载的predMode
			  &&  interMode的partSize = 加载的partSize
			  &&  interMode的Depth	  = 加载的Depth
			  &&  加载的数据中,当前PU不merge
			  ==》useAsMVP = true
			*/
            if ((cu.m_predMode[pu.puAbsPartIdx] == interDataCTU->modes[cuIdx + pu.puAbsPartIdx])
                && (cu.m_partSize[pu.puAbsPartIdx] == interDataCTU->partSize[cuIdx + pu.puAbsPartIdx])
                && !(interDataCTU->mergeFlag[cuIdx + puIdx])
                && (cu.m_cuDepth[0] == interDataCTU->depth[cuIdx]))
                useAsMVP = true;
        }

        /* find best cost merge candidate. note: 2Nx2N merge and bidir are handled as separate modes 
		   找到最优merge备选集,并返回其cost */
        uint32_t mrgCost = numPart == 1 ? MAX_UINT : mergeEstimation(cu, cuGeom, pu, puIdx, merge);
        //初始化前后向bestME的cost为MAX
		bestME[0].cost = MAX_UINT;
        bestME[1].cost = MAX_UINT;

		//得到block模式(模式包含partSize和预测方向)的bits开销,即2Nx2N、2NxN、2NxnU等等
        getBlkBits((PartSize)cu.m_partSize[0], slice->isInterP(), puIdx, lastMode, m_listSelBits);
        bool bDoUnidir = true;

		//加载当前PU相邻MV备选集到interNeighbours中
        cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours);

        /* Uni-directional prediction 
			analysisLoad && analysisReuseLevel > 1 && analysisReuseLevel != 10
		或	analysisMultiPassRefine && bStatRead
		或	bAnalysisType == AVC_INFO
		或	useAsMVP */
        if ((m_param->analysisLoad && m_param->analysisReuseLevel > 1 && m_param->analysisReuseLevel != 10)
            || (m_param->analysisMultiPassRefine && m_param->rc.bStatRead) || (m_param->bAnalysisType == AVC_INFO) || (useAsMVP))
        {
			//遍历前后向参考方向
            for (int list = 0; list < numPredDir; list++)
            {
				//初始化ref = -1
                int ref = -1;

				//加载当前PU当前预测方向上的ref
                if (useAsMVP) //是否有API外信息载入,若有则加载信息
                    ref = interDataCTU->refIdx[list][cuIdx + puIdx];
				else //若无则加载前面分析计算的bestME的ref
                    ref = bestME[list].ref;

				//若仍然无ref,则该参考方向不可用,continue
                if (ref < 0)
                    continue;

				//累加bits,即之前的block_bits + mvp_idx_bits + refIdx_bits
                uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
                bits += getTUBits(ref, numRefIdx[list]);
				
				//加载AMVP备选集到amvpCand中
                int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
                //取AMVP备选集
				const MV* amvp = interMode.amvpCand[list][ref];
				//选择AMVP备选集中最优备选MV,基于sad开销
                int mvpIdx = selectMVP(cu, pu, amvp, list, ref);

                MV mvmin, mvmax, outmv, mvp;
				//取最优AMVP备选MV
                mvp = amvp[mvpIdx];
				//若运动估计算法为SEA
                if (m_param->searchMethod == X265_SEA)
                {
                    int puX = puIdx & 1;
                    int puY = puIdx >> 1;
                    for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
                        m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
                }
				//根据mvp计算搜索范围,输出到[mvmin,mvmax]
                setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
                MV mvpIn = mvp;
                int satdCost;
                if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx)
                    mvpIn = bestME[list].mv;
                if (useAsMVP)
                {
                    MV bestmv, mvpSel[3];
                    int mvpIdxSel[3];
                    satdCost = m_me.COST_MAX;
                    mvpSel[0] = interDataCTU->mv[list][cuIdx + puIdx].word;
                    mvpIdxSel[0] = interDataCTU->mvpIdx[list][cuIdx + puIdx];
                    if (m_param->mvRefine > 1)
                    {
                        mvpSel[1] = interMode.amvpCand[list][ref][mvpIdx];
                        mvpIdxSel[1] = mvpIdx;
                        if (m_param->mvRefine > 2)
                        {
                            mvpSel[2] = interMode.amvpCand[list][ref][!mvpIdx];
                            mvpIdxSel[2] = !mvpIdx;
                        }
                    }
                    for (int cand = 0; cand < m_param->mvRefine; cand++)
					{
                        if (cand && (mvpSel[cand] == mvpSel[cand - 1] || (cand == 2 && mvpSel[cand] == mvpSel[cand - 2])))
                            continue;
                        setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
                        int bcost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvpSel[cand], numMvc, mvc, m_param->searchRange, bestmv, m_param->maxSlices,
                            m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
                        if (satdCost > bcost)
                        {
                            satdCost = bcost;
                            outmv = bestmv;
                            mvp = mvpSel[cand];
                            mvpIdx = mvpIdxSel[cand];
                        }
                    }
                } // end of if (useAsMVP)
                else
                {
                    satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices,
                        m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
                }

                /* Get total cost of partition, but only include MV bit cost once */
                bits += m_me.bitcost(outmv);
                uint32_t mvCost = m_me.mvcost(outmv);
                uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
                /* Refine MVP selection, updates: mvpIdx, bits, cost */
                if (!(m_param->analysisMultiPassRefine || useAsMVP))
                    mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);
                else
                {
                    /* It is more accurate to compare with actual mvp that was used in motionestimate than amvp[mvpIdx]. Here 
                      the actual mvp is bestME from pass 1 for that mvpIdx */
                    int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn);
                    if (diffBits < 0)
                    {
                        mvpIdx = !mvpIdx;
                        uint32_t origOutBits = bits;
                        bits = origOutBits + diffBits;
                        cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits);
                    }
                    mvp = amvp[mvpIdx];
                }

                if (cost < bestME[list].cost)
                {
                    bestME[list].mv = outmv;
                    bestME[list].mvp = mvp;
                    bestME[list].mvpIdx = mvpIdx;
                    bestME[list].cost = cost;
                    bestME[list].bits = bits;
                    bestME[list].mvCost  = mvCost;
                    bestME[list].ref = ref;
                }
                bDoUnidir = false;
            }            
        }
		//多线程并行运动估计,一个参考帧一个线程
        else if (m_param->bDistributeMotionEstimation)
		{
            PME pme(*this, interMode, cuGeom, pu, puIdx);
            pme.m_jobTotal = 0;
            pme.m_jobAcquired = 1; /* reserve L0-0 or L1-0 */

            uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;
            for (int list = 0; list < numPredDir; list++)
            {
                int idx = 0;
                for (int ref = 0; ref < numRefIdx[list]; ref++)
                {
                    if (!(refMask & (1 << ref)))
                        continue;

                    pme.m_jobs.ref[list][idx++]  = ref;
                    pme.m_jobTotal++;
                }
                pme.m_jobs.refCnt[list] = idx;

                /* the second list ref bits start at bit 16 */
                refMask >>= 16;
            }

            if (pme.m_jobTotal > 2)
            {
                pme.tryBondPeers(*m_frame->m_encData->m_jobProvider, pme.m_jobTotal - 1);

                processPME(pme, *this);

                int ref = pme.m_jobs.refCnt[0] ? pme.m_jobs.ref[0][0] : pme.m_jobs.ref[1][0];
                singleMotionEstimation(*this, interMode, pu, puIdx, 0, ref); /* L0-0 or L1-0 */

                bDoUnidir = false;

                ProfileCUScopeNamed(pmeWaitScope, interMode.cu, pmeBlockTime, countPMEMasters);
                pme.waitForExit();
            }

            /* if no peer threads were bonded, fall back to doing unidirectional
             * searches ourselves without overhead of singleMotionEstimation() */
        } //end of if (m_param->bDistributeMotionEstimation)


        if (bDoUnidir)
        {
			//初始化前后向最优运动估计的ref = -1
            interMode.bestME[puIdx][0].ref = interMode.bestME[puIdx][1].ref = -1;
			/*	refMask一共四字节32bits,
				低16bits表示前向参考帧列表中各帧是否可用
			    高16bits表示后向参考帧列表中各帧是否可用 */
            uint32_t refMask = refMasks[puIdx] ? refMasks[puIdx] : (uint32_t)-1;

            for (int list = 0; list < numPredDir; list++) ////遍历每个预测方向
            {
				//遍历当前预测方向参考列表中每一帧
				for (int ref = 0; ref < numRefIdx[list]; ref++)
                {
                    ProfileCounter(interMode.cu, totalMotionReferences[cuGeom.depth]);

					//若refMask标注当前帧不可用,则continue
                    if (!(refMask & (1 << ref)))
                    {
                        ProfileCounter(interMode.cu, skippedMotionReferences[cuGeom.depth]);
                        continue;
                    }

					//累计bits = mode_bits + MVP_idx_bits + refIdx_bits
                    uint32_t bits = m_listSelBits[list] + MVP_IDX_BITS;
                    bits += getTUBits(ref, numRefIdx[list]);
					
					//构造AMVP备选集
                    int numMvc = cu.getPMV(interMode.interNeighbours, list, ref, interMode.amvpCand[list][ref], mvc);
                    const MV* amvp = interMode.amvpCand[list][ref];
					//基于sad选AMVP备选集中的最优MVP
                    int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
                    MV mvmin, mvmax, outmv, mvp_lowres;
					MV mvp = amvp[mvpIdx];	//amvp最优mvp
                    bool bLowresMVP = false;

					//若既不analysisSave,也不analysisLoad
                    if (!m_param->analysisSave && !m_param->analysisLoad) /* Prevents load/save outputs from diverging when lowresMV is not available */
                    {
						//得到低分辨率的运动向量lmv
                        MV lmv = getLowresMV(cu, pu, list, ref);
                        if (lmv.notZero())	//若低分辨率的mv可用,则存储下来
                            mvc[numMvc++] = lmv;
                        if (m_param->bEnableHME) //若允许层级运动估计,则拷贝lmv到mvp_lowres
                            mvp_lowres = lmv;
                    }

					//若运动估计算法为SEA,则进行配置??
                    if (m_param->searchMethod == X265_SEA)
                    {
                        int puX = puIdx & 1;
                        int puY = puIdx >> 1;
                        for (int planes = 0; planes < INTEGRAL_PLANE_NUM; planes++)
                            m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride;
                    }

					//基于amvp最优备选MV和searchRange设置搜索窗口[mvmin~mvmax]
                    setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
					//进行运动估计,得到最优MV到outmv,及其satd
                    int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, m_param->maxSlices, 
                      m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);

					//使用3层级像素运动估计 && mvp低分辨率非零 && mvp低分辨率!=mvp
                    if (m_param->bEnableHME && mvp_lowres.notZero() && mvp_lowres != mvp)
                    {
                        MV outmv_lowres;
                        setSearchRange(cu, mvp_lowres, m_param->searchRange, mvmin, mvmax);
                        int lowresMvCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp_lowres, numMvc, mvc, m_param->searchRange, outmv_lowres, m_param->maxSlices,
                            m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
                        if (lowresMvCost < satdCost)
                        {
                            outmv = outmv_lowres;
                            satdCost = lowresMvCost;
                            bLowresMVP = true;
                        }
                    }

                    /* Get total cost of partition, but only include MV bit cost once */
					//累加AMVP的最优MVP与运动估计最优MV之间差值mvd的bit开销
                    bits += m_me.bitcost(outmv);
                    uint32_t mvCost = m_me.mvcost(outmv);
                    uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits);
                    /* Update LowresMVP to best AMVP cand*/
                    if (bLowresMVP)
                        updateMVP(amvp[mvpIdx], outmv, bits, cost, mvp_lowres);

                    /* Refine MVP selection, updates: mvpIdx, bits, cost 
					   检查是否AMVP中另一个备选项相对当前的MVP,cost更优?若是则更新 */
                    mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost);

					//更新当前list方向的最优运动信息
                    if (cost < bestME[list].cost)
                    {
                        bestME[list].mv      = outmv;	//运动估计最优MV
                        bestME[list].mvp     = mvp;		//AMVP最优MV
                        bestME[list].mvpIdx  = mvpIdx;	//AMVP最优MV在备选集中的索引
                        bestME[list].ref     = ref;		//参考帧索引
                        bestME[list].cost    = cost;	//开销
                        bestME[list].bits    = bits;	//bist开销
                        bestME[list].mvCost  = mvCost;	//mvCost
                    }
                }// end of for (int ref = 0; ref < numRefIdx[list]; ref++)

                /* the second list ref bits start at bit 16 
				   取后16bits为后向参考帧的refMask情况 */
                refMask >>= 16;

            }// end of for (int list = 0; list < numPredDir; list++)
        }// end of if (bDoUnidir)

        /* Bi-directional prediction 
			双向预测	*/
        MotionData bidir[2];
        uint32_t bidirCost = MAX_UINT;
        int bidirBits = 0;

		//Bslice && 当前CU无双向预测限制 && pu非2Nx2N  && 前后向都存在bestME
        if (slice->isInterB() && !cu.isBipredRestriction() &&  /* biprediction is possible for this PU */
            cu.m_partSize[pu.puAbsPartIdx] != SIZE_2Nx2N &&    /* 2Nx2N biprediction is handled elsewhere */
            bestME[0].cost != MAX_UINT && bestME[1].cost != MAX_UINT)
        {
			//记录bestME
            bidir[0] = bestME[0];
            bidir[1] = bestME[1];

            int satdCost;

            if (m_me.bChromaSATD)	//若bChromaSATD
            {
				//拷贝前向MV及其refIdx
                cu.m_mv[0][pu.puAbsPartIdx] = bidir[0].mv;
                cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref;
                //拷贝后向MV及其refIdx
				cu.m_mv[1][pu.puAbsPartIdx] = bidir[1].mv;
                cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref;
                //进行luma&&chroma运动补偿
				motionCompensation(cu, pu, tmpPredYuv, true, true);
				//累加luma和chroma的satd
                satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) +
                           m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx);
            }
            else
            {
				//取前后向参考帧的YUV
                PicYuv* refPic0 = slice->m_refReconPicList[0][bestME[0].ref];
                PicYuv* refPic1 = slice->m_refReconPicList[1][bestME[1].ref];
                //取当前的predYUV
				Yuv* bidirYuv = m_rqt[cuGeom.depth].bidirPredYuv;

                /* Generate reference subpels */ 
				//将前向参考像素拷贝到bidirYuv[0]中
                predInterLumaPixel(pu, bidirYuv[0], *refPic0, bestME[0].mv);
				//将后向参考像素拷贝到bidirYuv[1]中
                predInterLumaPixel(pu, bidirYuv[1], *refPic1, bestME[1].mv);
				//对前向后向参考的像素进行均值计算到tmpPredYuv中
                primitives.pu[m_me.partEnum].pixelavg_pp[(tmpPredYuv.m_size % 64 == 0) && (bidirYuv[0].m_size % 64 == 0) && (bidirYuv[1].m_size % 64 == 0)](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, bidirYuv[0].getLumaAddr(pu.puAbsPartIdx), bidirYuv[0].m_size,
                                                                                                 bidirYuv[1].getLumaAddr(pu.puAbsPartIdx), bidirYuv[1].m_size, 32);
                //计算tmpPredYuv的satd
				satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
            }

			//计算双向预测的bits开销
            bidirBits = bestME[0].bits + bestME[1].bits + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
            //计算双向预测的cost
			bidirCost = satdCost + m_rdCost.getCost(bidirBits);

			//前向&&后向的运动估计向量是否非0向量
            bool bTryZero = bestME[0].mv.notZero() || bestME[1].mv.notZero();
            if (bTryZero) //若存在非0向量
            {
                /* Do not try zero MV if unidir motion predictors are beyond
                 * valid search area */
                MV mvmin, mvmax;
                int merange = X265_MAX(m_param->sourceWidth, m_param->sourceHeight);
				//重置搜索范围,以0向量为中心,merange为范围,得到搜索范围[mvmin,mvmax]
                setSearchRange(cu, mvzero, merange, mvmin, mvmax);
                mvmax.y += 2; // there is some pad for subpel refine
				//搜索范围超分4倍
                mvmin <<= 2;
                mvmax <<= 2;
				//检查mvp是否超过搜索范围
                bTryZero &= bestME[0].mvp.checkRange(mvmin, mvmax);
                bTryZero &= bestME[1].mvp.checkRange(mvmin, mvmax);
            }
            if (bTryZero)	//若存在非0向量,且mvp都在搜索范围内
            {
                /* coincident blocks of the two reference pictures */
                if (m_me.bChromaSATD)	//若bChromaSATD
                {
					//设置mv为0向量
                    cu.m_mv[0][pu.puAbsPartIdx] = mvzero;
                    cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)bidir[0].ref;
                    cu.m_mv[1][pu.puAbsPartIdx] = mvzero;
                    cu.m_refIdx[1][pu.puAbsPartIdx] = (int8_t)bidir[1].ref;
					//进行运动补偿
                    motionCompensation(cu, pu, tmpPredYuv, true, true);
					//累加luma和chroma的satd
                    satdCost = m_me.bufSATD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size) +
                               m_me.bufChromaSATD(tmpPredYuv, pu.puAbsPartIdx);
                }
                else	//若非bChromaSATD
                {
					//取前向预测co-located参考像素
                    const pixel* ref0 = m_slice->m_mref[0][bestME[0].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx);
                    //取后向预测co-located参考像素
					const pixel* ref1 = m_slice->m_mref[1][bestME[1].ref].getLumaAddr(pu.ctuAddr, pu.cuAbsPartIdx + pu.puAbsPartIdx);
                    //得到stride
					intptr_t refStride = slice->m_mref[0][0].lumaStride;
					//进行前后向参考像素均值计算
                    primitives.pu[m_me.partEnum].pixelavg_pp[(tmpPredYuv.m_size % 64 == 0) && (refStride % 64 == 0)](tmpPredYuv.m_buf[0], tmpPredYuv.m_size, ref0, refStride, ref1, refStride, 32);
                    //得到satd
					satdCost = m_me.bufSATD(tmpPredYuv.m_buf[0], tmpPredYuv.m_size);
                }
				//得到前向mvp及mvpIdx
                MV mvp0 = bestME[0].mvp;
                int mvpIdx0 = bestME[0].mvpIdx;
				//计算前向me的bits
                uint32_t bits0 = bestME[0].bits - m_me.bitcost(bestME[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0);
				//得到后向mvp及mvpIdx
                MV mvp1 = bestME[1].mvp;
                int mvpIdx1 = bestME[1].mvpIdx;
				//计算后向me的bits
                uint32_t bits1 = bestME[1].bits - m_me.bitcost(bestME[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1);
				//累计cost
                uint32_t cost = satdCost + m_rdCost.getCost(bits0) + m_rdCost.getCost(bits1);

                /* refine MVP selection for zero mv, updates: mvp, mvpidx, bits, cost 
					以mvzero为最优运动估计向量,重新计算最优MVP,并输出其mvzero,mvpIdx,bits及cost*/
                mvp0 = checkBestMVP(interMode.amvpCand[0][bestME[0].ref], mvzero, mvpIdx0, bits0, cost);
                mvp1 = checkBestMVP(interMode.amvpCand[1][bestME[1].ref], mvzero, mvpIdx1, bits1, cost);

				//若MV为0向量且从小计算后的mvp的cost小于最初计算的双向预测cost,则更新
                if (cost < bidirCost)
                {
                    bidir[0].mv = mvzero;
                    bidir[1].mv = mvzero;
                    bidir[0].mvp = mvp0;
                    bidir[1].mvp = mvp1;
                    bidir[0].mvpIdx = mvpIdx0;
                    bidir[1].mvpIdx = mvpIdx1;
                    bidirCost = cost;
                    bidirBits = bits0 + bits1 + m_listSelBits[2] - (m_listSelBits[0] + m_listSelBits[1]);
                }
            }
        }

        /* select best option and store into CU 
			为当前PU在 前向/后向/双向/merge 四种中最优的模式 */
		//若merge最优
        if (mrgCost < bidirCost && mrgCost < bestME[0].cost && mrgCost < bestME[1].cost)
        {
			//标记当前PU为merge
            cu.m_mergeFlag[pu.puAbsPartIdx] = true;
            
			cu.m_mvpIdx[0][pu.puAbsPartIdx] = merge.index; /* merge candidate ID is stored in L0 MVP idx */
           
			cu.setPUInterDir(merge.dir, pu.puAbsPartIdx, puIdx);
            cu.setPUMv(0, merge.mvField[0].mv, pu.puAbsPartIdx, puIdx);
            cu.setPURefIdx(0, merge.mvField[0].refIdx, pu.puAbsPartIdx, puIdx);
            cu.setPUMv(1, merge.mvField[1].mv, pu.puAbsPartIdx, puIdx);
            cu.setPURefIdx(1, merge.mvField[1].refIdx, pu.puAbsPartIdx, puIdx);

            totalmebits += merge.bits;
        }
		//若双向最优
        else if (bidirCost < bestME[0].cost && bidirCost < bestME[1].cost)
        {
			//lastMode为2,表示双向,为下一PU的getBlkBits服务
            lastMode = 2;

			//标记PU的mergeFlag为false
            cu.m_mergeFlag[pu.puAbsPartIdx] = false;
            //设置dir为双向
			cu.setPUInterDir(3, pu.puAbsPartIdx, puIdx);

			/* 前向 */
            //设置前向MV和refIdx
			cu.setPUMv(0, bidir[0].mv, pu.puAbsPartIdx, puIdx);
            cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx);
			//记录前向mvd和mvpIdx
            cu.m_mvd[0][pu.puAbsPartIdx] = bidir[0].mv - bidir[0].mvp;
            cu.m_mvpIdx[0][pu.puAbsPartIdx] = bidir[0].mvpIdx;

			/* 后向 */
			//设置后向MV和refIdx
            cu.setPUMv(1, bidir[1].mv, pu.puAbsPartIdx, puIdx);
            cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx);
            //记录后向mvd和mvpIdx
			cu.m_mvd[1][pu.puAbsPartIdx] = bidir[1].mv - bidir[1].mvp;
            cu.m_mvpIdx[1][pu.puAbsPartIdx] = bidir[1].mvpIdx;

			//记录下PU的运动估计bits开销
            totalmebits += bidirBits;
        }
		//若前向最优
        else if (bestME[0].cost <= bestME[1].cost)
        {
			//lastMode为0,表示前向,为下一PU的getBlkBits服务
            lastMode = 0;

			//标记PU的mergeflag为false
            cu.m_mergeFlag[pu.puAbsPartIdx] = false;
			//记录PU预测方向为前向
            cu.setPUInterDir(1, pu.puAbsPartIdx, puIdx);

			//记录PU的前向预测MV和refIdx
            cu.setPUMv(0, bestME[0].mv, pu.puAbsPartIdx, puIdx);
            cu.setPURefIdx(0, bestME[0].ref, pu.puAbsPartIdx, puIdx);
			//记录PU的前向预测mvd和mvpIdx
            cu.m_mvd[0][pu.puAbsPartIdx] = bestME[0].mv - bestME[0].mvp;
            cu.m_mvpIdx[0][pu.puAbsPartIdx] = bestME[0].mvpIdx;

			//置PU的后向RefIdx无效,且MV为0
            cu.setPURefIdx(1, REF_NOT_VALID, pu.puAbsPartIdx, puIdx);
            cu.setPUMv(1, mvzero, pu.puAbsPartIdx, puIdx);

            totalmebits += bestME[0].bits;
        }
		//若后向最优
        else
        {
			//lastMode为1,表示后向,为下一PU的getBlkBits服务
            lastMode = 1;

			//标记PU的mergeflag为false
            cu.m_mergeFlag[pu.puAbsPartIdx] = false;
            //记录PU预测方向为后向
			cu.setPUInterDir(2, pu.puAbsPartIdx, puIdx);

            //记录PU的后向预测MV和refIdx
			cu.setPUMv(1, bestME[1].mv, pu.puAbsPartIdx, puIdx);
			cu.setPURefIdx(1, bestME[1].ref, pu.puAbsPartIdx, puIdx);
            //记录PU的后向预测mvd和mvpIdx
			cu.m_mvd[1][pu.puAbsPartIdx] = bestME[1].mv - bestME[1].mvp;
			cu.m_mvpIdx[1][pu.puAbsPartIdx] = bestME[1].mvpIdx;

			//置PU的前向RefIdx无效,且MV为0
            cu.setPURefIdx(0, REF_NOT_VALID, pu.puAbsPartIdx, puIdx);
            cu.setPUMv(0, mvzero, pu.puAbsPartIdx, puIdx);

			//累加上当前PU的运动估计bits开销
            totalmebits += bestME[1].bits;
        }

		//进行运动补偿
        motionCompensation(cu, pu, *predYuv, true, bChromaMC);
    } //end of for (int puIdx = 0; puIdx < numPart; puIdx++)

	//记录下当前CU的运动估计bits
    interMode.sa8dBits += totalmebits;
}

你可能感兴趣的:(X265)