CostEstimateGroup::estimateCUCost()

/*
 * Estimates the lookahead cost of one CU of frame b and folds it into the
 * per-frame / per-slice / per-row totals.
 *
 * Parameters:
 *   tld        per-thread lookahead data; tld.me performs every motion search
 *              and every cost measurement (bufSATD) made here
 *   cuX, cuY   CU column and row on the grid in use: m_8x8Width x m_8x8Height,
 *              or m_4x4Width x m_4x4Height when hme is set
 *   p0, p1, b  indices into m_frames: forward (list 0) reference, backward
 *              (list 1) reference and the frame being analysed. b < p1 selects
 *              the bidirectional (B) path, otherwise the P path is taken
 *   bDoSearch  per list: true runs a motion search now, false reuses the cost
 *              already stored in the MV cost table
 *   lastRow    when true, no MV candidates are taken from the CU row below
 *   slice      negative: accumulate into fenc->costEst / costEstAq / intraMbs,
 *              otherwise accumulate into m_slice[slice]
 *   hme        true: work on lowerResPlane and the lowerRes MV / cost tables,
 *              then return right after the motion search without touching any
 *              frame, slice or row total
 *
 * Outline:
 *   1. Pick the reference planes, the CU grid, the CU index and pixel offset.
 *   2. Hand the source CU to the motion estimator if anything will be measured.
 *   3. Bound the MV search range by the CU position.
 *   4. For each list (one for P, two for B):
 *        - reuse the stored cost if no search was requested, else
 *        - collect up to five MV candidates (right, below, below-left,
 *          below-right neighbours, plus the doubled lowerRes MV if present),
 *        - measure each candidate and use the cheapest one as mvp,
 *        - run motionEstimate around mvp and store the cost and MV,
 *        - for B, optionally replace the result by a cheap zero-MV "skip",
 *        - keep the cheapest list in bcost / listused.
 *   5. hme pass: stop here.
 *   6. B: also try the average of both motion-compensated predictions and the
 *      average of both co-located blocks, then add lowresPenalty.
 *      P: add lowresPenalty, then take the intra cost if it is lower.
 *   7. Derive the AQ-scaled cost and accumulate / store the results.
 *
 * listused values: 0 = intra (or nothing chosen), 1 = list 0, 2 = list 1,
 * 3 = bidirectional.
 *
 * COPY2_IF_LT is defined elsewhere; the comments below assume it assigns both
 * destination values when the candidate cost is lower than the current one.
 */
void CostEstimateGroup::estimateCUCost(LookaheadTLD& tld, int cuX, int cuY, int p0, int p1, int b, bool bDoSearch[2], bool lastRow, int slice, bool hme)
{
    Lowres *fref0 = m_frames[p0];
    Lowres *fref1 = m_frames[p1];
    Lowres *fenc  = m_frames[b];

    /* Use the weighted copy of the forward reference when one is flagged for
     * this distance; the hme pass always uses the plain reference frame */
    ReferencePlanes *wfref0 = (fenc->weightedRef[b - p0].isWeighted && !hme) ? &fenc->weightedRef[b - p0] : fref0;

    /* CU grid dimensions of the plane being searched */
    const int widthInCU = hme ? m_lookahead.m_4x4Width : m_lookahead.m_8x8Width;
    const int heightInCU = hme ? m_lookahead.m_4x4Height : m_lookahead.m_8x8Height;
    /* a backward reference exists only when p1 lies after b */
    const int bBidir = (b < p1);
    /* raster index of this CU on the grid in use */
    const int cuXY = cuX + cuY * widthInCU;
    /* Index of the covering CU in the lowerRes (hme) tables. The hme pass fills
     * those tables at cuX + cuY * m_4x4Width, so they must be read back with the
     * same row stride. The earlier form "(cuY / 2) * widthInCU / 2" evaluated as
     * ((cuY / 2) * widthInCU) / 2 and addressed the wrong CU whenever the 8x8
     * grid width was odd. */
    const int cuXY_4x4 = (cuX / 2) + (cuY / 2) * m_lookahead.m_4x4Width;
    const int cuSize = X265_LOWRES_CU_SIZE;
    /* pixel offset of the CU's top-left sample; the hme plane uses half the luma stride */
    const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * (hme ? fenc->lumaStride/2 : fenc->lumaStride);

    /* Load the source CU into the motion estimator only if a search or a
     * bidirectional measurement is going to use it */
    if (bBidir || bDoSearch[0] || bDoSearch[1])
    {
        if (hme)
            tld.me.setSourcePU(fenc->lowerResPlane[0], fenc->lumaStride / 2, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
        else
            tld.me.setSourcePU(fenc->lowresPlane[0], fenc->lumaStride, pelOffset, cuSize, cuSize, X265_HEX_SEARCH, m_lookahead.m_param->hmeSearchMethod[0], m_lookahead.m_param->hmeSearchMethod[1], 1);
    }

    /* A small, arbitrary bias to avoid VBV problems caused by zero-residual lookahead blocks. */
    int lowresPenalty = 4;
    /* temporal distance to the list 0 / list 1 reference; selects the MV and cost tables */
    int listDist[2] = { b - p0, p1 - b};

    MV mvmin, mvmax;
    int bcost = tld.me.COST_MAX;
    int listused = 0;

    // TODO: restrict to slices boundaries
    // establish search bounds that don't cross extended frame boundaries
    // (distance from this CU to the outermost CU on each side, plus 8)
    mvmin.x = (int32_t)(-cuX * cuSize - 8);
    mvmin.y = (int32_t)(-cuY * cuSize - 8);
    mvmax.x = (int32_t)((widthInCU - cuX - 1) * cuSize + 8);
    mvmax.y = (int32_t)((heightInCU - cuY - 1) * cuSize + 8);

    /* one pass per prediction list: list 0 only for P, list 0 and list 1 for B */
    for (int i = 0; i < 1 + bBidir; i++)
    {
        /* reference into the cost table, so assigning fencCost stores the result */
        int& fencCost = hme ? fenc->lowerResMvCosts[i][listDist[i]][cuXY] : fenc->lowresMvCosts[i][listDist[i]][cuXY];
        int skipCost = INT_MAX;

        /* no search requested for this list: reuse the stored cost */
        if (!bDoSearch[i])
        {
            COPY2_IF_LT(bcost, fencCost, listused, i + 1);
            continue;
        }

        int numc = 0;
        MV mvc[5], mvp;
        /* pointer to this CU's slot in the MV table; neighbours are reached by offset */
        MV* fencMV = hme ? &fenc->lowerResMvs[i][listDist[i]][cuXY] : &fenc->lowresMvs[i][listDist[i]][cuXY];
        ReferencePlanes* fref = i ? fref1 : wfref0;

        /* Reverse-order MV prediction: candidates come from the right and
         * lower neighbours rather than the left and upper ones (presumably
         * because the caller walks the CUs from the bottom-right corner
         * backwards, so those are the ones already estimated - confirm
         * against the caller) */
#define MVC(mv) mvc[numc++] = mv;
        /* right neighbour, unless this is the last column */
        if (cuX < widthInCU - 1)
            MVC(fencMV[1]);
        if (!lastRow)
        {
            /* neighbour directly below */
            MVC(fencMV[widthInCU]);
            /* below-left neighbour, unless this is the first column */
            if (cuX > 0)
                MVC(fencMV[widthInCU - 1]);
            /* below-right neighbour, unless this is the last column */
            if (cuX < widthInCU - 1)
                MVC(fencMV[widthInCU + 1]);
        }

        /* Outside the hme pass, when the lowerRes tables exist and hold a
         * positive cost for the covering CU, add its MV scaled by 2 */
        if (fenc->lowerResMvs[0][0] && !hme && fenc->lowerResMvCosts[i][listDist[i]][cuXY_4x4] > 0)
        {
            MVC((fenc->lowerResMvs[i][listDist[i]][cuXY_4x4]) * 2);
        }
#undef MVC

        /* no candidate available: search around the zero MV */
        if (!numc)
            mvp = 0;
        else
        {
            ALIGN_VAR_32(pixel, subpelbuf[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
            int mvpcost = MotionEstimate::COST_MAX;

            /* measure SATD cost of each neighbor MV (estimating merge analysis)
             * and use the lowest cost MV as MVP (estimating AMVP). Since all
             * mvc[] candidates are measured here, none are passed to motionEstimate */
            for (int idx = 0; idx < numc; idx++)
            {
                intptr_t stride = X265_LOWRES_CU_SIZE;
                /* motion-compensated prediction for this candidate */
                pixel *src = fref->lowresMC(pelOffset, mvc[idx], subpelbuf, stride, hme);
                int cost = tld.me.bufSATD(src, stride);
                /* keep the cheapest candidate as mvp */
                COPY2_IF_LT(mvpcost, cost, mvp, mvc[idx]);

                /* Except for mv0 case, everything else is likely to have enough residual to not trigger the skip. */
                /* NOTE(review): this tests the running best mvp, not mvc[idx]. While a
                 * zero MV is the best so far, skipCost is overwritten with the cost of
                 * each later candidate, and it keeps a stale value if a non-zero
                 * candidate later becomes best - confirm this is intended */
                if (!mvp.notZero() && bBidir)
                    skipCost = cost;
            }
        }

        /* ME will never return a cost larger than the cost @MVP, so we do not
         * have to check that ME cost is more than the estimated merge cost */
        /* search around mvp within [mvmin, mvmax]; the best MV is written to
         * *fencMV and its cost to the table slot behind fencCost */
        if(!hme)
            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices);
        else
            fencCost = tld.me.motionEstimate(fref, mvmin, mvmax, mvp, 0, NULL, s_merange, *fencMV, m_lookahead.m_param->maxSlices, fref->lowerResPlane[0]);

        /* B only: if the recorded skip cost is below 64 and below the search
         * result, store it together with a zero MV instead */
        if (skipCost < 64 && skipCost < fencCost && bBidir)
        {
            fencCost = skipCost;
            *fencMV = 0;
        }

        /* keep the cheaper list; listused becomes 1 for list 0, 2 for list 1 */
        COPY2_IF_LT(bcost, fencCost, listused, i + 1);
    }   // end of for (int i = 0; i < 1 + bBidir; i++)

    /* the hme pass only fills the lowerRes MV / cost tables */
    if (hme)
        return;

    if (bBidir) /* B, also consider bidir */
    {
        /* NOTE: the wfref0 (weightp) is not used for BIDIR */

        /* avg(l0-mv, l1-mv) candidate */
        ALIGN_VAR_32(pixel, subpelbuf0[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        ALIGN_VAR_32(pixel, subpelbuf1[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        intptr_t stride0 = X265_LOWRES_CU_SIZE, stride1 = X265_LOWRES_CU_SIZE;
        /* predictions from each list using the MVs stored for this CU */
        pixel *src0 = fref0->lowresMC(pelOffset, fenc->lowresMvs[0][listDist[0]][cuXY], subpelbuf0, stride0, 0);
        pixel *src1 = fref1->lowresMC(pelOffset, fenc->lowresMvs[1][listDist[1]][cuXY], subpelbuf1, stride1, 0);
        ALIGN_VAR_32(pixel, ref[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
        /* combine both predictions into ref and measure the result */
        primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, stride0, src1, stride1, 32);
        int bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
        COPY2_IF_LT(bcost, bicost, listused, 3);

        /* co-located candidate */
        /* same pixel position in both references, read straight from their planes */
        src0 = fref0->lowresPlane[0] + pelOffset;
        src1 = fref1->lowresPlane[0] + pelOffset;
        primitives.pu[LUMA_8x8].pixelavg_pp[NONALIGNED](ref, X265_LOWRES_CU_SIZE, src0, fref0->lumaStride, src1, fref1->lumaStride, 32);
        bicost = tld.me.bufSATD(ref, X265_LOWRES_CU_SIZE);
        COPY2_IF_LT(bcost, bicost, listused, 3);

        /* bias the inter cost */
        bcost += lowresPenalty;
    }
    else /* P, also consider intra */
    {
        /* bias the inter cost before comparing it with intra */
        bcost += lowresPenalty;

        if (fenc->intraCost[cuXY] < bcost)
        {
            bcost = fenc->intraCost[cuXY];
            listused = 0;   // listused = 0 means intra
        }
    }

    /* do not include edge blocks in the frame cost estimates, they are not very accurate */
    /* (every CU counts when the grid is at most 2 CUs wide or high) */
    const bool bFrameScoreCU = (cuX > 0 && cuX < widthInCU - 1 &&
                                cuY > 0 && cuY < heightInCU - 1) || widthInCU <= 2 || heightInCU <= 2;
    /* AQ cost: bcost scaled by the per-CU factor, rounded, then shifted right
     * by 8; left equal to bcost for CUs that are not scored or when no factor
     * table is present.
     * NOTE(review): the qgSize == 8 branch null-checks invQscaleFactor but reads
     * invQscaleFactor8x8 - confirm both tables are always allocated together */
    int bcostAq;
    if (m_lookahead.m_param->rc.qgSize == 8)
        bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor8x8[cuXY] + 128) >> 8) : bcost;
    else
        bcostAq = (bFrameScoreCU && fenc->invQscaleFactor) ? ((bcost * fenc->invQscaleFactor[cuXY] +128) >> 8) : bcost;

    /* scored CUs contribute to the frame totals (slice < 0) or to the slice totals */
    if (bFrameScoreCU)
    {
        if (slice < 0)
        {
            fenc->costEst[b - p0][p1 - b] += bcost;
            fenc->costEstAq[b - p0][p1 - b] += bcostAq;
            /* count intra CUs on the P path only */
            if (!listused && !bBidir)
                fenc->intraMbs[b - p0]++;
        }
        else
        {
            m_slice[slice].costEst += bcost;
            m_slice[slice].costEstAq += bcostAq;
            if (!listused && !bBidir)
                m_slice[slice].intraMbs++;
        }
    }

    /* the row total receives every CU, edge CUs included */
    fenc->rowSatds[b - p0][p1 - b][cuY] += bcostAq;
    /* per-CU record: cost capped at LOWRES_COST_MASK, with listused placed above it */
    fenc->lowresCosts[b - p0][p1 - b][cuXY] = (uint16_t)(X265_MIN(bcost, LOWRES_COST_MASK) | (listused << LOWRES_COST_SHIFT));
}

你可能感兴趣的:(X265)