LookaheadTLD::lowresIntraEstimate()

/*
 * Low-resolution intra cost estimation for one frame.
 *
 * Visits every lowres CU of `fenc` in raster order (heightInCU rows of
 * widthInCU CUs). For each CU:
 *   1. copy the CU's source pixels from fenc.lowresPlane[0] into a local
 *      buffer;
 *   2. gather the reference samples (corner + 2*cuSize top pixels, followed
 *      by 2*cuSize left pixels) and produce a filtered copy of them;
 *   3. measure the SATD of DC and planar prediction;
 *   4. search the angular modes: a coarse pass over modes 5, 10, ..., 30,
 *      then a refinement at distance 2 and at distance 1 around the best
 *      angular mode found so far;
 *   5. keep the cheapest of DC / planar / best angular and add the fixed
 *      intra and lowres penalties to its cost;
 *   6. store the cost in fenc.lowresCosts[0][0] (clamped to
 *      LOWRES_COST_MASK) and fenc.intraCost, and the mode in fenc.intraMode;
 *   7. derive the AQ-scaled cost and add it to the row total
 *      fenc.rowSatds[0][0][cuY]; CUs that count towards the frame score are
 *      also added to the frame totals.
 * The frame totals are finally written to fenc.costEst[0][0] and
 * fenc.costEstAq[0][0].
 *
 * Parameters:
 *   fenc   - frame to analyse; its lowres luma plane is read and its cost /
 *            mode arrays are written.
 *   qgSize - when equal to 8 the AQ scale is read from
 *            fenc.invQscaleFactor8x8, otherwise from fenc.invQscaleFactor.
 */
void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
{
    ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
    pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
    pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1];

    /* neighbours[0] holds the raw reference samples, neighbours[1] the filtered ones */
    pixel* const rawRef = neighbours[0];
    pixel* const filteredRef = neighbours[1];

    /* Lowres CU geometry */
    const int cuSize  = X265_LOWRES_CU_SIZE;
    const int refSpan = 2 * cuSize;              /* number of top (and of left) reference pixels */
    const int sizeIdx = X265_LOWRES_CU_BITS - 2;

    /* Fixed costs added to the winning intra mode of every CU */
    const int lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
    const int intraPenalty    = 5 * lookAheadLambda;
    const int lowresPenalty   = 4;               /* fixed CU cost overhead */

    /* SATD primitive matching the lowres CU size */
    const pixelcmp_t satd = primitives.pu[sizeIdx].satd;

    /* Planar predicts from the filtered samples when the CU is at least 8x8 */
    const int planarRef = (cuSize >= 8) ? 1 : 0;

    /* Flag handed to the DC and angular predictors as their last argument */
    const int bSmallCU = (cuSize <= 16) ? 1 : 0;

    /* With two or fewer CUs in either direction every CU counts towards the frame score */
    const bool bTinyFrame = widthInCU <= 2 || heightInCU <= 2;

    int frameCost = 0;
    int frameCostAq = 0;

    for (int cuY = 0; cuY < heightInCU; cuY++)
    {
        const bool bInnerRow = cuY > 0 && cuY < heightInCU - 1;

        /* The row total only ever accumulates AQ-scaled costs */
        fenc.rowSatds[0][0][cuY] = 0;

        for (int cuX = 0; cuX < widthInCU; cuX++)
        {
            /* Raster index of this CU and the offset of its first pixel in the lowres plane */
            const int cuXY = cuY * widthInCU + cuX;
            const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * fenc.lumaStride;
            pixel* const cuPixels = fenc.lowresPlane[0] + pelOffset;

            /* Local copy of the source block */
            primitives.cu[sizeIdx].copy_pp(fencIntra, cuSize, cuPixels, fenc.lumaStride);

            /* Reference samples start at the pixel diagonally above-left of the CU.
             * NOTE(review): for CUs on the top/left frame edge this reads outside the
             * CU grid, so it presumably relies on padding around lowresPlane[0] -
             * confirm against the plane allocation. */
            const pixel* const corner = cuPixels - (fenc.lumaStride + 1);

            /* corner + top row */
            memcpy(rawRef, corner, (refSpan + 1) * sizeof(pixel));

            /* left column, stored right after the top row */
            pixel* const leftDst = rawRef + refSpan + 1;
            for (int i = 0; i < refSpan; i++)
                leftDst[i] = corner[(i + 1) * fenc.lumaStride];

            /* Build the filtered version of the reference samples */
            primitives.cu[sizeIdx].intra_filter(rawRef, filteredRef);

            int trialCost;
            int bestCost = me.COST_MAX;
            uint32_t bestMode = 0;

            /* DC */
            primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, rawRef, 0, bSmallCU);
            trialCost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(bestCost, trialCost, bestMode, DC_IDX);

            /* Planar */
            primitives.cu[sizeIdx].intra_pred[PLANAR_IDX](prediction, cuSize, neighbours[planarRef], 0, 0);
            trialCost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(bestCost, trialCost, bestMode, PLANAR_IDX);

            /* Angular search, coarse pass: every fifth mode (5, 10, ..., 30) */
            int bestAngCost = me.COST_MAX;
            uint32_t bestAngMode = 4;
            for (uint32_t mode = 5; mode < 35; mode += 5)
            {
                const int useFiltered = (g_intraFilterFlags[mode] & cuSize) ? 1 : 0;
                primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[useFiltered], mode, bSmallCU);
                trialCost = satd(fencIntra, cuSize, prediction, cuSize);
                COPY2_IF_LT(bestAngCost, trialCost, bestAngMode, mode);
            }

            /* Angular search, refinement: try the modes at distance 2, then at
             * distance 1. Both candidates of a step are derived from the best
             * mode as it stood when the step began, lower candidate first. */
            for (uint32_t dist = 2; dist > 0; dist--)
            {
                const uint32_t around[2] = { bestAngMode - dist, bestAngMode + dist };

                for (int side = 0; side < 2; side++)
                {
                    const uint32_t mode = around[side];
                    const int useFiltered = (g_intraFilterFlags[mode] & cuSize) ? 1 : 0;
                    primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[useFiltered], mode, bSmallCU);
                    trialCost = satd(fencIntra, cuSize, prediction, cuSize);
                    COPY2_IF_LT(bestAngCost, trialCost, bestAngMode, mode);
                }
            }

            /* Overall winner among DC, planar and the best angular mode */
            COPY2_IF_LT(bestCost, bestAngCost, bestMode, bestAngMode);

            /* Estimate of the intra signalling cost: both fixed penalties are added */
            bestCost += intraPenalty + lowresPenalty;

            /* Publish the per-CU results. In lowresCosts the cost is clamped to
             * LOWRES_COST_MASK and the bits at LOWRES_COST_SHIFT are left at 0. */
            fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(bestCost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
            fenc.intraCost[cuXY] = bestCost;
            fenc.intraMode[cuXY] = (uint8_t)bestMode;

            /* Do not include edge blocks in the frame cost estimates, they are
             * not very accurate (unless the frame is too small to have an interior). */
            const bool bInnerCU = bInnerRow && cuX > 0 && cuX < widthInCU - 1;
            const bool bFrameScoreCU = bInnerCU || bTinyFrame;

            /* AQ-scaled cost: only frame-score CUs are scaled, and only when
             * fenc.invQscaleFactor is non-null; the scale is an 8-bit fixed-point
             * factor applied with rounding.
             * NOTE(review): the null check is on invQscaleFactor even when the
             * 8x8 table is the one being read - confirm both are allocated together. */
            int aqCost = bestCost;
            if (bFrameScoreCU && fenc.invQscaleFactor)
            {
                if (qgSize == 8)
                    aqCost = (bestCost * fenc.invQscaleFactor8x8[cuXY] + 128) >> 8;
                else
                    aqCost = (bestCost * fenc.invQscaleFactor[cuXY] + 128) >> 8;
            }

            if (bFrameScoreCU)
            {
                frameCost += bestCost;
                frameCostAq += aqCost;
            }

            /* Every CU, edge or not, contributes to its row total */
            fenc.rowSatds[0][0][cuY] += aqCost;
        }
    }

    /* Frame-level intra cost and its AQ-scaled counterpart */
    fenc.costEst[0][0] = frameCost;
    fenc.costEstAq[0][0] = frameCostAq;
}

你可能感兴趣的:(X265)