/* Estimate the intra coding cost of every CU of the low-resolution plane of
 * 'fenc'.
 *
 * For each CU the routine gathers the neighbouring reference pixels, tries DC,
 * planar and a coarse-to-fine subset of the angular modes, and keeps the mode
 * with the lowest SATD.  Per CU it writes fenc.lowresCosts[0][0], fenc.intraCost
 * and fenc.intraMode; per row it accumulates fenc.rowSatds[0][0]; and for the
 * whole frame it stores fenc.costEst[0][0] and fenc.costEstAq[0][0].
 *
 * qgSize selects which inverse-qscale table weights the AQ cost: the 8x8 table
 * when qgSize == 8, the default table otherwise. */
void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
{
    ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
    pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];

    /* neighbours[0] holds the raw reference samples, neighbours[1] the filtered ones */
    pixel neighbours[2][X265_LOWRES_CU_SIZE * 4 + 1];
    pixel* samples = neighbours[0];
    pixel* filtered = neighbours[1];

    const int lookAheadLambda = (int)x265_lambda_tab[X265_LOOKAHEAD_QP];
    const int intraPenalty = 5 * lookAheadLambda;
    const int lowresPenalty = 4; /* constant added to every CU cost */

    const int cuSize = X265_LOWRES_CU_SIZE;
    const int cuSize2 = cuSize << 1;
    const int sizeIdx = X265_LOWRES_CU_BITS - 2;

    pixelcmp_t satd = primitives.pu[sizeIdx].satd;

    /* planar prediction reads the filtered samples once the CU is at least 8 wide */
    const int planar = (cuSize >= 8) ? 1 : 0;
    /* last argument handed to the DC and angular predictors */
    const int bEdgeFilter = cuSize <= 16;

    int frameCost = 0;
    int frameCostAq = 0;

    for (int cuY = 0; cuY < heightInCU; cuY++)
    {
        fenc.rowSatds[0][0][cuY] = 0;

        for (int cuX = 0; cuX < widthInCU; cuX++)
        {
            const int cuXY = cuX + cuY * widthInCU;
            const intptr_t pelOffset = cuSize * cuX + cuSize * cuY * fenc.lumaStride;
            pixel* pixCur = fenc.lowresPlane[0] + pelOffset;

            /* take a contiguous copy of the source CU */
            primitives.cu[sizeIdx].copy_pp(fencIntra, cuSize, pixCur, fenc.lumaStride);

            /* step to the pixel diagonally above-left of the CU and collect the
             * reference samples from there.
             * NOTE(review): for CUs on the frame border this reads outside the CU
             * grid; presumably the lowres plane is padded - confirm. */
            pixCur -= fenc.lumaStride + 1;

            /* corner pixel plus 2 * cuSize pixels of the row above */
            memcpy(samples, pixCur, (2 * cuSize + 1) * sizeof(pixel));

            /* 2 * cuSize pixels of the column to the left, stored after the top row */
            pixel* leftSamples = samples + cuSize2;
            for (int row = 1; row <= 2 * cuSize; row++)
                leftSamples[row] = pixCur[row * fenc.lumaStride];

            primitives.cu[sizeIdx].intra_filter(samples, filtered);

            int cost;
            int icost = me.COST_MAX;
            uint32_t ilowmode = 0;

            /* non-angular candidates: DC first, then planar */
            primitives.cu[sizeIdx].intra_pred[DC_IDX](prediction, cuSize, samples, 0, bEdgeFilter);
            cost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(icost, cost, ilowmode, DC_IDX);

            primitives.cu[sizeIdx].intra_pred[PLANAR_IDX](prediction, cuSize, neighbours[planar], 0, 0);
            cost = satd(fencIntra, cuSize, prediction, cuSize);
            COPY2_IF_LT(icost, cost, ilowmode, PLANAR_IDX);

            /* angular candidates, coarse pass: every fifth mode (5, 10, ... 30) */
            int acost = me.COST_MAX;
            uint32_t alowmode = 4;
            for (uint32_t mode = 5; mode < 35; mode += 5)
            {
                const int useFiltered = (g_intraFilterFlags[mode] & cuSize) ? 1 : 0;
                primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[useFiltered], mode, bEdgeFilter);
                cost = satd(fencIntra, cuSize, prediction, cuSize);
                COPY2_IF_LT(acost, cost, alowmode, mode);
            }

            /* angular candidates, refinement: probe the current best -/+ 2, then -/+ 1.
             * Both probes of a step are derived from the best mode as it stood
             * before that step; the lower one is evaluated first. */
            for (uint32_t dist = 2; dist >= 1; dist--)
            {
                const uint32_t probes[2] = { alowmode - dist, alowmode + dist };

                for (int p = 0; p < 2; p++)
                {
                    const uint32_t mode = probes[p];
                    const int useFiltered = (g_intraFilterFlags[mode] & cuSize) ? 1 : 0;
                    primitives.cu[sizeIdx].intra_pred[mode](prediction, cuSize, neighbours[useFiltered], mode, bEdgeFilter);
                    cost = satd(fencIntra, cuSize, prediction, cuSize);
                    COPY2_IF_LT(acost, cost, alowmode, mode);
                }
            }

            /* the angular winner replaces DC/planar only when strictly cheaper */
            COPY2_IF_LT(icost, acost, ilowmode, alowmode);

            /* add the fixed intra and lowres overheads to the distortion */
            icost += intraPenalty + lowresPenalty;

            fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
            fenc.intraCost[cuXY] = icost;
            fenc.intraMode[cuXY] = (uint8_t)ilowmode;

            /* CUs on the frame border are left out of the frame totals, unless the
             * frame is at most two CUs wide or tall, in which case every CU counts */
            const bool bInteriorCU = cuX > 0 && cuX < widthInCU - 1 &&
                                     cuY > 0 && cuY < heightInCU - 1;
            const bool bFrameScoreCU = bInteriorCU || widthInCU <= 2 || heightInCU <= 2;

            /* AQ cost: scale by the inverse qscale factor with rounding (+128, >> 8)
             * when the CU is scored and the factor table exists; otherwise reuse
             * the plain cost */
            int icostAq = icost;
            if (bFrameScoreCU && fenc.invQscaleFactor)
            {
                if (qgSize == 8)
                    icostAq = (icost * fenc.invQscaleFactor8x8[cuXY] + 128) >> 8;
                else
                    icostAq = (icost * fenc.invQscaleFactor[cuXY] + 128) >> 8;
            }

            if (bFrameScoreCU)
            {
                frameCost += icost;
                frameCostAq += icostAq;
            }

            /* the row total includes every CU, border ones too */
            fenc.rowSatds[0][0][cuY] += icostAq;
        }
    }

    fenc.costEst[0][0] = frameCost;
    fenc.costEstAq[0][0] = frameCostAq;
}