下面要讲解变换和量化具体的执行函数。
TComTrQuant::transformNxN函数详解:
这是变换和量化的具体执行函数
(1)首先判断是否确实需要变换量化
(2)如果使用了skip模式,那么调用xTransformSkip进行处理;否则调用xT进行变换处理
(3)调用xQuant函数进行量化
/** Forward transform and quantisation of one residual block.
 *
 * If the CU is coded with transquant bypass, the residual is copied straight
 * into the coefficient buffer and the function returns. Otherwise the residual
 * goes through xTransformSkip() or xT() into m_plTempCoeff and is then
 * quantised by xQuant().
 *
 * \param pcCU             CU that owns the block
 * \param pcResidual       residual samples, rows are uiStride apart
 * \param uiStride         row stride of pcResidual
 * \param rpcCoeff         output coefficients, rows are uiWidth apart
 * \param rpcArlCoeff      (ADAPTIVE_QP_SELECTION only) forwarded to xQuant()
 * \param uiWidth          block width
 * \param uiHeight         block height
 * \param uiAbsSum         reset to 0 here; on the bypass path receives the sum
 *                         of absolute residuals, otherwise it is passed on to xQuant()
 * \param eTType           texture component (luma / chroma)
 * \param uiAbsPartIdx     partition index of the block inside the CU
 * \param useTransformSkip true selects xTransformSkip() instead of xT()
 */
Void TComTrQuant::transformNxN( TComDataCU* pcCU,
                                Pel*        pcResidual,
                                UInt        uiStride,
                                TCoeff*     rpcCoeff,
#if ADAPTIVE_QP_SELECTION
                                Int*&       rpcArlCoeff,
#endif
                                UInt        uiWidth,
                                UInt        uiHeight,
                                UInt&       uiAbsSum,
                                TextType    eTType,
                                UInt        uiAbsPartIdx,
                                Bool        useTransformSkip )
{
  // Transquant bypass: no transform, no quantisation, just copy the residual.
  if ( pcCU->getCUTransquantBypass( uiAbsPartIdx ) )
  {
    uiAbsSum = 0;
    for ( UInt row = 0; row < uiHeight; row++ )
    {
      const Pel* resiRow  = pcResidual + row * uiStride;
      TCoeff*    coeffRow = rpcCoeff   + row * uiWidth;
      for ( UInt col = 0; col < uiWidth; col++ )
      {
        coeffRow[col] = resiRow[col];
        uiAbsSum     += abs( resiRow[col] );
      }
    }
    return;
  }

  // Intra luma blocks hand their intra direction to the transform; everything
  // else uses REG_DCT.
  const Bool isIntraLuma = ( eTType == TEXT_LUMA ) &&
                           ( pcCU->getPredictionMode( uiAbsPartIdx ) == MODE_INTRA );
  const UInt uiMode      = isIntraLuma ? pcCU->getLumaIntraDir( uiAbsPartIdx ) : REG_DCT;

  uiAbsSum = 0;
  assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );

  Int bitDepth;
  if ( eTType == TEXT_LUMA )
  {
    bitDepth = g_bitDepthY;
  }
  else
  {
    bitDepth = g_bitDepthC;
  }

  // Transform stage: result is written to the temporary buffer m_plTempCoeff.
  if ( !useTransformSkip )
  {
    xT( bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
  }
  else
  {
    xTransformSkip( bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
  }

  // Quantisation stage: m_plTempCoeff -> rpcCoeff.
  xQuant( pcCU, m_plTempCoeff, rpcCoeff,
#if ADAPTIVE_QP_SELECTION
          rpcArlCoeff,
#endif
          uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
}
TComTrQuant::xTransformSkip这个函数的主要功能是对skip模式的TU进行变换(具体的计算过程还没细看)
/** "Transform" stage for a transform-skip block: no transform is applied, the
 *  residual is only rescaled by a shift so that it has the same scale the
 *  quantiser expects from the regular transform path.
 *
 * \param bitDepth   bit depth of the component
 * \param piBlkResi  input residual, rows are uiStride apart
 * \param uiStride   row stride of piBlkResi
 * \param psCoeff    output coefficients, stored densely (rows are width apart)
 * \param width      block width
 * \param height     block height (asserted equal to width)
 */
Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
{
  assert( width == height );
  UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
  Int  shift        = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;
  UInt transformSkipShift;
  Int  j, k;

  if ( shift >= 0 )
  {
    // Usual case: scale the residual up.
    transformSkipShift = shift;
    for ( j = 0; j < height; j++ )
    {
      for ( k = 0; k < width; k++ )
      {
        // The row stride of the dense output is the block width. The previous
        // code used `height` here, which was only right because of the
        // square-block assert above.
        psCoeff[j * width + k] = piBlkResi[j * uiStride + k] << transformSkipShift;
      }
    }
  }
  else
  {
    // The case when uiBitDepth > 13: scale down with rounding.
    Int offset;
    transformSkipShift = -shift;
    offset = (1 << (transformSkipShift - 1));
    for ( j = 0; j < height; j++ )
    {
      for ( k = 0; k < width; k++ )
      {
        psCoeff[j * width + k] = (piBlkResi[j * uiStride + k] + offset) >> transformSkipShift;
      }
    }
  }
}
TComTrQuant::xT就是普通的变换函数
/** Regular forward transform wrapper.
 *
 * With MATRIX_MULT the work is delegated to xTr(). Otherwise the residual is
 * packed into a dense Short buffer, transformed by xTrMxN(), and the Short
 * results are widened into psCoeff.
 *
 * \param bitDepth   bit depth of the component
 * \param uiMode     transform mode (intra direction or REG_DCT)
 * \param piBlkResi  input residual, rows are uiStride apart
 * \param uiStride   row stride of piBlkResi
 * \param psCoeff    output coefficients, iWidth * iHeight entries
 * \param iWidth     block width
 * \param iHeight    block height
 */
Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
{
#if MATRIX_MULT
  xTr( bitDepth, piBlkResi, psCoeff, uiStride, (UInt)iWidth, uiMode );
#else
  Short block[ 32 * 32 ];   // densely packed copy of the residual
  Short coeff[ 32 * 32 ];   // transform output

  // Pack the strided residual rows back to back.
  for ( Int row = 0; row < iHeight; row++ )
  {
    memcpy( block + row * iWidth, piBlkResi + row * uiStride, iWidth * sizeof( Short ) );
  }

  // 2-D transform: block -> coeff.
  xTrMxN( bitDepth, block, coeff, iWidth, iHeight, uiMode );

  // Widen the Short results into the caller's Int buffer.
  const Int numCoeff = iHeight * iWidth;
  Int idx = 0;
  while ( idx < numCoeff )
  {
    psCoeff[ idx ] = coeff[ idx ];
    idx++;
  }
#endif
}
xTrMxN函数由函数xT调用,执行二维的变换,里面主要执行了一些矩阵方面的计算(还没细看,涉及了很多数学上的东西)
/** Separable 2-D forward transform for square blocks of size 4, 8, 16 or 32.
 *
 * The transform runs as two 1-D passes through a temporary buffer:
 * block -> tmp (first pass) and tmp -> coeff (second pass). 4x4 blocks whose
 * mode is not REG_DCT use fastForwardDst(); every other case uses the
 * partialButterflyN() routine for its size. Any other width/height
 * combination leaves coeff untouched.
 *
 * \param bitDepth  bit depth of the component
 * \param block     input residual, densely packed
 * \param coeff     output coefficients
 * \param iWidth    block width
 * \param iHeight   block height
 * \param uiMode    transform mode (REG_DCT or an intra direction)
 */
void xTrMxN(Int bitDepth, Short *block,Short *coeff, Int iWidth, Int iHeight, UInt uiMode)
{
  Short tmp[ 64 * 64 ];
  const Int shift_1st = g_aucConvertToBit[iWidth]  + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
  const Int shift_2nd = g_aucConvertToBit[iHeight] + 8;              // log2(iHeight) + 6

  // Only square blocks are handled.
  if ( iWidth != iHeight )
  {
    return;
  }

  switch ( iWidth )
  {
  case 4:
    if ( uiMode == REG_DCT )
    {
      // Butterfly transform
      partialButterfly4( block, tmp, shift_1st, iHeight );
      partialButterfly4( tmp, coeff, shift_2nd, iWidth );
    }
    else
    {
      // Fast forward DST: block -> tmp, then tmp -> coeff
      fastForwardDst( block, tmp, shift_1st );
      fastForwardDst( tmp, coeff, shift_2nd );
    }
    break;

  case 8:
    partialButterfly8( block, tmp, shift_1st, iHeight );
    partialButterfly8( tmp, coeff, shift_2nd, iWidth );
    break;

  case 16:
    partialButterfly16( block, tmp, shift_1st, iHeight );
    partialButterfly16( tmp, coeff, shift_2nd, iWidth );
    break;

  case 32:
    partialButterfly32( block, tmp, shift_1st, iHeight );
    partialButterfly32( tmp, coeff, shift_2nd, iWidth );
    break;

  default:
    break;
  }
}
TComTrQuant::xQuant这个函数主要执行量化操作(具体操作细节还没仔细了解)。
/** Quantise one block of transform coefficients.
 *
 * When RDOQ is enabled for this block (m_useRDOQTS for transform-skip blocks,
 * m_useRDOQ otherwise, and luma or RDOQ_CHROMA), the work is handed to
 * xRateDistOptQuant(). Otherwise every coefficient is quantised as
 * level = (|coeff| * scale + offset) >> iQBits, the sign is restored, the
 * result is clipped to 16-bit range, and signBitHidingHDQ() is run when the
 * PPS sign-hide flag is set and uiAcSum >= 2.
 *
 * \param pcCU          CU that owns the block
 * \param pSrc          input transform coefficients
 * \param pDes          output quantised levels
 * \param pArlDes       (ADAPTIVE_QP_SELECTION only) written per coefficient when
 *                      m_bUseAdaptQpSelect is set
 * \param iWidth        block width
 * \param iHeight       block height
 * \param uiAcSum       incremented by the sum of absolute levels
 * \param eTType        texture component
 * \param uiAbsPartIdx  partition index of the block inside the CU
 */
Void TComTrQuant::xQuant( TComDataCU* pcCU,
                          Int*        pSrc,
                          TCoeff*     pDes,
#if ADAPTIVE_QP_SELECTION
                          Int*&       pArlDes,
#endif
                          Int         iWidth,
                          Int         iHeight,
                          UInt&       uiAcSum,
                          TextType    eTType,
                          UInt        uiAbsPartIdx )
{
  const Bool rdoqEnabled = pcCU->getTransformSkip( uiAbsPartIdx, eTType ) ? m_useRDOQTS : m_useRDOQ;

  // Rate-distortion optimised path.
  if ( rdoqEnabled && ( eTType == TEXT_LUMA || RDOQ_CHROMA ) )
  {
#if ADAPTIVE_QP_SELECTION
    xRateDistOptQuant( pcCU, pSrc, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
#else
    xRateDistOptQuant( pcCU, pSrc, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
#endif
    return;
  }

  // ---- plain quantiser ----
  const UInt  log2Size = g_aucConvertToBit[ iWidth ] + 2;
  const UInt  scanIdx  = pcCU->getCoefScanIdx( uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra( uiAbsPartIdx ) );
  const UInt* scan     = g_auiSigLastScan[ scanIdx ][ log2Size - 1 ];
  Int deltaU[ 32 * 32 ];

#if ADAPTIVE_QP_SELECTION
  // Derive a QP parameter set from the slice base QP rather than from m_cQP.
  QpParam   cQpBase;
  const Int sliceQpBase = pcCU->getSlice()->getSliceQpBase();
  const Int qpBDOffset  = ( eTType == TEXT_LUMA ) ? pcCU->getSlice()->getSPS()->getQpBDOffsetY()
                                                  : pcCU->getSlice()->getSPS()->getQpBDOffsetC();
  Int qpScaled;
  if ( eTType != TEXT_LUMA )
  {
    const Int chromaQPOffset = ( eTType == TEXT_CHROMA_U )
      ? pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb()
      : pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
    const Int clippedQp = Clip3( -qpBDOffset, 57, sliceQpBase + chromaQPOffset );
    // Negative values are used as-is, the rest are mapped through g_aucChromaScale.
    qpScaled  = ( clippedQp < 0 ) ? clippedQp : g_aucChromaScale[ clippedQp ];
    qpScaled += qpBDOffset;
  }
  else
  {
    qpScaled = sliceQpBase + qpBDOffset;
  }
  cQpBase.setQpParam( qpScaled );
#endif

  const Int scalingListType = ( pcCU->isIntra( uiAbsPartIdx ) ? 0 : 3 ) + g_eTTable[ (Int)eTType ];
  assert(scalingListType < SCALING_LIST_NUM);
  Int* piQuantCoeff = getQuantCoeff( scalingListType, m_cQP.m_iRem, log2Size - 2 );

  const UInt uiBitDepth      = ( eTType == TEXT_LUMA ) ? g_bitDepthY : g_bitDepthC;
  const Int  iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - log2Size; // Represents scaling through forward transform

#if ADAPTIVE_QP_SELECTION
  const Int iQBits  = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
  const Int iQBitsC = iQBits - ARL_C_PRECISION;
  const Int iAddC   = 1 << (iQBitsC-1);
#else
  const Int iQBits  = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
#endif
  // Rounding offset: 171/512 for I slices, 85/512 otherwise, in units of 1 << iQBits.
  const Int iAdd   = ( pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85 ) << (iQBits-9);
  const Int qBits8 = iQBits - 8;

  const Int numCoeff = iWidth * iHeight;
  for ( Int n = 0; n < numCoeff; n++ )
  {
    const UInt  pos     = n;
    const Int   srcCoef = pSrc[ pos ];
    const Int64 scaled  = (Int64)abs( srcCoef ) * piQuantCoeff[ pos ];

#if ADAPTIVE_QP_SELECTION
    if ( m_bUseAdaptQpSelect )
    {
      pArlDes[ pos ] = (Int)( ( scaled + iAddC ) >> iQBitsC );
    }
#endif

    const Int absLevel = (Int)( ( scaled + iAdd ) >> iQBits );
    // Quantisation remainder, kept for sign-bit hiding.
    deltaU[ pos ] = (Int)( ( scaled - ( absLevel << iQBits ) ) >> qBits8 );

    uiAcSum += absLevel;
    pDes[ pos ] = Clip3( -32768, 32767, ( srcCoef < 0 ) ? -absLevel : absLevel );
  }

  if ( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAcSum >= 2 )
  {
    signBitHidingHDQ( pDes, pSrc, scan, deltaU, iWidth, iHeight ) ;
  }
}
TComTrQuant::xRateDistOptQuant这个函数由xQuant进行调用,进行带率失真优化的量化操作(这个函数很长)
/** Rate-distortion optimised quantisation (RDOQ) of one transform block; called from xQuant().
 *
 * Outline of what the body does:
 *  1. Walk the coefficient groups (CGs) and the coefficients inside each CG in
 *     reverse scan order. For every coefficient the scaled absolute value and
 *     the rounded maximum level are computed; once the first non-zero level has
 *     been seen, xGetCodedLevel() picks the level to code and the cost and
 *     context state (c1, c2, Go-Rice parameter, context set) are updated.
 *  2. After each CG, compare the cost of keeping the CG against zeroing it out
 *     completely and take the cheaper option.
 *  3. Search for the cheapest "last significant coefficient" position.
 *  4. Restore the signs of the kept levels, add their magnitudes to uiAbsSum,
 *     and clear everything after the chosen last position.
 *  5. If the PPS sign-hide flag is set and uiAbsSum >= 2, adjust one level per
 *     scan set where needed so that the parity of the sum matches the sign of
 *     the first non-zero coefficient.
 *
 * \param pcCU           CU that owns the block
 * \param plSrcCoeff     input transform coefficients
 * \param piDstCoeff     output quantised levels
 * \param piArlDstCoeff  (ADAPTIVE_QP_SELECTION only) cleared here, then written
 *                       per coefficient when m_bUseAdaptQpSelect is set
 * \param uiWidth        block width
 * \param uiHeight       block height
 * \param uiAbsSum       incremented (not reset) by the sum of absolute coded levels
 * \param eTType         texture component
 * \param uiAbsPartIdx   partition index of the block inside the CU
 */
Void TComTrQuant::xRateDistOptQuant ( TComDataCU* pcCU,
Int* plSrcCoeff,
TCoeff* piDstCoeff,
#if ADAPTIVE_QP_SELECTION
Int*& piArlDstCoeff,
#endif
UInt uiWidth,
UInt uiHeight,
UInt& uiAbsSum,
TextType eTType,
UInt uiAbsPartIdx )
{
UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;
// Bit depth of the component being quantised (luma or chroma)
UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
// Shift that accounts for the scaling of the forward transform
Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize; // Represents scaling through forward transform
UInt uiGoRiceParam = 0;
Double d64BlockUncodedCost = 0;
const UInt uiLog2BlkSize = g_aucConvertToBit[ uiWidth ] + 2;
// Number of coefficients in the block
const UInt uiMaxNumCoeff = uiWidth * uiHeight;
// Scaling-list index: intra/inter base (0 or 3) plus the per-component entry of g_eTTable
Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
assert(scalingListType < SCALING_LIST_NUM);
Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);
// Quantisation scale table for this list type, QP remainder and block size
Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
Int *piQCoef = piQCoefOrg;
Double *pdErrScale = pdErrScaleOrg;
#if ADAPTIVE_QP_SELECTION
Int iQBitsC = iQBits - ARL_C_PRECISION;
Int iAddC = 1 << (iQBitsC-1);
#endif
// Coefficient scan index (selects the scan tables used below)
UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
#if ADAPTIVE_QP_SELECTION
memset(piArlDstCoeff, 0, sizeof(Int) * uiMaxNumCoeff);
#endif
// Per-coefficient cost arrays, indexed by scan position.
// pdCostCoeff0 is not cleared here; the main loop below writes every entry.
Double pdCostCoeff [ 32 * 32 ];
Double pdCostSig [ 32 * 32 ];
Double pdCostCoeff0[ 32 * 32 ];
::memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
::memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
// Per-coefficient data kept for the sign-bit-hiding stage, indexed by block position.
Int rateIncUp [ 32 * 32 ];
Int rateIncDown [ 32 * 32 ];
Int sigRateDelta[ 32 * 32 ];
Int deltaU [ 32 * 32 ];
::memset( rateIncUp, 0, sizeof(Int) * uiMaxNumCoeff );
::memset( rateIncDown, 0, sizeof(Int) * uiMaxNumCoeff );
::memset( sigRateDelta, 0, sizeof(Int) * uiMaxNumCoeff );
::memset( deltaU, 0, sizeof(Int) * uiMaxNumCoeff );
// Select the scan order of the coefficient groups.
const UInt * scanCG;
{
scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0 ];
if( uiLog2BlkSize == 3 ) // 8x8 block
{
scanCG = g_sigLastScan8x8[ uiScanIdx ];
}
else if( uiLog2BlkSize == 5 ) // 32x32 block
{
scanCG = g_sigLastScanCG32x32;
}
}
const UInt uiCGSize = (1 << MLS_CG_SIZE); // 16 according to the original note; MLS_CG_SIZE is defined outside this view
Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
Int iCGLastScanPos = -1;
UInt uiCtxSet = 0;
Int c1 = 1;
Int c2 = 0;
Double d64BaseCost = 0;
Int iLastScanPos = -1;
UInt c1Idx = 0;
UInt c2Idx = 0;
Int baseLevel;
const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];
::memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
::memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
Int iScanPos;
coeffGroupRDStats rdStats;
// Main pass: coefficient groups in reverse scan order.
for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
{
UInt uiCGBlkPos = scanCG[ iCGScanPos ];
UInt uiCGPosY = uiCGBlkPos / uiNumBlkSide;
UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
::memset( &rdStats, 0, sizeof (coeffGroupRDStats));
const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{
// The actual quantisation of one coefficient happens here
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
//===== quantization =====
UInt uiBlkPos = scan[iScanPos];
// set coeff
Int uiQ = piQCoef[uiBlkPos];
Double dTemp = pdErrScale[uiBlkPos];
Int lLevelDouble = plSrcCoeff[ uiBlkPos ];
// |coeff| * scale, capped so that adding the rounding offset below cannot exceed MAX_INT
lLevelDouble = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
#if ADAPTIVE_QP_SELECTION
if( m_bUseAdaptQpSelect )
{
piArlDstCoeff[uiBlkPos] = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
}
#endif
// Level obtained with round-to-nearest
UInt uiMaxAbsLevel = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;
Double dErr = Double( lLevelDouble );
// Cost of coding this coefficient as zero
pdCostCoeff0[ iScanPos ] = dErr * dErr * dTemp;
d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
// First non-zero level met in reverse scan order: provisional last position
if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
{
iLastScanPos = iScanPos;
uiCtxSet = (iScanPos < SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2;
iCGLastScanPos = iCGScanPos;
}
if ( iLastScanPos >= 0 )
{
//===== coefficient level estimation =====
UInt uiLevel;
UInt uiOneCtx = 4 * uiCtxSet + c1;
UInt uiAbsCtx = uiCtxSet + c2;
if( iScanPos == iLastScanPos )
{
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam,
c1Idx, c2Idx, iQBits, dTemp, 1 );
}
else
{
UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
UShort uiCtxSig = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
c1Idx, c2Idx, iQBits, dTemp, 0 );
sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
}
// Quantisation remainder and rate deltas, consumed by the sign-bit-hiding stage
deltaU[ uiBlkPos ] = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
if( uiLevel > 0 )
{
Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
}
else // uiLevel == 0
{
rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
}
piDstCoeff[ uiBlkPos ] = uiLevel;
d64BaseCost += pdCostCoeff [ iScanPos ];
baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
if( uiLevel >= baseLevel )
{
if(uiLevel > 3*(1<<uiGoRiceParam))
{
uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
}
}
if ( uiLevel >= 1)
{
c1Idx ++;
}
//===== update bin model =====
if( uiLevel > 1 )
{
c1 = 0;
c2 += (c2 < 2);
c2Idx ++;
}
else if( (c1 < 3) && (c1 > 0) && uiLevel)
{
c1++;
}
//===== context set update =====
if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
{
c2 = 0;
uiGoRiceParam = 0;
c1Idx = 0;
c2Idx = 0;
uiCtxSet = (iScanPos == SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2;
if( c1 == 0 )
{
uiCtxSet++;
}
c1 = 1;
}
}
else
{
// Still beyond the last significant coefficient: only the zero cost counts
d64BaseCost += pdCostCoeff0[ iScanPos ];
}
// Accumulate the statistics of the current coefficient group
rdStats.d64SigCost += pdCostSig[ iScanPos ];
if (iScanPosinCG == 0 )
{
rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
}
if (piDstCoeff[ uiBlkPos ] )
{
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
if ( iScanPosinCG != 0 )
{
rdStats.iNNZbeforePos0++;
}
}
} //end for (iScanPosinCG)
// Coefficient-group level decision: keep the group or zero it out
if (iCGLastScanPos >= 0)
{
if( iCGScanPos )
{
if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
{
UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
else
{
if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
{
if ( rdStats.iNNZbeforePos0 == 0 )
{
d64BaseCost -= rdStats.d64SigCost_0;
rdStats.d64SigCost -= rdStats.d64SigCost_0;
}
// rd-cost if SigCoeffGroupFlag = 0, initialization
Double d64CostZeroCG = d64BaseCost;
// add SigCoeffGroupFlag cost to total cost
UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
if (iCGScanPos < iCGLastScanPos)
{
d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
}
// try to convert the current coeff group from non-zero to all-zero
d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels
// if we can save cost, change this block to all-zero block
if ( d64CostZeroCG < d64BaseCost )
{
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
d64BaseCost = d64CostZeroCG;
if (iCGScanPos < iCGLastScanPos)
{
pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
}
// reset coeffs to 0 in this block
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
UInt uiBlkPos = scan[ iScanPos ];
if (piDstCoeff[ uiBlkPos ])
{
piDstCoeff [ uiBlkPos ] = 0;
pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
pdCostSig [ iScanPos ] = 0;
}
}
} // end if ( d64CostAllZeros < d64BaseCost )
}
} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
}
else
{
// The group at scan position 0 is always flagged as significant
uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
}
}
} //end for (iCGScanPos)
//===== estimate last position =====
// No non-zero level anywhere: every output level is already 0, nothing left to do
if ( iLastScanPos < 0 )
{
return;
}
Double d64BestCost = 0;
Int ui16CtxCbf = 0;
Int iBestLastIdxP1 = 0;
// d64BestCost starts as the cost of coding the whole block as zero (cbf = 0);
// d64BaseCost gets the cost of signalling cbf = 1.
if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
{
ui16CtxCbf = 0;
d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
}
else
{
ui16CtxCbf = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
ui16CtxCbf = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
}
// Try each non-zero coefficient (in reverse scan order) as the last position and
// keep the cheapest. The search stops at the first level greater than 1.
Bool bFoundLast = false;
for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
{
UInt uiCGBlkPos = scanCG[ iCGScanPos ];
d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
{
for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
{
iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
if (iScanPos > iLastScanPos) continue;
UInt uiBlkPos = scan[iScanPos];
if( piDstCoeff[ uiBlkPos ] )
{
UInt uiPosY = uiBlkPos >> uiLog2BlkSize;
UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlkSize );
Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
if( totalCost < d64BestCost )
{
iBestLastIdxP1 = iScanPos + 1;
d64BestCost = totalCost;
}
if( piDstCoeff[ uiBlkPos ] > 1 )
{
bFoundLast = true;
break;
}
// Continue the search as if this coefficient were coded as zero
d64BaseCost -= pdCostCoeff[ iScanPos ];
d64BaseCost += pdCostCoeff0[ iScanPos ];
}
else
{
d64BaseCost -= pdCostSig[ iScanPos ];
}
} //end for
if (bFoundLast)
{
break;
}
} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
} // end for
// Restore the signs of the kept levels and accumulate their magnitudes
for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
{
Int blkPos = scan[ scanPos ];
Int level = piDstCoeff[ blkPos ];
uiAbsSum += level;
piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
}
//===== clean uncoded coefficients =====
for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
{
piDstCoeff[ scan[ scanPos ] ] = 0;
}
// Sign-bit hiding: per scan set, if the parity of the sum of levels does not match
// the sign of the first non-zero coefficient, change one level by +/-1 at the
// position with the smallest estimated cost increase.
if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
{
Int64 rdFactor = (Int64) (
g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
+ 0.5);
// lastCG: -1 until the first scan set with a non-zero level is met, 1 while
// processing that set, 0 afterwards
Int lastCG = -1;
Int absSum = 0 ;
Int n ;
for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
{
Int subPos = subSet << LOG2_SCAN_SET_SIZE;
Int firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
absSum = 0 ;
// Locate the last and the first non-zero level of this scan set
for(n = SCAN_SET_SIZE-1; n >= 0; --n )
{
if( piDstCoeff[ scan[ n + subPos ]] )
{
lastNZPosInCG = n;
break;
}
}
for(n = 0; n <SCAN_SET_SIZE; n++ )
{
if( piDstCoeff[ scan[ n + subPos ]] )
{
firstNZPosInCG = n;
break;
}
}
for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
{
absSum += piDstCoeff[ scan[ n + subPos ]];
}
if(lastNZPosInCG>=0 && lastCG==-1)
{
lastCG = 1;
}
// Hiding is only applied when first and last non-zero positions are far enough apart
if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
{
UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
if( signbit!=(absSum&0x1) ) // hide but need tune
{
// calculate the cost
Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
Int minPos =-1, finalChange=0, curChange=0;
for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
{
UInt uiBlkPos = scan[ n + subPos ];
if(piDstCoeff[ uiBlkPos ] != 0 )
{
Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
- ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
{
costDown -= (4<<15) ;
}
if(costUp<costDown)
{
curCost = costUp;
curChange = 1 ;
}
else
{
curChange = -1 ;
// Never zero out the first non-zero coefficient of the set
if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
{
curCost = MAX_INT64 ;
}
else
{
curCost = costDown ;
}
}
}
else
{
curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
curChange = 1 ;
// A new non-zero level before the current first one would become the sign
// carrier, so its sign has to match the hidden sign bit
if(n<firstNZPosInCG)
{
UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
if(thissignbit != signbit )
{
curCost = MAX_INT64;
}
}
}
if( curCost<minCostInc)
{
minCostInc = curCost ;
finalChange = curChange ;
minPos = uiBlkPos ;
}
}
// A level already at the 16-bit limit can only be reduced in magnitude
if(piDstCoeff[minPos] == 32767 || piDstCoeff[minPos] == -32768)
{
finalChange = -1;
}
if(plSrcCoeff[minPos]>=0)
{
piDstCoeff[minPos] += finalChange ;
}
else
{
piDstCoeff[minPos] -= finalChange ;
}
}
}
if(lastCG==1)
{
lastCG=0 ;
}
}
}
}