HM编码器代码阅读(19)——变换以及量化(二)

下面要讲解变换和量化具体的执行函数。
TComTrQuant::transformNxN函数详解:
这是变换和量化的具体执行函数
(1)首先判断是否需要跳过变换和量化(即CU使用了transquant bypass模式);如果是,则直接把残差拷贝到系数缓冲区,累加绝对值之和后返回
(2)如果使用了变换跳过(transform skip)模式,那么调用xTransformSkip进行处理;否则调用xT进行变换处理
(3)调用xQuant函数进行量化
// Forward transform + quantization of one residual block.
//
// pcResidual / uiStride : residual samples and their row stride
// rpcCoeff              : receives the output coefficients (row stride uiWidth)
// rpcArlCoeff           : forwarded untouched to xQuant (only with ADAPTIVE_QP_SELECTION)
// uiAbsSum              : set to the sum of absolute output values
// useTransformSkip      : selects xTransformSkip instead of xT for the transform stage
Void TComTrQuant::transformNxN( TComDataCU* pcCU, 
	Pel*        pcResidual, 
	UInt        uiStride, 
	TCoeff*     rpcCoeff, 
#if ADAPTIVE_QP_SELECTION
	Int*&       rpcArlCoeff, 
#endif
	UInt        uiWidth, 
	UInt        uiHeight, 
	UInt&       uiAbsSum, 
	TextType    eTType, 
	UInt        uiAbsPartIdx,
	Bool        useTransformSkip
	)
{
	// Transquant bypass: neither transform nor quantization is applied; the
	// residual is copied straight into the coefficient buffer.
	if (pcCU->getCUTransquantBypass(uiAbsPartIdx))
	{
		uiAbsSum = 0;
		for (UInt row = 0; row < uiHeight; row++)
		{
			const Pel* pSrcRow = pcResidual + row * uiStride;
			TCoeff*    pDstRow = rpcCoeff   + row * uiWidth;
			for (UInt col = 0; col < uiWidth; col++)
			{
				pDstRow[col] = pSrcRow[col];
				uiAbsSum    += abs(pSrcRow[col]);
			}
		}
		return;
	}

	// Mode handed to xT: the luma intra direction for intra luma blocks,
	// REG_DCT for everything else.
	const Bool bIntraLuma = (eTType == TEXT_LUMA) && (pcCU->getPredictionMode(uiAbsPartIdx) == MODE_INTRA);
	UInt uiMode = REG_DCT;
	if (bIntraLuma)
	{
		uiMode = pcCU->getLumaIntraDir( uiAbsPartIdx );
	}

	uiAbsSum = 0;
	assert( uiWidth <= pcCU->getSlice()->getSPS()->getMaxTrSize() );
	const Int bitDepth = (eTType == TEXT_LUMA) ? g_bitDepthY : g_bitDepthC;

	// Transform stage: result goes to the temporary buffer m_plTempCoeff.
	if (!useTransformSkip)
	{
		xT(bitDepth, uiMode, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	}
	else
	{
		xTransformSkip(bitDepth, pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
	}

	// Quantization stage: m_plTempCoeff -> rpcCoeff, accumulating into uiAbsSum.
	xQuant( pcCU, m_plTempCoeff, rpcCoeff,
#if ADAPTIVE_QP_SELECTION
		rpcArlCoeff,
#endif
		uiWidth, uiHeight, uiAbsSum, eTType, uiAbsPartIdx );
}
TComTrQuant::xTransformSkip这个函数的主要功能是对使用变换跳过(transform skip)模式的TU进行处理:并不做真正的变换,只是对残差按位移进行缩放(具体的计算过程还没细看)
/**
 * "Transform" stage for a transform-skip block: no transform is applied, each
 * residual sample is only rescaled by a power of two.
 *
 * \param bitDepth   bit depth used to derive the scaling shift
 * \param piBlkResi  input residual block, row stride uiStride
 * \param uiStride   row stride of piBlkResi
 * \param psCoeff    output buffer, written densely with row stride `width`
 * \param width      block width
 * \param height     block height (asserted equal to width)
 */
Void TComTrQuant::xTransformSkip(Int bitDepth, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int width, Int height )
{
	assert( width == height );
	const UInt uiLog2TrSize = g_aucConvertToBit[ width ] + 2;
	const Int  shift        = MAX_TR_DYNAMIC_RANGE - bitDepth - uiLog2TrSize;

	if( shift >= 0 )
	{
		// Scale up. Multiplying by 1 << shift yields the same value as the
		// former `residual << shift`, but left-shifting a negative residual
		// is undefined behaviour before C++20.
		const Int scale = 1 << shift;
		for( Int j = 0; j < height; j++ )
		{
			for( Int k = 0; k < width; k++ )
			{
				// Output rows are `width` entries apart (the original indexed by
				// `height`, which only worked because the block is square).
				psCoeff[j * width + k] = piBlkResi[j * uiStride + k] * scale;
			}
		}
	}
	else
	{
		// Negative shift (the original notes: the case when uiBitDepth > 13):
		// scale down, adding half of the divisor first so the result is rounded.
		const UInt transformSkipShift = -shift;
		const Int  offset             = (1 << (transformSkipShift - 1));
		for( Int j = 0; j < height; j++ )
		{
			for( Int k = 0; k < width; k++ )
			{
				psCoeff[j * width + k] = (piBlkResi[j * uiStride + k] + offset) >> transformSkipShift;
			}
		}
	}
}
TComTrQuant::xT就是普通的变换函数
// Forward transform wrapper.
// With MATRIX_MULT the work is delegated to xTr; otherwise the residual is
// packed into a dense Short buffer, transformed by xTrMxN, and the Short
// results are widened into psCoeff.
Void TComTrQuant::xT(Int bitDepth, UInt uiMode, Pel* piBlkResi, UInt uiStride, Int* psCoeff, Int iWidth, Int iHeight )
{
#if MATRIX_MULT  
	xTr(bitDepth, piBlkResi, psCoeff, uiStride, (UInt)iWidth, uiMode);
#else
	Short aResidual[ 32 * 32 ];   // dense copy of the input block (row stride iWidth)
	Short aTransformed[ 32 * 32 ]; // xTrMxN output

	// Pack the strided residual rows into the dense buffer.
	// The copy size uses sizeof(Short), as in the original.
	for (Int row = 0; row < iHeight; row++)
	{
		Short* const     pDstRow = aResidual + row * iWidth;
		const Pel* const pSrcRow = piBlkResi + row * uiStride;
		memcpy( pDstRow, pSrcRow, iWidth * sizeof( Short ) );
	}

	// 2-D transform: aResidual -> aTransformed.
	xTrMxN(bitDepth, aResidual, aTransformed, iWidth, iHeight, uiMode );

	// Widen the Short results into the caller's Int buffer.
	const Int numCoeff = iHeight * iWidth;
	for (Int idx = 0; idx < numCoeff; idx++)
	{
		psCoeff[ idx ] = aTransformed[ idx ];
	}
#endif  
}
xTrMxN函数由函数xT调用,执行二维的变换,里面主要执行了一些矩阵方面的计算(还没细看,涉及了很多数学上的东西)
// 2-D forward transform, done as two 1-D passes through the scratch buffer tmp.
// Only the square sizes 4, 8, 16 and 32 are handled; for any other
// width/height combination nothing is written to coeff.
void xTrMxN(Int bitDepth, Short *block,Short *coeff, Int iWidth, Int iHeight, UInt uiMode)
{
	const Int shift_1st = g_aucConvertToBit[iWidth]  + 1 + bitDepth-8; // log2(iWidth) - 1 + g_bitDepth - 8
	const Int shift_2nd = g_aucConvertToBit[iHeight]  + 8;                   // log2(iHeight) + 6

	Short tmp[ 64 * 64 ];

	// Non-square blocks match none of the supported cases.
	if( iWidth != iHeight )
	{
		return;
	}

	switch( iWidth )
	{
	case 4:
		// 4x4 is special: the butterfly is used for REG_DCT, the fast DST for
		// every other mode value.
		if (uiMode == REG_DCT)
		{
			partialButterfly4(block, tmp, shift_1st, iHeight);
			partialButterfly4(tmp, coeff, shift_2nd, iWidth);
		}
		else
		{
			fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
			fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
		}
		break;

	case 8:
		partialButterfly8( block, tmp, shift_1st, iHeight );
		partialButterfly8( tmp, coeff, shift_2nd, iWidth );
		break;

	case 16:
		partialButterfly16( block, tmp, shift_1st, iHeight );
		partialButterfly16( tmp, coeff, shift_2nd, iWidth );
		break;

	case 32:
		partialButterfly32( block, tmp, shift_1st, iHeight );
		partialButterfly32( tmp, coeff, shift_2nd, iWidth );
		break;

	default:
		// Unsupported size: intentionally a no-op, as in the original chain.
		break;
	}
}
TComTrQuant::xQuant这个函数主要执行量化操作(具体操作细节还没仔细了解)。
/**
 * Quantizes one block of coefficients.
 *
 * Either delegates to xRateDistOptQuant (when RDOQ is enabled for this block)
 * or performs plain scalar quantization:
 *     level = (|coeff| * quantCoeff + iAdd) >> iQBits
 * followed by optional sign-bit hiding (signBitHidingHDQ).
 *
 * \param pcCU         CU the block belongs to (queried for slice/PPS/SPS settings)
 * \param pSrc         input coefficients (iWidth * iHeight entries)
 * \param pDes         output quantized levels
 * \param pArlDes      (ADAPTIVE_QP_SELECTION only) extra output, written when m_bUseAdaptQpSelect is set
 * \param iWidth       block width
 * \param iHeight      block height
 * \param uiAcSum      incremented by the sum of absolute levels; it is NOT reset here
 * \param eTType       texture type (luma / chroma U / chroma V)
 * \param uiAbsPartIdx partition index inside the CU
 */
Void TComTrQuant::xQuant( TComDataCU* pcCU, 
	Int*        pSrc, 
	TCoeff*     pDes, 
#if ADAPTIVE_QP_SELECTION
	Int*&       pArlDes,
#endif
	Int         iWidth, 
	Int         iHeight, 
	UInt&       uiAcSum, 
	TextType    eTType, 
	UInt        uiAbsPartIdx )
{
	// Input coefficients to be quantized
	Int*   piCoef    = pSrc;
	// Output: quantized levels
	TCoeff* piQCoef   = pDes;

	// Extra output buffer used by adaptive QP selection
#if ADAPTIVE_QP_SELECTION
	Int*   piArlCCoef = pArlDes;
#endif
	Int   iAdd = 0;

	// Transform-skip blocks use their own RDOQ switch (m_useRDOQTS).
	Bool useRDOQ = pcCU->getTransformSkip(uiAbsPartIdx,eTType) ? m_useRDOQTS:m_useRDOQ;
	if ( useRDOQ && (eTType == TEXT_LUMA || RDOQ_CHROMA))
	{
#if ADAPTIVE_QP_SELECTION
		// Rate-distortion optimized quantization
		xRateDistOptQuant( pcCU, piCoef, pDes, pArlDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
		// (variant below: same call without the ARL output buffer)
#else
		xRateDistOptQuant( pcCU, piCoef, pDes, iWidth, iHeight, uiAcSum, eTType, uiAbsPartIdx );
#endif
	}
	else
	{
		const UInt   log2BlockSize   = g_aucConvertToBit[ iWidth ] + 2;

		// Scan order table; only needed by signBitHidingHDQ below.
		UInt scanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, iWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));
		const UInt *scan = g_auiSigLastScan[ scanIdx ][ log2BlockSize - 1 ];

		// Per-coefficient quantization remainder, consumed by signBitHidingHDQ.
		Int deltaU[32*32] ;

#if ADAPTIVE_QP_SELECTION
		// Derive a QP parameter set from the slice base QP (instead of m_cQP);
		// its m_iPer is used for the shift amounts below.
		QpParam cQpBase;
		Int iQpBase = pcCU->getSlice()->getSliceQpBase();

		Int qpScaled;
		Int qpBDOffset = (eTType == TEXT_LUMA)? pcCU->getSlice()->getSPS()->getQpBDOffsetY() : pcCU->getSlice()->getSPS()->getQpBDOffsetC();

		if(eTType == TEXT_LUMA)
		{
			qpScaled = iQpBase + qpBDOffset;
		}
		else
		{
			// Chroma: add the PPS and slice level chroma QP offsets for the component.
			Int chromaQPOffset;
			if(eTType == TEXT_CHROMA_U)
			{
				chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCbQpOffset() + pcCU->getSlice()->getSliceQpDeltaCb();
			}
			else
			{
				chromaQPOffset = pcCU->getSlice()->getPPS()->getChromaCrQpOffset() + pcCU->getSlice()->getSliceQpDeltaCr();
			}
			iQpBase = iQpBase + chromaQPOffset;

			qpScaled = Clip3( -qpBDOffset, 57, iQpBase);

			// Negative values only get the bit-depth offset; non-negative ones
			// are first mapped through the g_aucChromaScale table.
			if(qpScaled < 0)
			{
				qpScaled = qpScaled +  qpBDOffset;
			}
			else
			{
				qpScaled = g_aucChromaScale[ qpScaled ] + qpBDOffset;
			}
		}
		cQpBase.setQpParam(qpScaled);
#endif

		UInt uiLog2TrSize = g_aucConvertToBit[ iWidth ] + 2;
		// Scaling list index: intra lists start at 0, inter lists at 3, plus a per-texture-type offset.
		Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
		assert(scalingListType < SCALING_LIST_NUM);
		Int *piQuantCoeff = 0;
		piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);

		UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;
		Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;  // Represents scaling through forward transform

		// iAdd is the rounding offset: 171/512 of a quantization step for I slices,
		// 85/512 otherwise (the step being 1 << iQBits).
#if ADAPTIVE_QP_SELECTION
		Int iQBits = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift;
		iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
		// Shift and half-step rounding offset for the ARL output, which keeps
		// ARL_C_PRECISION extra fractional bits.
		Int iQBitsC = QUANT_SHIFT + cQpBase.m_iPer + iTransformShift - ARL_C_PRECISION;  
		Int iAddC   = 1 << (iQBitsC-1);
#else
		Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift;                // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
		iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
#endif

		Int qBits8 = iQBits-8;
		for( Int n = 0; n < iWidth*iHeight; n++ )
		{
			Int iLevel;
			Int  iSign;
			UInt uiBlockPos = n;
			iLevel  = piCoef[uiBlockPos];
			iSign   = (iLevel < 0 ? -1: 1);      

			// Quantize the magnitude; deltaU keeps (scaled magnitude - level << iQBits)
			// reduced by qBits8 bits.
#if ADAPTIVE_QP_SELECTION
			Int64 tmpLevel = (Int64)abs(iLevel) * piQuantCoeff[uiBlockPos];
			if( m_bUseAdaptQpSelect )
			{
				piArlCCoef[uiBlockPos] = (Int)((tmpLevel + iAddC ) >> iQBitsC);
			}
			iLevel = (Int)((tmpLevel + iAdd ) >> iQBits);
			deltaU[uiBlockPos] = (Int)((tmpLevel - (iLevel<<iQBits) )>> qBits8);
#else
			iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits;
			deltaU[uiBlockPos] = (Int)( ((Int64)abs(piCoef[uiBlockPos]) * piQuantCoeff[uiBlockPos] - (iLevel<<iQBits) )>> qBits8 );
#endif
			// Accumulate the magnitude, then restore the sign and clip to the 16-bit range.
			uiAcSum += iLevel;
			iLevel *= iSign;        
			piQCoef[uiBlockPos] = Clip3( -32768, 32767, iLevel );
		} // for n
		// Sign-bit hiding is only attempted when the PPS enables it and the
		// accumulated absolute sum is at least 2.
		if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
		{
			if(uiAcSum>=2)
			{
				signBitHidingHDQ( piQCoef, piCoef, scan, deltaU, iWidth, iHeight ) ;
			}
		}
	} //if RDOQ
	//return;

}
TComTrQuant::xRateDistOptQuant这个函数由xQuant进行调用,进行带率失真优化的量化操作(这个函数很长)
// Rate-distortion optimized quantization (RDOQ) of one block.
//
// Outline of what the code below does:
//   1. Walk the coefficient groups (uiCGSize coefficients each) and the
//      coefficients inside them in reverse scan order. For every coefficient a
//      level is chosen with xGetCodedLevel and its cost added to d64BaseCost.
//      After each group, the whole group may be reset to zero if that is cheaper.
//   2. Search for the best "last significant coefficient" position.
//   3. Restore the signs, add the absolute levels to uiAbsSum and zero every
//      coefficient from the chosen last position onwards.
//   4. If the PPS enables sign hiding and uiAbsSum >= 2, adjust one level per
//      scan subset by +/-1 where needed so that the parity of the subset's
//      level sum matches the sign of its first non-zero coefficient.
//
// plSrcCoeff    : input coefficients
// piDstCoeff    : output quantized levels
// piArlDstCoeff : (ADAPTIVE_QP_SELECTION only) zeroed here, then filled when m_bUseAdaptQpSelect is set
// uiAbsSum      : incremented by the sum of absolute levels; it is NOT reset here
//
// Returns early, after step 1, when no coefficient quantizes to a non-zero level.
Void TComTrQuant::xRateDistOptQuant                 ( TComDataCU*                     pcCU,
	Int*                            plSrcCoeff,
	TCoeff*                         piDstCoeff,
#if ADAPTIVE_QP_SELECTION
	Int*&                           piArlDstCoeff,
#endif
	UInt                            uiWidth,
	UInt                            uiHeight,
	UInt&                           uiAbsSum,
	TextType                        eTType,
	UInt                            uiAbsPartIdx )
{
	UInt uiLog2TrSize = g_aucConvertToBit[ uiWidth ] + 2;

	// Bit depth of the component being quantized
	UInt uiBitDepth = eTType == TEXT_LUMA ? g_bitDepthY : g_bitDepthC;

	// Shift that accounts for the scaling of the forward transform
	Int iTransformShift = MAX_TR_DYNAMIC_RANGE - uiBitDepth - uiLog2TrSize;  // Represents scaling through forward transform
	UInt       uiGoRiceParam       = 0;
	Double     d64BlockUncodedCost = 0;
	const UInt uiLog2BlkSize       = g_aucConvertToBit[ uiWidth ] + 2;

	// Number of coefficients in the block
	const UInt uiMaxNumCoeff       = uiWidth * uiHeight;

	// Scaling list index: intra lists start at 0, inter lists at 3, plus a per-texture-type offset
	Int scalingListType = (pcCU->isIntra(uiAbsPartIdx) ? 0 : 3) + g_eTTable[(Int)eTType];
	assert(scalingListType < SCALING_LIST_NUM);

	Int iQBits = QUANT_SHIFT + m_cQP.m_iPer + iTransformShift;                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
	Double *pdErrScaleOrg = getErrScaleCoeff(scalingListType,uiLog2TrSize-2,m_cQP.m_iRem);

	// Fetch the quantization coefficients
	Int *piQCoefOrg = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
	Int *piQCoef = piQCoefOrg;
	Double *pdErrScale = pdErrScaleOrg;
#if ADAPTIVE_QP_SELECTION
	Int iQBitsC = iQBits - ARL_C_PRECISION;
	Int iAddC =  1 << (iQBitsC-1);
#endif

	// Index of the coefficient scan order to use
	UInt uiScanIdx = pcCU->getCoefScanIdx(uiAbsPartIdx, uiWidth, eTType==TEXT_LUMA, pcCU->isIntra(uiAbsPartIdx));

#if ADAPTIVE_QP_SELECTION
	memset(piArlDstCoeff, 0, sizeof(Int) *  uiMaxNumCoeff);
#endif

	// Per-coefficient costs, indexed by scan position:
	//   pdCostCoeff  - cost with the chosen level
	//   pdCostSig    - significance cost part
	//   pdCostCoeff0 - cost when the coefficient is quantized to zero
	Double pdCostCoeff [ 32 * 32 ];
	Double pdCostSig   [ 32 * 32 ];
	Double pdCostCoeff0[ 32 * 32 ];


	::memset( pdCostCoeff, 0, sizeof(Double) *  uiMaxNumCoeff );
	::memset( pdCostSig,   0, sizeof(Double) *  uiMaxNumCoeff );
	// Rate deltas and quantization remainders, indexed by block position;
	// they are only read by the sign-hiding step at the end.
	Int rateIncUp   [ 32 * 32 ];
	Int rateIncDown [ 32 * 32 ];
	Int sigRateDelta[ 32 * 32 ];
	Int deltaU      [ 32 * 32 ];
	::memset( rateIncUp,    0, sizeof(Int) *  uiMaxNumCoeff );
	::memset( rateIncDown,  0, sizeof(Int) *  uiMaxNumCoeff );
	::memset( sigRateDelta, 0, sizeof(Int) *  uiMaxNumCoeff );
	::memset( deltaU,       0, sizeof(Int) *  uiMaxNumCoeff );

	// Scan order of the coefficient groups
	const UInt * scanCG;
	{
		scanCG = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize > 3 ? uiLog2BlkSize-2-1 : 0  ];
		if( uiLog2BlkSize == 3 )	// block size 8
		{
			scanCG = g_sigLastScan8x8[ uiScanIdx ];
		}
		else if( uiLog2BlkSize == 5 )	// block size 32
		{
			scanCG = g_sigLastScanCG32x32;
		}
	}
	const UInt uiCGSize = (1 << MLS_CG_SIZE);         // 16
	Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
	UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
	UInt uiNumBlkSide = uiWidth / MLS_CG_SIZE;
	Int iCGLastScanPos = -1;

	UInt    uiCtxSet            = 0;
	Int     c1                  = 1;
	Int     c2                  = 0;
	Double  d64BaseCost         = 0;
	Int     iLastScanPos        = -1;

	UInt    c1Idx     = 0;
	UInt    c2Idx     = 0;
	Int     baseLevel;

	// Scan order of the individual coefficients (scan position -> block position)
	const UInt *scan = g_auiSigLastScan[ uiScanIdx ][ uiLog2BlkSize - 1 ];

	::memset( pdCostCoeffGroupSig,   0, sizeof(Double) * MLS_GRP_NUM );
	::memset( uiSigCoeffGroupFlag,   0, sizeof(UInt) * MLS_GRP_NUM );

	UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
	Int iScanPos;
	coeffGroupRDStats rdStats;     

	// ---- Pass 1: per-coefficient level decision, coefficient groups in reverse scan order ----
	for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
	{
		UInt uiCGBlkPos = scanCG[ iCGScanPos ];
		UInt uiCGPosY   = uiCGBlkPos / uiNumBlkSide;
		UInt uiCGPosX   = uiCGBlkPos - (uiCGPosY * uiNumBlkSide);
		::memset( &rdStats, 0, sizeof (coeffGroupRDStats));

		const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
		for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
		{
			// This is where the actual quantization of a coefficient happens
			iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
			//===== quantization =====
			UInt    uiBlkPos          = scan[iScanPos];
			// set coeff
			Int uiQ  = piQCoef[uiBlkPos];
			Double dTemp = pdErrScale[uiBlkPos];
			Int lLevelDouble          = plSrcCoeff[ uiBlkPos ];
			// Scaled magnitude, capped so that adding the rounding offset below cannot exceed MAX_INT
			lLevelDouble              = (Int)min<Int64>((Int64)abs((Int)lLevelDouble) * uiQ , MAX_INT - (1 << (iQBits - 1)));
#if ADAPTIVE_QP_SELECTION
			if( m_bUseAdaptQpSelect )
			{
				piArlDstCoeff[uiBlkPos]   = (Int)(( lLevelDouble + iAddC) >> iQBitsC );
			}
#endif
			// Level obtained with round-to-nearest; passed to xGetCodedLevel as the maximum candidate
			UInt uiMaxAbsLevel        = (lLevelDouble + (1 << (iQBits - 1))) >> iQBits;

			Double dErr               = Double( lLevelDouble );
			pdCostCoeff0[ iScanPos ]  = dErr * dErr * dTemp;
			d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
			piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;

			// First non-zero candidate met in reverse scan order: this is the initial "last" position
			if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
			{
				iLastScanPos            = iScanPos;
				uiCtxSet                = (iScanPos < SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2;
				iCGLastScanPos          = iCGScanPos;
			}

			if ( iLastScanPos >= 0 )
			{
				//===== coefficient level estimation =====
				UInt  uiLevel;
				UInt  uiOneCtx         = 4 * uiCtxSet + c1;
				UInt  uiAbsCtx         = uiCtxSet + c2;

				if( iScanPos == iLastScanPos )
				{
					uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], 
						lLevelDouble, uiMaxAbsLevel, 0, uiOneCtx, uiAbsCtx, uiGoRiceParam, 
						c1Idx, c2Idx, iQBits, dTemp, 1 );
				}
				else
				{
					UInt   uiPosY        = uiBlkPos >> uiLog2BlkSize;
					UInt   uiPosX        = uiBlkPos - ( uiPosY << uiLog2BlkSize );
					UShort uiCtxSig      = getSigCtxInc( patternSigCtx, uiScanIdx, uiPosX, uiPosY, uiLog2BlkSize, eTType );
					uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
						lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam, 
						c1Idx, c2Idx, iQBits, dTemp, 0 );
					sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
				}
				// Quantization remainder and rate changes for level +/- 1 (used by sign hiding)
				deltaU[ uiBlkPos ]        = (lLevelDouble - ((Int)uiLevel << iQBits)) >> (iQBits-8);
				if( uiLevel > 0 )
				{
					Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx );
					rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
					rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx ) - rateNow;
				}
				else // uiLevel == 0
				{
					rateIncUp   [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
				}
				piDstCoeff[ uiBlkPos ] = uiLevel;
				d64BaseCost           += pdCostCoeff [ iScanPos ];


				// Update of the Go-Rice parameter (capped at 4)
				baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
				if( uiLevel >= baseLevel )
				{
					if(uiLevel  > 3*(1<<uiGoRiceParam))
					{
						uiGoRiceParam = min<UInt>(uiGoRiceParam+ 1, 4);
					}
				}
				if ( uiLevel >= 1)
				{
					c1Idx ++;
				}

				//===== update bin model =====
				if( uiLevel > 1 )
				{
					c1 = 0; 
					c2 += (c2 < 2);
					c2Idx ++;
				}
				else if( (c1 < 3) && (c1 > 0) && uiLevel)
				{
					c1++;
				}

				//===== context set update =====
				if( ( iScanPos % SCAN_SET_SIZE == 0 ) && ( iScanPos > 0 ) )
				{
					c2                = 0;
					uiGoRiceParam     = 0;

					c1Idx   = 0;
					c2Idx   = 0; 
					uiCtxSet          = (iScanPos == SCAN_SET_SIZE || eTType!=TEXT_LUMA) ? 0 : 2;
					if( c1 == 0 )
					{
						uiCtxSet++;
					}
					c1 = 1;
				}
			}
			else
			{
				// Still beyond the last non-zero candidate: only the zero cost applies
				d64BaseCost    += pdCostCoeff0[ iScanPos ];
			}
			// Statistics of the current coefficient group, used for the group-level decision below
			rdStats.d64SigCost += pdCostSig[ iScanPos ];
			if (iScanPosinCG == 0 )
			{
				rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
			}
			if (piDstCoeff[ uiBlkPos ] )
			{
				uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
				rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
				rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
				if ( iScanPosinCG != 0 )
				{
					rdStats.iNNZbeforePos0++;
				}
			}
		} //end for (iScanPosinCG)

		// Group-level decision: account for the group significance flag and try
		// turning the whole group into an all-zero group.
		if (iCGLastScanPos >= 0) 
		{
			if( iCGScanPos )
			{
				if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
				{
					UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
					d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;  
					pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);  
				} 
				else
				{
					if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
					{
						if ( rdStats.iNNZbeforePos0 == 0 ) 
						{
							d64BaseCost -= rdStats.d64SigCost_0;
							rdStats.d64SigCost -= rdStats.d64SigCost_0;
						}
						// rd-cost if SigCoeffGroupFlag = 0, initialization
						Double d64CostZeroCG = d64BaseCost;

						// add SigCoeffGroupFlag cost to total cost
						UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, uiWidth, uiHeight);
						if (iCGScanPos < iCGLastScanPos)
						{
							d64BaseCost  += xGetRateSigCoeffGroup(1, uiCtxSig); 
							d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);  
							pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig); 
						}

						// try to convert the current coeff group from non-zero to all-zero
						d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
						d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
						d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels

						// if we can save cost, change this block to all-zero block
						if ( d64CostZeroCG < d64BaseCost )      
						{
							uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
							d64BaseCost = d64CostZeroCG;
							if (iCGScanPos < iCGLastScanPos)
							{
								pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig); 
							}
							// reset coeffs to 0 in this block                
							for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
							{
								iScanPos      = iCGScanPos*uiCGSize + iScanPosinCG;
								UInt uiBlkPos = scan[ iScanPos ];

								if (piDstCoeff[ uiBlkPos ])
								{
									piDstCoeff [ uiBlkPos ] = 0;
									pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
									pdCostSig  [ iScanPos ] = 0;
								}
							}
						} // end if ( d64CostAllZeros < d64BaseCost )      
					}
				} // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
			}
			else
			{
				// The group at scan position 0 is always flagged as significant
				uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
			}
		}
	} //end for (iCGScanPos)

	//===== estimate last position =====
	// Nothing quantized to a non-zero level: piDstCoeff already holds only zeros.
	if ( iLastScanPos < 0 )
	{
		return;
	}

	// d64BestCost starts as the cost of coding no coefficient at all (cbf = 0);
	// d64BaseCost gets the cost of signalling cbf = 1.
	Double  d64BestCost         = 0;
	Int     ui16CtxCbf          = 0;
	Int     iBestLastIdxP1      = 0;
	if( !pcCU->isIntra( uiAbsPartIdx ) && eTType == TEXT_LUMA && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
	{
		ui16CtxCbf   = 0;
		d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
		d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
	}
	else
	{
		ui16CtxCbf   = pcCU->getCtxQtCbf( eTType, pcCU->getTransformIdx( uiAbsPartIdx ) );
		ui16CtxCbf   = ( eTType ? TEXT_CHROMA : eTType ) * NUM_QT_CBF_CTX + ui16CtxCbf;
		d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
		d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
	}

	// ---- Pass 2: try every non-zero coefficient as the last position, walking
	// backwards from the initial last position; stop at the first level > 1. ----
	Bool bFoundLast = false;
	for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
	{
		UInt uiCGBlkPos = scanCG[ iCGScanPos ];

		d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; 
		if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
		{     
			for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
			{
				iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
				if (iScanPos > iLastScanPos) continue;
				UInt   uiBlkPos     = scan[iScanPos];

				if( piDstCoeff[ uiBlkPos ] )
				{
					UInt   uiPosY       = uiBlkPos >> uiLog2BlkSize;
					UInt   uiPosX       = uiBlkPos - ( uiPosY << uiLog2BlkSize );

					// For the vertical scan the X/Y arguments are swapped
					Double d64CostLast= uiScanIdx == SCAN_VER ? xGetRateLast( uiPosY, uiPosX ) : xGetRateLast( uiPosX, uiPosY );
					Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];

					if( totalCost < d64BestCost )
					{
						iBestLastIdxP1  = iScanPos + 1;
						d64BestCost     = totalCost;
					}
					if( piDstCoeff[ uiBlkPos ] > 1 )
					{
						bFoundLast = true;
						break;
					}
					// Moving the last position further back: this coefficient would become zero
					d64BaseCost      -= pdCostCoeff[ iScanPos ];
					d64BaseCost      += pdCostCoeff0[ iScanPos ];
				}
				else
				{
					d64BaseCost      -= pdCostSig[ iScanPos ];
				}
			} //end for 
			if (bFoundLast)
			{
				break;
			}
		} // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
	} // end for 

	// Restore the signs of the kept coefficients and accumulate their magnitudes
	for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
	{
		Int blkPos = scan[ scanPos ];
		Int level  = piDstCoeff[ blkPos ];
		uiAbsSum += level;
		piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
	}

	//===== clean uncoded coefficients =====
	for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
	{
		piDstCoeff[ scan[ scanPos ] ] = 0;
	}

	// ---- Sign-bit hiding ----
	if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
	{
		// Factor that weights the deltaU (distortion) term against the rate terms below
		Int64 rdFactor = (Int64) (
			g_invQuantScales[m_cQP.rem()] * g_invQuantScales[m_cQP.rem()] * (1<<(2*m_cQP.m_iPer))
			/ m_dLambda / 16 / (1<<DISTORTION_PRECISION_ADJUSTMENT(2*(uiBitDepth-8)))
			+ 0.5);
		// lastCG: -1 until the first subset containing a non-zero level is met,
		// 1 while that subset is processed, 0 afterwards.
		Int lastCG = -1;
		Int absSum = 0 ;
		Int n ;

		for( Int subSet = (uiWidth*uiHeight-1) >> LOG2_SCAN_SET_SIZE; subSet >= 0; subSet-- )
		{
			Int  subPos     = subSet << LOG2_SCAN_SET_SIZE;
			Int  firstNZPosInCG=SCAN_SET_SIZE , lastNZPosInCG=-1 ;
			absSum = 0 ;

			// Locate the last and the first non-zero level of this subset
			for(n = SCAN_SET_SIZE-1; n >= 0; --n )
			{
				if( piDstCoeff[ scan[ n + subPos ]] )
				{
					lastNZPosInCG = n;
					break;
				}
			}

			for(n = 0; n <SCAN_SET_SIZE; n++ )
			{
				if( piDstCoeff[ scan[ n + subPos ]] )
				{
					firstNZPosInCG = n;
					break;
				}
			}

			for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
			{
				absSum += piDstCoeff[ scan[ n + subPos ]];
			}

			if(lastNZPosInCG>=0 && lastCG==-1)
			{
				lastCG = 1; 
			} 

			// Hiding is only applied when first and last non-zero positions are far enough apart
			if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
			{
				UInt signbit = (piDstCoeff[scan[subPos+firstNZPosInCG]]>0?0:1);
				if( signbit!=(absSum&0x1) )  // hide but need tune
				{
					// calculate the cost 
					Int64 minCostInc = MAX_INT64, curCost=MAX_INT64;
					Int minPos =-1, finalChange=0, curChange=0;

					// Find the position where changing the level by +/-1 is cheapest
					for( n = (lastCG==1?lastNZPosInCG:SCAN_SET_SIZE-1) ; n >= 0; --n )
					{
						UInt uiBlkPos   = scan[ n + subPos ];
						if(piDstCoeff[ uiBlkPos ] != 0 )
						{
							Int64 costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos] ;
							Int64 costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos] 
							-   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);

							if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
							{
								costDown -= (4<<15) ;
							}

							if(costUp<costDown)
							{  
								curCost = costUp;
								curChange =  1 ;
							}
							else               
							{
								curChange = -1 ;
								// Zeroing the first non-zero coefficient is excluded
								if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
								{
									curCost = MAX_INT64 ;
								}
								else
								{
									curCost = costDown ; 
								}
							}
						}
						else
						{
							// Currently zero: the only option is raising it to magnitude 1
							curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ; 
							curChange = 1 ;

							// A new non-zero coefficient in front of the first one is
							// only allowed if its sign equals signbit
							if(n<firstNZPosInCG)
							{
								UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
								if(thissignbit != signbit )
								{
									curCost = MAX_INT64;
								}
							}
						}

						if( curCost<minCostInc)
						{
							minCostInc = curCost ;
							finalChange = curChange ;
							minPos = uiBlkPos ;
						}
					}

					// At the 16-bit limits the magnitude can only be decreased
					if(piDstCoeff[minPos] == 32767 || piDstCoeff[minPos] == -32768)
					{
						finalChange = -1;
					}

					// Apply the change to the magnitude, respecting the sign of the source coefficient
					if(plSrcCoeff[minPos]>=0)
					{
						piDstCoeff[minPos] += finalChange ;
					}
					else
					{
						piDstCoeff[minPos] -= finalChange ; 
					}          
				}
			}

			if(lastCG==1)
			{
				lastCG=0 ;  
			}
		}
	}
}


你可能感兴趣的:(C++,编码,h.265,HEVC)