VVC的量化模块使用了一项新技术:依赖标量量化(Dependent Scalar Quantization,DQ)。依赖标量量化是指:某个变换系数可取的重建值集合,取决于在重建顺序中位于它之前的变换系数层级(transform coefficient level)的取值。与HEVC中使用的传统独立标量量化相比,该方法的主要效果是使可取的重建向量在N维向量空间(N表示变换块中变换系数的数量)中排布得更密集。这意味着,在每N维单位体积内可取重建向量的平均数目给定的情况下,输入向量与最近的重建向量之间的平均失真更小。
依赖标量量化实现过程为:
使用的两个标量量化器,由Q0和Q1表示,如上图所示。可用重建层级的位置由量化步长δ(delta)唯一地指定。使用的标量量化器(Q0或Q1)在比特流中没有显式地发出信号。相反,用于当前变换系数的量化器由在编码/重建顺序中位于当前变换系数之前的变换系数层级的奇偶性来确定。
如上图所示,两个标量量化器(Q0和Q1)间的转换由一个有4个状态的状态机实现。状态可以采用四个不同的值:0、1、2、3。它是由编码/重建顺序中当前变换系数之前的变换系数级别的奇偶性唯一确定的。在变换块的反量化开始时,状态被设置为0。变换系数按扫描顺序(即以相同的顺序对其进行熵解码)重构。在重构当前变换系数之后,如图所示更新状态,其中k表示变换系数电平的值。
在VTM代码中,DQ的入口函数是DepQuant::quant函数,其主要步骤如下:
(1)预初始化量化相关参数
(2)寻找TU的首个非零系数
(3)初始化所有状态参数
(4)从后向前遍历所有系数,对每个系数找出相应的量化状态链、量化系数以及量化状态相应的RD Cost。(xDecideAndUpdate函数实现)
(5)确定RD Cost最小的量化状态链
(6)根据最优量化状态链正向扫描全部系数,存储相应量化后的变换系数电平。
代码如下:
/// Dependent-quantization entry point for one component of a transform unit.
///
/// Steps, in order:
///   1. reset the output coefficient buffer and pre-initialise the quantizer,
///   2. find the first scan position (searching from the end) whose coefficient
///      magnitude exceeds a threshold,
///   3. initialise the rate estimator and all trellis states,
///   4. walk the scan positions from that first position down to 0, calling
///      xDecideAndUpdate() for every coefficient,
///   5. pick the trellis state at scan position 0 with the smallest rdCost,
///   6. follow the prevId links through m_trellis and write the signed levels.
///
/// @param tu                 transform unit; its coefficient buffer for compID is overwritten
/// @param srcCoeff           transform coefficients to quantize
/// @param compID             colour component being processed
/// @param cQP                quantization parameter passed on to m_quant.initQuantBlock()
/// @param lambda             Lagrange multiplier passed on to m_quant.initQuantBlock()
/// @param ctx                context set; only its fractional-bit accessor is used here
/// @param absSum             out: sum of the absolute values of all written levels
/// @param enableScalingLists when true, per-coefficient scales are read from quantCoeff
/// @param quantCoeff         per-coefficient scale table indexed by raster position;
///                           only read when enableScalingLists is true
void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
{
  CHECKD( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(), "ext precision is not supported" );

  //===== reset / pre-init =====
  const TUParameters& tuPars = *g_Rom.getTUPars( tu.blocks[compID], compID );
  m_quant.initQuantBlock( tu, compID, cQP, lambda );   // set up the quantization parameters
  TCoeff*       qCoeff   = tu.getCoeffs( compID ).buf;
  const TCoeff* tCoeff   = srcCoeff.buf;
  const int     numCoeff = tu.blocks[compID].area();
  ::memset( qCoeff, 0x00, numCoeff * sizeof(TCoeff) );
  absSum = 0;

  const CompArea& area     = tu.blocks[ compID ];
  const uint32_t  width    = area.width;
  const uint32_t  height   = area.height;
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;

  //===== zero-out configuration =====
  bool zeroOut         = false;
  bool zeroOutforThres = false;
  int  effWidth        = tuPars.m_width;
  int  effHeight       = tuPars.m_height;
  // Luma blocks using an explicit MTS transform, or SBT blocks (with MTS enabled
  // and both dimensions <= 32): a dimension of 32 keeps only 16 coefficients.
  if( ( tu.mtsIdx[compID] > MTS_SKIP || (tu.cs->sps->getUseMTS() && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32)) && compID == COMPONENT_Y)
  {
    effHeight = (tuPars.m_height == 32) ? 16 : tuPars.m_height;
    effWidth  = (tuPars.m_width  == 32) ? 16 : tuPars.m_width;
    zeroOut   = (effHeight < tuPars.m_height || effWidth < tuPars.m_width);
  }
  zeroOutforThres = zeroOut || (32 < tuPars.m_height || 32 < tuPars.m_width);

  //===== find first test position =====
  int firstTestPos = numCoeff - 1;
  if (lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4)
  {
    // With LFNST active the search starts at position 7 (4x4 and 8x8) or 15 (other sizes).
    firstTestPos = ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) ) ? 7 : 15;
  }

  const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale();
  const TCoeff thres                          = m_quant.getLastThreshold();

  // Loop-invariant bounds of the region in which coefficients are considered.
  const int xLimit = (tuPars.m_width  == 32 && zeroOut) ? 16 : 32;
  const int yLimit = (tuPars.m_height == 32 && zeroOut) ? 16 : 32;

  // Scan backwards until a coefficient whose magnitude exceeds the threshold is found.
  for( ; firstTestPos >= 0; firstTestPos-- )
  {
    const auto& blkPos = tuPars.m_scanId2BlkPos[firstTestPos];
    if( zeroOutforThres && ( blkPos.x >= xLimit || blkPos.y >= yLimit ) )
    {
      continue;   // position lies in the zeroed-out region
    }
    const TCoeff thresTmp = (enableScalingLists) ? TCoeff(thres / (4 * quantCoeff[blkPos.idx]))
                                                 : TCoeff(thres / (4 * defaultQuantisationCoefficient));
    if( abs( tCoeff[blkPos.idx] ) > thresTmp )
    {
      break;
    }
  }
  if( firstTestPos < 0 )
  {
    return;   // nothing exceeds the threshold: the output stays all-zero
  }

  //===== real init =====
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
  m_commonCtx.reset( tuPars, *this );
  for( int k = 0; k < 12; k++ )
  {
    m_allStates[k].init();
  }
  m_startState.init();

  // Bounds of the area that can still hold coefficients after zero-out, capped at 32.
  const int effectWidth  = std::min(32, effWidth);
  const int effectHeight = std::min(32, effHeight);
  for( int k = 0; k < 12; k++ )
  {
    m_allStates[k].effWidth  = effectWidth;
    m_allStates[k].effHeight = effectHeight;
  }
  m_startState.effWidth  = effectWidth;
  m_startState.effHeight = effectHeight;

  //===== populate trellis =====
  // Visit the coefficients from firstTestPos down to scan index 0; every call
  // fills the trellis row m_trellis[scanIdx] for that position.
  for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- )
  {
    const ScanInfo& scanInfo    = tuPars.m_scanInfo[ scanIdx ];
    const bool      isZeroedPos = zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight );
    if( enableScalingLists )
    {
      // Scaling lists: re-initialise the quantizer with this coefficient's scale.
      m_quant.initQuantBlock( tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos] );
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, isZeroedPos, quantCoeff[scanInfo.rasterPos] );
    }
    else
    {
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, isZeroedPos, defaultQuantisationCoefficient );
    }
  }

  //===== find best path =====
  // Select the state at scan position 0 with the smallest rdCost. minPathCost
  // starts at 0, so a state is only chosen when its cost is negative; otherwise
  // `decision` keeps its initial value (presumably with a negative prevId --
  // TODO confirm Decision's member order) and the loop below writes nothing.
  Decision decision    = { std::numeric_limits<int64_t>::max(), -1, -2 };
  int64_t  minPathCost = 0;
  for( int8_t stateId = 0; stateId < 4; stateId++ )
  {
    const int64_t pathCost = m_trellis[0][stateId].rdCost;
    if( pathCost < minPathCost )
    {
      decision.prevId = stateId;
      minPathCost     = pathCost;
    }
  }

  //===== backward scanning =====
  // Follow the prevId links starting at scan index 0 and moving towards higher
  // scan indices; store each level with the sign of its source coefficient.
  int scanIdx = 0;
  for( ; decision.prevId >= 0; scanIdx++ )
  {
    decision = m_trellis[ scanIdx ][ decision.prevId ];
    const int32_t blkpos = tuPars.m_scanId2BlkPos[scanIdx].idx;
    qCoeff[ blkpos ] = ( tCoeff[ blkpos ] < 0 ? -decision.absLevel : decision.absLevel );
    absSum += decision.absLevel;
  }
}
}; // namespace DQIntern
xDecideAndUpdate函数通过调用xDecide函数确定每个变换系数的量化状态、量化系数和RD Cost,之后再更新每种量化状态链的RD Cost。
void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
{
Decision* decisions = m_trellis[ scanInfo.scanIdx ];
std::swap( m_prevStates, m_currStates );
xDecide( scanInfo.spt, absCoeff, lastOffset(scanInfo.scanIdx), decisions, zeroOut, quantCoeff );
if( scanInfo.scanIdx )
{
if( scanInfo.eosbb )
{
m_commonCtx.swap();
m_currStates[0].updateStateEOS( scanInfo, m_prevStates, m_skipStates, decisions[0] );
m_currStates[1].updateStateEOS( scanInfo, m_prevStates, m_skipStates, decisions[1] );
m_currStates[2].updateStateEOS( scanInfo, m_prevStates, m_skipStates, decisions[2] );
m_currStates[3].updateStateEOS( scanInfo, m_prevStates, m_skipStates, decisions[3] );
::memcpy( decisions+4, decisions, 4*sizeof(Decision) );
}
else if( !zeroOut )
{
switch( scanInfo.nextNbInfoSbb.num )
{
case 0:
//更新当前状态为0的rdcost的值为decisions[0].rdcost;
m_currStates[0].updateState<0>( scanInfo, m_prevStates, decisions[0] );
//更新当前状态为1的rdcost的值为decisions[1].rdcost;
m_currStates[1].updateState<0>( scanInfo, m_prevStates, decisions[1] );
//更新当前状态为2的rdcost的值为decisions[2].rdcost;
m_currStates[2].updateState<0>( scanInfo, m_prevStates, decisions[2] );
//更新当前状态为3的rdcost的值为decisions[3].rdcost;
m_currStates[3].updateState<0>( scanInfo, m_prevStates, decisions[3] );
break;
case 1:
m_currStates[0].updateState<1>( scanInfo, m_prevStates, decisions[0] );
m_currStates[1].updateState<1>( scanInfo, m_prevStates, decisions[1] );
m_currStates[2].updateState<1>( scanInfo, m_prevStates, decisions[2] );
m_currStates[3].updateState<1>( scanInfo, m_prevStates, decisions[3] );
break;
case 2:
m_currStates[0].updateState<2>( scanInfo, m_prevStates, decisions[0] );
m_currStates[1].updateState<2>( scanInfo, m_prevStates, decisions[1] );
m_currStates[2].updateState<2>( scanInfo, m_prevStates, decisions[2] );
m_currStates[3].updateState<2>( scanInfo, m_prevStates, decisions[3] );
break;
case 3:
m_currStates[0].updateState<3>( scanInfo, m_prevStates, decisions[0] );
m_currStates[1].updateState<3>( scanInfo, m_prevStates, decisions[1] );
m_currStates[2].updateState<3>( scanInfo, m_prevStates, decisions[2] );
m_currStates[3].updateState<3>( scanInfo, m_prevStates, decisions[3] );
break;
case 4:
m_currStates[0].updateState<4>( scanInfo, m_prevStates, decisions[0] );
m_currStates[1].updateState<4>( scanInfo, m_prevStates, decisions[1] );
m_currStates[2].updateState<4>( scanInfo, m_prevStates, decisions[2] );
m_currStates[3].updateState<4>( scanInfo, m_prevStates, decisions[3] );
break;
default:
m_currStates[0].updateState<5>( scanInfo, m_prevStates, decisions[0] );
m_currStates[1].updateState<5>( scanInfo, m_prevStates, decisions[1] );
m_currStates[2].updateState<5>( scanInfo, m_prevStates, decisions[2] );
m_currStates[3].updateState<5>( scanInfo, m_prevStates, decisions[3] );
}
}
if( scanInfo.spt == SCAN_SOCSBB )
{
std::swap( m_prevStates, m_skipStates );
}
}
}
void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut, int quanCoeff)
{
::memcpy( decisions, startDec, 8*sizeof(Decision) );
if( zeroOut )
{
if( spt==SCAN_EOCSBB )
{
m_skipStates[0].checkRdCostSkipSbbZeroOut( decisions[0] );
m_skipStates[1].checkRdCostSkipSbbZeroOut( decisions[1] );
m_skipStates[2].checkRdCostSkipSbbZeroOut( decisions[2] );
m_skipStates[3].checkRdCostSkipSbbZeroOut( decisions[3] );
}
return;
}
//存储4个预量化值的相关参数
PQData pqData[4];
//对absCoeff进行4次预量化,得到量化后的变换系数level和量化成该值的rdcost,第0个和第3个量化值是偶数,第1个和第2个量化值是奇数;
m_quant.preQuantCoeff( absCoeff, pqData, quanCoeff );
//前一个量化状态是0,则当前状态可以是0或者2,根据rdcost更新decision[0/2].rdcost的值
m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2]);
//前一个量化状态是1,则当前状态可以是2或者0,根据rdcost更新decision[2/0].rdcost的值
m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0]);
//前一个量化状态是2,则当前状态可以是1或3,根据rdcost更新decision[1/3].rdcost的值
m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3]);
//前一个量化状态是3,则当前状态可以是3或者1,根据rdcost更新decision[3/1].rdcost的值
m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1]);
if( spt==SCAN_EOCSBB )
{
m_skipStates[0].checkRdCostSkipSbb( decisions[0] );
m_skipStates[1].checkRdCostSkipSbb( decisions[1] );
m_skipStates[2].checkRdCostSkipSbb( decisions[2] );
m_skipStates[3].checkRdCostSkipSbb( decisions[3] );
}
//初始化状态0和2的RD Cost
m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] );
m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] );
}
还有许多细节没有看懂。。。之后有时间看懂了再补充吧