前一篇博客分析了亚像素搜索的入口和整体流程,这篇文章接着分析1/2像素精度插值的过程:
xExtDIFUpSamplingH这个函数进行1/2像素精度插值:首先调用filterHor对参考图像做水平方向插值,其中整像素位置的值直接复制到m_filteredBlockTmp[0]。
调用了m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[0].getAddr(COMPONENT_Y), intStride, width + 1, height + filterSize, 0, false, chFmt, pattern->getBitDepthY());进行水平整像素复制给m_filteredBlockTmp[0]
filterHor这个函数分了三种情况:frac = 0、亮度和色度。frac = 0即整像素位置,不需要插值;在进行整像素复制的时候又调用了filterCopy这个函数,
调用了filterCopy(bitDepth, src, srcStride, dst, dstStride, width, height, true, isLast );这里isFirst == true、isLast == false,走的是"第一次滤波"的分支:Pel val = leftShift_round(src[col], shift);dst[col] = val - (Pel)IF_INTERNAL_OFFS;
再调用m_if.filterHor(COMPONENT_Y, srcPtr, srcStride, m_filteredBlockTmp[2].getAddr(COMPONENT_Y), intStride, width + 1, height + filterSize, 2, false, chFmt, pattern->getBitDepthY());进行1/2像素水平插值赋值给m_filteredBlockTmp[2]
水平整像素插值后的Y做垂直方向整像素插值,结果存储在m_filteredBlock[0][0]中, dstPtr = m_filteredBlock[0][0].getAddr(COMPONENT_Y);
m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0, false, true, chFmt, pattern->getBitDepthY());
水平整像素插值后的Y(m_filteredBlockTmp[0])做垂直方向1/2像素插值,结果存储在m_filteredBlock[2][0]中
intPtr = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfFilterSize - 1) * intStride + 1;
dstPtr = m_filteredBlock[2][0].getAddr(COMPONENT_Y);
m_if.filterVer(COMPONENT_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2, false, true, chFmt, pattern->getBitDepthY());
/**
 * \brief Generate the half-sample interpolated luma blocks
 *
 * A horizontal pass writes into the m_filteredBlockTmp[] scratch buffers
 * (index = horizontal fractional position). A vertical pass then reads those
 * buffers and writes the final planes into m_filteredBlock[v][h], where the
 * first index is the vertical and the second the horizontal fractional
 * position. Positions used here are 0 (integer) and 2 (half-sample).
 *
 * \param pattern Reference picture ROI
 */
Void TEncSearch::xExtDIFUpSamplingH(TComPattern* pattern)
{
  const Int roiWidth  = pattern->getROIYWidth();
  const Int roiHeight = pattern->getROIYHeight();
  const Int refStride = pattern->getPatternLStride();

  // Strides of the intermediate (horizontal-pass) and final (vertical-pass) buffers.
  const Int tmpStride = m_filteredBlockTmp[0].getStride(COMPONENT_Y);
  const Int outStride = m_filteredBlock[0][0].getStride(COMPONENT_Y);

  const Int numTaps  = NTAPS_LUMA;
  const Int halfTaps = (numTaps >> 1);

  // Start halfTaps rows above and one column left of the ROI so the
  // horizontal pass also produces the extra rows/column the vertical pass reads.
  Pel *refStart = pattern->getROIY() - halfTaps * refStride - 1;

  const ChromaFormat chFmt = m_filteredBlock[0][0].getChromaFormat();

  // Horizontal pass: position 0 (integer samples, copied) -> Tmp[0],
  // position 2 (half-sample) -> Tmp[2].
  m_if.filterHor(COMPONENT_Y, refStart, refStride, m_filteredBlockTmp[0].getAddr(COMPONENT_Y), tmpStride, roiWidth + 1, roiHeight + numTaps, 0, false, chFmt, pattern->getBitDepthY());
  m_if.filterHor(COMPONENT_Y, refStart, refStride, m_filteredBlockTmp[2].getAddr(COMPONENT_Y), tmpStride, roiWidth + 1, roiHeight + numTaps, 2, false, chFmt, pattern->getBitDepthY());

  // Horizontal integer -> vertical integer: m_filteredBlock[0][0].
  {
    Pel *tmpRows = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + halfTaps * tmpStride + 1;
    Pel *outRows = m_filteredBlock[0][0].getAddr(COMPONENT_Y);
    m_if.filterVer(COMPONENT_Y, tmpRows, tmpStride, outRows, outStride, roiWidth, roiHeight, 0, false, true, chFmt, pattern->getBitDepthY());
  }

  // Horizontal integer -> vertical half-sample: m_filteredBlock[2][0].
  {
    Pel *tmpRows = m_filteredBlockTmp[0].getAddr(COMPONENT_Y) + (halfTaps - 1) * tmpStride + 1;
    Pel *outRows = m_filteredBlock[2][0].getAddr(COMPONENT_Y);
    m_if.filterVer(COMPONENT_Y, tmpRows, tmpStride, outRows, outStride, roiWidth, roiHeight + 1, 2, false, true, chFmt, pattern->getBitDepthY());
  }

  // Horizontal half-sample -> vertical integer: m_filteredBlock[0][2].
  {
    Pel *tmpRows = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + halfTaps * tmpStride;
    Pel *outRows = m_filteredBlock[0][2].getAddr(COMPONENT_Y);
    m_if.filterVer(COMPONENT_Y, tmpRows, tmpStride, outRows, outStride, roiWidth + 1, roiHeight, 0, false, true, chFmt, pattern->getBitDepthY());
  }

  // Horizontal half-sample -> vertical half-sample: m_filteredBlock[2][2].
  {
    Pel *tmpRows = m_filteredBlockTmp[2].getAddr(COMPONENT_Y) + (halfTaps - 1) * tmpStride;
    Pel *outRows = m_filteredBlock[2][2].getAddr(COMPONENT_Y);
    m_if.filterVer(COMPONENT_Y, tmpRows, tmpStride, outRows, outStride, roiWidth + 1, roiHeight + 1, 2, false, true, chFmt, pattern->getBitDepthY());
  }
}
/**
 * \brief Filter a block of luma/chroma samples (horizontal)
 *
 * Dispatches on the fractional position and component type:
 *  - frac == 0: integer position, handled by filterCopy (no interpolation);
 *  - luma:      filtered with m_lumaFilter[frac];
 *  - chroma:    filtered with m_chromaFilter[frac << (1 - csx)].
 *
 * \param compID     Component ID (luma or chroma)
 * \param src        Pointer to source samples
 * \param srcStride  Stride of source samples
 * \param dst        Pointer to destination samples
 * \param dstStride  Stride of destination samples
 * \param width      Width of block
 * \param height     Height of block
 * \param frac       Fractional sample offset
 * \param isLast     Flag indicating whether it is the last filtering operation
 * \param fmt        Chroma format
 * \param bitDepth   Bit depth
 */
Void TComInterpolationFilter::filterHor(const ComponentID compID, Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Int frac, Bool isLast, const ChromaFormat fmt, const Int bitDepth )
{
  if ( frac == 0 )
  {
    // Integer position: no interpolation. The horizontal pass is passed as the
    // first filtering operation (isFirst = true).
    filterCopy(bitDepth, src, srcStride, dst, dstStride, width, height, true, isLast );
  }
  else if (isLuma(compID))
  {
    assert(frac >= 0 && frac < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS);
    // The tap count is a template argument and cannot be deduced from the call
    // arguments, so it must be spelled out explicitly.
    filterHor<NTAPS_LUMA>(bitDepth, src, srcStride, dst, dstStride, width, height, isLast, m_lumaFilter[frac]);
  }
  else
  {
    const UInt csx = getComponentScaleX(compID, fmt);
    assert(frac >=0 && csx<2 && (frac<<(1-csx)) < CHROMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS);
    // NOTE(review): NTAPS_CHROMA is assumed to be the chroma counterpart of
    // NTAPS_LUMA from the interpolation-filter header - confirm against the
    // class declaration.
    filterHor<NTAPS_CHROMA>(bitDepth, src, srcStride, dst, dstStride, width, height, isLast, m_chromaFilter[frac<<(1-csx)]);
  }
}
/**
 * \brief Apply unit FIR filter to a block of samples
 *
 * Three cases, selected by the stage flags:
 *  - isFirst == isLast: samples are copied unchanged;
 *  - first but not last: each sample is scaled up by leftShift_round and has
 *    IF_INTERNAL_OFFS subtracted;
 *  - last but not first: IF_INTERNAL_OFFS is added back, the sample is scaled
 *    down by rightShift_round and clipped to [0, (1 << bitDepth) - 1].
 *
 * \param bitDepth   bitDepth of samples
 * \param src        Pointer to source samples
 * \param srcStride  Stride of source samples
 * \param dst        Pointer to destination samples
 * \param dstStride  Stride of destination samples
 * \param width      Width of block
 * \param height     Height of block
 * \param isFirst    Flag indicating whether it is the first filtering operation
 * \param isLast     Flag indicating whether it is the last filtering operation
 */
Void TComInterpolationFilter::filterCopy(Int bitDepth, const Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Bool isFirst, Bool isLast)
{
  // Identical stage flags: straight copy, no scaling.
  if ( isFirst == isLast )
  {
    for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
    {
      for (Int x = 0; x < width; x++)
      {
        dst[x] = src[x];
      }
    }
    return;
  }

  // Both remaining cases use the same shift amount.
  const Int shift = std::max(2, (IF_INTERNAL_PREC - bitDepth));

  if ( isFirst )
  {
    // First stage only: scale up and remove the internal offset.
    for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
    {
      for (Int x = 0; x < width; x++)
      {
        const Pel scaled = leftShift_round(src[x], shift);
        dst[x] = scaled - static_cast<Pel>(IF_INTERNAL_OFFS);
      }
    }
    return;
  }

  // Last stage only: restore the offset, scale down, then clip.
  const Pel maxVal = (1 << bitDepth) - 1;
  const Pel minVal = 0;
  for (Int y = 0; y < height; y++, src += srcStride, dst += dstStride)
  {
    for (Int x = 0; x < width; x++)
    {
      Pel val = rightShift_round((src[x] + IF_INTERNAL_OFFS), shift);
      val = (val < minVal) ? minVal : val;
      val = (val > maxVal) ? maxVal : val;
      dst[x] = val;
    }
  }
}
/**
* \brief Filter a block of samples (horizontal)
*
* \tparam N Number of taps
* \param bitDepth Bit depth of samples
* \param src Pointer to source samples
* \param srcStride Stride of source samples
* \param dst Pointer to destination samples
* \param dstStride Stride of destination samples
* \param width Width of block
* \param height Height of block
* \param isLast Flag indicating whether it is the last filtering operation
* \param coeff Pointer to filter taps
*/
template
Void TComInterpolationFilter::filterHor(Int bitDepth, Pel *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, Bool isLast, TFilterCoeff const *coeff)
{
if ( isLast )
{
filter(bitDepth, src, srcStride, dst, dstStride, width, height, coeff);
}
else
{
filter(bitDepth, src, srcStride, dst, dstStride, width, height, coeff);
}
}
/**
* \brief Apply FIR filter to a block of samples
*
* \tparam N Number of taps// 抽头数
* \tparam isVertical Flag indicating filtering along vertical direction
* \tparam isFirst Flag indicating whether it is the first filtering operation
* \tparam isLast Flag indicating whether it is the last filtering operation
* \param bitDepth Bit depth of samples
* \param src Pointer to source samples 源MV地址
* \param srcStride Stride of source samples
* \param dst Pointer to destination samples插值后的MV地址
* \param dstStride Stride of destination samples
* \param width Width of block
* \param height Height of block
* \param coeff Pointer to filter taps 抽头系数
*/
// 插值滤波器
template
Void TComInterpolationFilter::filter(Int bitDepth, Pel const *src, Int srcStride, Pel *dst, Int dstStride, Int width, Int height, TFilterCoeff const *coeff)
{
Int row, col;
Pel c[8];//系数数组
c[0] = coeff[0];//抽头0系数
c[1] = coeff[1];//抽头1系数
if ( N >= 4 ) //N≥4时,取抽头2和3系数(UV分量)
{
c[2] = coeff[2];
c[3] = coeff[3];
}
if ( N >= 6 ) //N≥6时,取抽头4和5系数
{
c[4] = coeff[4];
c[5] = coeff[5];
}
if ( N == 8 )//N=8时,取抽头6和7系数(Y分量)
{
c[6] = coeff[6];
c[7] = coeff[7];
}
Int cStride = ( isVertical ) ? srcStride : 1; //垂直情况下步长为srcStride,水平情况下为1
src -= ( N/2 - 1 ) * cStride;//找到前面N/2-1个整数点
Int offset;
Pel maxVal;
Int headRoom = std::max(2, (IF_INTERNAL_PREC - bitDepth));
Int shift = IF_FILTER_PREC;
// with the current settings (IF_INTERNAL_PREC = 14 and IF_FILTER_PREC = 6), though headroom can be
// negative for bit depths greater than 14, shift will remain non-negative for bit depths of 8->20
assert(shift >= 0);
if ( isLast )
{
shift += (isFirst) ? 0 : headRoom;
offset = 1 << (shift - 1);
offset += (isFirst) ? 0 : IF_INTERNAL_OFFS << IF_FILTER_PREC;
maxVal = (1 << bitDepth) - 1;
}
else
{
shift -= (isFirst) ? headRoom : 0;
offset = (isFirst) ? -IF_INTERNAL_OFFS << shift : 0;
maxVal = 0;
}
#if VECTOR_CODING__INTERPOLATION_FILTER && (RExt__HIGH_BIT_DEPTH_SUPPORT==0)
if( bitDepth <= 10 )
{
if( N == 8 && !( width & 0x07 ) )
{
Short minVal = 0;
__m128i mmOffset = _mm_set1_epi32( offset );
__m128i mmCoeff[8];
__m128i mmMin = _mm_set1_epi16( minVal );
__m128i mmMax = _mm_set1_epi16( maxVal );
for( Int n = 0 ; n < 8 ; n++ )
mmCoeff[n] = _mm_set1_epi16( c[n] );
for( row = 0 ; row < height ; row++ )
{
for( col = 0 ; col < width ; col += 8 )
{
__m128i mmFiltered = simdInterpolateLuma8( src + col , cStride , mmCoeff , mmOffset , shift );
if( isLast )
{
mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
}
_mm_storeu_si128( ( __m128i * )( dst + col ) , mmFiltered );
}
src += srcStride;
dst += dstStride;
}
return;
}
else if( N == 8 && !( width & 0x03 ) )
{
Short minVal = 0;
__m128i mmOffset = _mm_set1_epi32( offset );
__m128i mmCoeff[8];
__m128i mmMin = _mm_set1_epi16( minVal );
__m128i mmMax = _mm_set1_epi16( maxVal );
for( Int n = 0 ; n < 8 ; n++ )
mmCoeff[n] = _mm_set1_epi16( c[n] );
for( row = 0 ; row < height ; row++ )
{
for( col = 0 ; col < width ; col += 4 )
{
__m128i mmFiltered = simdInterpolateLuma4( src + col , cStride , mmCoeff , mmOffset , shift );
if( isLast )
{
mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
}
_mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
}
src += srcStride;
dst += dstStride;
}
return;
}
else if( N == 4 && !( width & 0x03 ) )
{
Short minVal = 0;
__m128i mmOffset = _mm_set1_epi32( offset );
__m128i mmCoeff[8];
__m128i mmMin = _mm_set1_epi16( minVal );
__m128i mmMax = _mm_set1_epi16( maxVal );
for( Int n = 0 ; n < 4 ; n++ )
mmCoeff[n] = _mm_set1_epi16( c[n] );
for( row = 0 ; row < height ; row++ )
{
for( col = 0 ; col < width ; col += 4 )
{
__m128i mmFiltered = simdInterpolateChroma4( src + col , cStride , mmCoeff , mmOffset , shift );
if( isLast )
{
mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
}
_mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
}
src += srcStride;
dst += dstStride;
}
return;
}
else if( N == 2 && !( width & 0x07 ) )
{
Short minVal = 0;
__m128i mmOffset = _mm_set1_epi32( offset );
__m128i mmCoeff[2];
__m128i mmMin = _mm_set1_epi16( minVal );
__m128i mmMax = _mm_set1_epi16( maxVal );
for( Int n = 0 ; n < 2 ; n++ )
mmCoeff[n] = _mm_set1_epi16( c[n] );
for( row = 0 ; row < height ; row++ )
{
for( col = 0 ; col < width ; col += 8 )
{
__m128i mmFiltered = simdInterpolateLuma2P8( src + col , cStride , mmCoeff , mmOffset , shift );
if( isLast )
{
mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
}
_mm_storeu_si128( ( __m128i * )( dst + col ) , mmFiltered );
}
src += srcStride;
dst += dstStride;
}
return;
}
else if( N == 2 && !( width & 0x03 ) )
{
Short minVal = 0;
__m128i mmOffset = _mm_set1_epi32( offset );
__m128i mmCoeff[8];
__m128i mmMin = _mm_set1_epi16( minVal );
__m128i mmMax = _mm_set1_epi16( maxVal );
for( Int n = 0 ; n < 2 ; n++ )
mmCoeff[n] = _mm_set1_epi16( c[n] );
for( row = 0 ; row < height ; row++ )
{
for( col = 0 ; col < width ; col += 4 )
{
__m128i mmFiltered = simdInterpolateLuma2P4( src + col , cStride , mmCoeff , mmOffset , shift );
if( isLast )
{
mmFiltered = simdClip3( mmMin , mmMax , mmFiltered );
}
_mm_storel_epi64( ( __m128i * )( dst + col ) , mmFiltered );
}
src += srcStride;
dst += dstStride;
}
return;
}
}
#endif
for (row = 0; row < height; row++)
{
for (col = 0; col < width; col++)
{
Int sum;
//从N/2-1个整数点开始,取N个整数点乘对应的抽头系数求和。
sum = src[ col + 0 * cStride] * c[0];
sum += src[ col + 1 * cStride] * c[1];
if ( N >= 4 )
{
sum += src[ col + 2 * cStride] * c[2];
sum += src[ col + 3 * cStride] * c[3];
}
if ( N >= 6 )
{
sum += src[ col + 4 * cStride] * c[4];
sum += src[ col + 5 * cStride] * c[5];
}
if ( N == 8 )
{
sum += src[ col + 6 * cStride] * c[6];
sum += src[ col + 7 * cStride] * c[7];
}
Pel val = ( sum + offset ) >> shift;
if ( isLast )
{
val = ( val < 0 ) ? 0 : val;//保证不小于0
val = ( val > maxVal ) ? maxVal : val; //保证不越界
}
dst[col] = val; //存储插值后的MV
}
src += srcStride;
dst += dstStride;
}
}