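// Half-pel and quarter-pel motion refinement. hpel_iters: iteration limit for the half-pel diamond search; qpel_iters: iteration limit for the quarter-pel diamond search.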
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel )
{
const int bw = x264_pixel_size[m->i_pixel].w;
const int bh = x264_pixel_size[m->i_pixel].h;
const uint16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
const uint16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
const int mvy_offset = h->mb.b_interlaced & m->i_ref ? (h->mb.i_mb_y & 1)*4 - 2 : 0;
ALIGNED_ARRAY_16( pixel, pix,[64*18] ); // really 17x17x2, but round up for alignment
int bmx = m->mv[0];//做完整像素运动搜索之后预测的运动矢量
int bmy = m->mv[1];
int bcost = m->cost;
int odir = -1, bdir;
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0]+2, h->mb.mv_max_spel[0]-2 );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1]+2, h->mb.mv_max_spel[1]-2 );
if( (mx-bmx)|(my-bmy) )
COST_MV_SAD( mx, my );
}
/* halfpel diamond search */
for( int i = hpel_iters; i > 0; i-- )//从做完整像素运动搜索之后预测的运动矢量开始做半像素的diamond搜索
{
int omx = bmx, omy = bmy;
int costs[4];
int stride = 64; // candidates are either all hpel or all qpel, so one stride is enough
pixel *src0, *src1, *src2, *src3;
// Get the addresses of the four half-pel candidates around (omx, omy)
src0 = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], omx, omy-2, bw, bh+1, &m->weight[0] );
src2 = h->mc.get_ref( pix+32, &stride, m->p_fref, m->i_stride[0], omx-2, omy, bw+4, bh, &m->weight[0] );
src1 = src0 + stride;//src0为中心点的上方点,scr1为中心点的下方点
src3 = src2 + 1;//src2为中心点的左侧点,scr3为中心点的右侧点
h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0], src0, src1, src2, src3, stride, costs );//计算cost
COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx ] + p_cost_mvy[omy-2], bmy, omy-2 );
COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 );
COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy );
COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy );
if( (bmx == omx) & (bmy == omy) )
break;
}
if( !b_refine_qpel && (h->pixf.mbcmp_unaligned[0] != h->pixf.fpelcmp[0] || b_chroma_me) )
{
bcost = COST_MAX;
COST_MV_SATD( bmx, bmy, -1 );
}
/* early termination when examining multiple reference frames */
if( p_halfpel_thresh )
{
if( (bcost*7)>>3 > *p_halfpel_thresh )//如果bcost*7/8>阀值
{
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
// don't need cost_mv
return;
}
else if( bcost < *p_halfpel_thresh )
*p_halfpel_thresh = bcost;
}
/* quarterpel diamond search */
if( h->mb.i_subpel_refine != 1 )
{
bdir = -1;//做1/4像素迭代
for( int i = qpel_iters; i > 0; i-- )
{
//判断边界
if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] || bmx <= h->mb.mv_min_spel[0] || bmx >= h->mb.mv_max_spel[0] )
break;
odir = bdir;
int omx = bmx, omy = bmy;
COST_MV_SATD( omx, omy - 1, 0 );//注:在COST_MV_SATD中(dir^1)!=odir 这个条件,用来去除已经做过cost的一个点
COST_MV_SATD( omx, omy + 1, 1 );
COST_MV_SATD( omx - 1, omy, 2 );
COST_MV_SATD( omx + 1, omy, 3 );
if( (bmx == omx) & (bmy == omy) )
break;
}
}
/* Special simplified case for subme=1 */
else if( bmy > h->mb.mv_min_spel[1] && bmy < h->mb.mv_max_spel[1] && bmx > h->mb.mv_min_spel[0] && bmx < h->mb.mv_max_spel[0] )
{
int costs[4];
int omx = bmx, omy = bmy;//只做一次1/4像素搜索
/* We have to use mc_luma because all strides must be the same to use fpelcmp_x4 */
h->mc.mc_luma( pix , 64, m->p_fref, m->i_stride[0], omx, omy-1, bw, bh, &m->weight[0] );
h->mc.mc_luma( pix+16, 64, m->p_fref, m->i_stride[0], omx, omy+1, bw, bh, &m->weight[0] );
h->mc.mc_luma( pix+32, 64, m->p_fref, m->i_stride[0], omx-1, omy, bw, bh, &m->weight[0] );
h->mc.mc_luma( pix+48, 64, m->p_fref, m->i_stride[0], omx+1, omy, bw, bh, &m->weight[0] );
h->pixf.fpelcmp_x4[i_pixel]( m->p_fenc[0], pix, pix+16, pix+32, pix+48, 64, costs );
COPY2_IF_LT( bcost, costs[0] + p_cost_mvx[omx ] + p_cost_mvy[omy-1], bmy, omy-1 );
COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+1], bmy, omy+1 );
COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-1] + p_cost_mvy[omy ], bmx, omx-1, bmy, omy );
COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+1] + p_cost_mvy[omy ], bmx, omx+1, bmy, omy );
}
m->cost = bcost;//跟新cost 和mv cost_mv变量
m->mv[0] = bmx;
m->mv[1] = bmy;
m->cost_mv = p_cost_mvx[bmx] + p_cost_mvy[bmy];