算法原理
- 最邻近插值算法
原理图示:
选取对应原图位置距离最近的点,当然,距离相等的点的选取,需要自定一些规则。
块效应(马赛克)严重。
- 双线性插值算法
原理:
$$f(x,y) = (1-x)(1-y)\,f(0,0) + x(1-y)\,f(1,0) + (1-x)y\,f(0,1) + xy\,f(1,1)$$
在原图中找到所属位置对应边界的四个点,将距离作为权重,加权平均。
具有低通滤波的效果,可以消除块效应,但也会使图像细节变得模糊。
- 三次卷积(双三次,bicubic)插值算法
原理:
效果良好,但效率较低。
算法实现
素材:
注:算法未优化,并使用了部分ffmpeg接口和结构体。
- 最邻近插值算法
算法实现:
/*
 * Map a destination coordinate to its nearest source coordinate.
 *
 * Computes dst_idx * src_len / dst_len in integer arithmetic, rounding up
 * only when the fractional part is strictly greater than 0.5, and clamps the
 * result to the last valid source index.
 */
static int nearest_src_index(int dst_idx, int src_len, int dst_len)
{
    long long scaled = (long long)dst_idx * src_len;
    int idx = (int)(scaled / dst_len);

    if (2 * (scaled % dst_len) > dst_len)
        idx++;
    /* Rounding up near the far edge must not step past the source. */
    if (idx > src_len - 1)
        idx = src_len - 1;
    return idx;
}

/*
 * Nearest-neighbour scaling of src_frame into dst_frame.
 *
 * Both frames must already have their width, height, data and linesize set.
 * The plane indexing (full-resolution plane 0, planes 1 and 2 addressed with
 * coordinates shifted right by one) assumes a planar YUV 4:2:0 layout --
 * TODO confirm against the callers.
 *
 * Returns 0 on success, -1 if a frame pointer is null or a dimension is not
 * positive.
 */
int insert_near(AVFrame *src_frame, AVFrame *dst_frame)
{
    int i, j;

    if (!src_frame || !dst_frame)
        return -1;
    if (src_frame->width <= 0 || src_frame->height <= 0 ||
        dst_frame->width <= 0 || dst_frame->height <= 0)
        return -1;

    for (i = 0; i < dst_frame->height; i++) {
        int sy = nearest_src_index(i, src_frame->height, dst_frame->height);

        for (j = 0; j < dst_frame->width; j++) {
            int sx = nearest_src_index(j, src_frame->width, dst_frame->width);

            dst_frame->data[0][j + i * dst_frame->linesize[0]] =
                src_frame->data[0][sx + sy * src_frame->linesize[0]];
            dst_frame->data[1][(j >> 1) + (i >> 1) * dst_frame->linesize[1]] =
                src_frame->data[1][(sx >> 1) + (sy >> 1) * src_frame->linesize[1]];
            /* Plane 2 is addressed with its own stride, not plane 1's. */
            dst_frame->data[2][(j >> 1) + (i >> 1) * dst_frame->linesize[2]] =
                src_frame->data[2][(sx >> 1) + (sy >> 1) * src_frame->linesize[2]];
        }
    }
    return 0;
}
输出效果:
- 双线性插值算法
/*
 * Bilinear scaling of src_frame into dst_frame.
 *
 * Plane 0 is interpolated from the four surrounding source samples; planes 1
 * and 2 are copied from the nearest lower source sample at half resolution.
 * The plane indexing assumes a planar YUV 4:2:0 layout -- TODO confirm
 * against the callers.
 *
 * Returns 0 on success, -1 if a frame pointer is null or a dimension is not
 * positive.
 */
int line_insert(AVFrame *src_frame, AVFrame *dst_frame)
{
    int i, j;
    int src_w, src_h;
    float wratio, hratio;

    if (!src_frame || !dst_frame)
        return -1;
    if (src_frame->width <= 0 || src_frame->height <= 0 ||
        dst_frame->width <= 0 || dst_frame->height <= 0)
        return -1;

    src_w = src_frame->width;
    src_h = src_frame->height;
    /* Divide in floating point: integer division would truncate the ratio
     * (and yield 0 when shrinking). */
    wratio = (float)dst_frame->width / src_w;
    hratio = (float)dst_frame->height / src_h;

    for (i = 0; i < dst_frame->height; i++) {
        float src_y = i / hratio;
        int y0 = (int)src_y;
        int y1;
        float fy;

        if (y0 > src_h - 1)
            y0 = src_h - 1;
        fy = src_y - y0;
        /* On the last row there is no lower neighbour; reuse the row itself. */
        y1 = (y0 + 1 < src_h) ? y0 + 1 : y0;

        for (j = 0; j < dst_frame->width; j++) {
            float src_x = j / wratio;
            int x0 = (int)src_x;
            int x1;
            float fx;
            uint8_t leftup, rightup, leftdown, rightdown;
            float luma;

            if (x0 > src_w - 1)
                x0 = src_w - 1;
            fx = src_x - x0;
            /* On the last column there is no right neighbour. */
            x1 = (x0 + 1 < src_w) ? x0 + 1 : x0;

            leftup    = src_frame->data[0][x0 + y0 * src_frame->linesize[0]];
            rightup   = src_frame->data[0][x1 + y0 * src_frame->linesize[0]];
            leftdown  = src_frame->data[0][x0 + y1 * src_frame->linesize[0]];
            rightdown = src_frame->data[0][x1 + y1 * src_frame->linesize[0]];

            luma = (1 - fx) * (1 - fy) * leftup
                 + fx * (1 - fy) * rightup
                 + (1 - fx) * fy * leftdown
                 + fx * fy * rightdown;
            /* Round to nearest instead of truncating toward zero. */
            dst_frame->data[0][j + i * dst_frame->linesize[0]] =
                (uint8_t)(luma + 0.5f);

            dst_frame->data[1][(j >> 1) + (i >> 1) * dst_frame->linesize[1]] =
                src_frame->data[1][(x0 >> 1) + (y0 >> 1) * src_frame->linesize[1]];
            /* Plane 2 is addressed with its own stride, not plane 1's. */
            dst_frame->data[2][(j >> 1) + (i >> 1) * dst_frame->linesize[2]] =
                src_frame->data[2][(x0 >> 1) + (y0 >> 1) * src_frame->linesize[2]];
        }
    }
    return 0;
}
效果:
- 三次卷积(双三次,bicubic)插值算法
使用ffmpeg的sws框架实现。
/*
 * Scale/convert inframe into goalframe with libswscale using SWS_BICUBIC.
 *
 * goalframe is not allocated here: its width, height, format, data and
 * linesize are read as-is and must already be set up by the caller.
 *
 * Returns 0 on success, -1 if either frame is NULL, the scaler context
 * cannot be created, or sws_scale reports an error.
 */
int scale_not_alloc(AVFrame *inframe, AVFrame *goalframe)
{
    struct SwsContext *img_convert_ctx;
    int out_height;

    if (inframe == NULL || goalframe == NULL)
    {
        printf("[@scale]:give the frame is NULL\n");
        return -1;
    }

    img_convert_ctx = sws_getContext(inframe->width, inframe->height,
                                     (enum AVPixelFormat)inframe->format,
                                     goalframe->width, goalframe->height,
                                     (enum AVPixelFormat)goalframe->format,
                                     SWS_BICUBIC, NULL, NULL, NULL);
    /* sws_getContext returns NULL on failure; sws_scale must not see that. */
    if (img_convert_ctx == NULL)
    {
        printf("[@scale]:sws_getContext failed\n");
        return -1;
    }

    out_height = sws_scale(img_convert_ctx,
                           (const uint8_t * const *)inframe->data,
                           inframe->linesize, 0, inframe->height,
                           goalframe->data, goalframe->linesize);
    sws_freeContext(img_convert_ctx);

    if (out_height < 0)
    {
        printf("[@scale]:sws_scale failed\n");
        return -1;
    }
    return 0;
}
效果:
效率降低了很多。虽然在这张测试图片上没有看出效果好多少,但从原理上讲,该算法能让细节显示得更为清楚,实际效果与测试图片有关。
双线性插值算法实现比较
记上面我自己实现的双线性插值算法的耗时为 s。
- ffmpeg sws接口flag设置为SWS_BILINEAR 时间消耗大约为s/3
效果:
跟踪其源码,发现在 sws_init_swscale 中指定了算法函数的指针:
/*
 * Fill in the function pointers of the scaler context `c`.
 *
 * The horizontal scalers (hyScale / hcScale) are chosen from c->srcBpc and
 * c->dstBpc; the *_fast variants are installed only for 8-bit sources with
 * dstBpc <= 14 when SWS_FAST_BILINEAR is set in c->flags.
 */
static av_cold void sws_init_swscale(SwsContext *c)
{
    enum AVPixelFormat srcFormat = c->srcFormat;

    ff_sws_init_output_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
                             &c->yuv2nv12cX, &c->yuv2packed1,
                             &c->yuv2packed2, &c->yuv2packedX, &c->yuv2anyX);

    ff_sws_init_input_funcs(c);

    if (c->srcBpc == 8) {
        if (c->dstBpc <= 14) {
            /* Generic filter-based horizontal scaler for both pointers. */
            c->hyScale = c->hcScale = hScale8To15_c;
            if (c->flags & SWS_FAST_BILINEAR) {
                c->hyscale_fast = ff_hyscale_fast_c;
                c->hcscale_fast = ff_hcscale_fast_c;
            }
        } else {
            c->hyScale = c->hcScale = hScale8To19_c;
        }
    } else {
        /* srcBpc is not 8: pick the 16-bit-input scaler by dstBpc. */
        c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
                                                 : hScale16To15_c;
    }

    ff_sws_init_range_convert(c);

    /* needs_hcscale is set unless either format is gray or the source is
     * monoblack / monowhite. */
    if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
          srcFormat == AV_PIX_FMT_MONOBLACK || srcFormat == AV_PIX_FMT_MONOWHITE))
        c->needs_hcscale = 1;
}
缩放图像是先横向缩放再纵向缩放。横向缩放实现:
/*
 * Horizontal filter pass over one row of 8-bit samples.
 *
 * For each of the dstW outputs, filterSize consecutive source samples
 * starting at filterPos[i] are multiplied by the coefficients
 * filter[filterSize * i .. filterSize * i + filterSize - 1] and summed.
 * The sum is shifted right by 7 and capped at (1 << 15) - 1 before being
 * stored in dst[i]. The context pointer `c` is not used in this body.
 */
static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW,
                          const uint8_t *src, const int16_t *filter,
                          const int32_t *filterPos, int filterSize)
{
    int i;
    for (i = 0; i < dstW; i++) {
        int j;
        int srcPos = filterPos[i];
        int val    = 0;
        for (j = 0; j < filterSize; j++) {
            val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
        }
        dst[i] = FFMIN(val >> 7, (1 << 15) - 1); // the cubic equation does overflow ...
    }
}
- 将flag设置为SWS_FAST_BILINEAR
与SWS_BILINEAR 相比虽然效率又提升,但是差别不大。
效果:
亮度(hy)和色度(hc)的横向缩放函数分别是:
/*
 * Fast horizontal scaling of a single plane.
 *
 * xpos is a fixed-point source position advanced by xInc per output sample:
 * xpos >> 16 is the integer source index and the top 7 bits of the low 16
 * bits form the blend weight xalpha (0..127). Each output equals
 * src[xx] * 128 + (src[xx + 1] - src[xx]) * xalpha, i.e. a linear blend of
 * two neighbouring samples scaled by 128. The context pointer `c` is not
 * used in this body.
 */
void ff_hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
                       const uint8_t *src, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    for (i = 0; i < dstWidth; i++) {
        register unsigned int xx     = xpos >> 16;
        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
        dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
        xpos  += xInc;
    }
    /* Walk back from the end: outputs whose source index is at or beyond the
     * last sample are overwritten with that last sample (scaled by 128). */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--)
        dst[i] = src[srcW - 1] * 128;
}

/*
 * Fast horizontal scaling of two planes in lockstep (src1 -> dst1,
 * src2 -> dst2), using the same fixed-point stepping as above.
 *
 * Since xalpha is a 7-bit value, (xalpha ^ 127) equals 127 - xalpha, so each
 * output is a blend of two neighbouring samples with weights summing to 127.
 * The context pointer `c` is not used in this body.
 */
void ff_hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
                       int dstWidth, const uint8_t *src1,
                       const uint8_t *src2, int srcW, int xInc)
{
    int i;
    unsigned int xpos = 0;
    for (i = 0; i < dstWidth; i++) {
        register unsigned int xx     = xpos >> 16;
        register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
        dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
        dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
        xpos   += xInc;
    }
    /* Same tail fix-up as the single-plane version, applied to both planes. */
    for (i = dstWidth - 1; (i * xInc) >> 16 >= srcW - 1; i--) {
        dst1[i] = src1[srcW - 1] * 128;
        dst2[i] = src2[srcW - 1] * 128;
    }
}
算法还在研究中,并未完全理解。
- libyuv
时间消耗大约是s/7
输出效果:
针对 YCbCr 420P(I420)调用的函数是:
libyuv 对于 YUV I420 的缩放最终调用的是:
/*
 * Scale one plane row by row without filtering (kFilterNone).
 *
 * ScaleSlope fills in the starting position (x, y) and per-sample steps
 * (dx, dy) as 16.16 fixed-point values. Each destination row is produced by
 * the selected ScaleCols routine from source row (y >> 16); y then advances
 * by dy. When the destination is exactly twice as wide as the source and
 * x < 0x8000, a dedicated 2x column routine is used instead (an SSE2 variant
 * if compiled in, supported by the CPU, and dst_width is a multiple of 8).
 */
static void ScalePlaneSimple(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr) {
  int i;
  void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
                    int dst_width, int x, int dx) = ScaleCols_C;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  for (i = 0; i < dst_height; ++i) {
    ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
    dst_ptr += dst_stride;
    y += dy;
  }
}
同样是逐行缩放,类似于ffmpeg的先横向再纵向。
每行的缩放传递到了ScaleCols_C 来进行处理,消除了所有乘除法。
// Scales a single row of pixels using point sampling.
//
// x is the starting source position and dx the per-output step, both in
// 16.16 fixed point; each output pixel is src_ptr[x >> 16]. The loop emits
// two pixels per iteration and a trailing statement handles an odd
// dst_width. The element type is spelled with the <stdint.h> name uint8_t so
// the snippet is self-contained.
void ScaleCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr,
                 int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}