上一篇博文给出了图像Lanczos3滤波的直观实现,但是整个算法实现过于低效,其原因在于对于每个插值点,都需要重新计算Lanczos系数,本文参考https://github.com/richgel999/imageresampler中的实现,将其C++代码转换成C实现。
优化思路:每一行其插值的x位置相同,每一列其插值的y位置相同,因此可以将x位置的插值系数做成table,将y位置的插值系数也做成table,在插值某个点 $(x,y)$ 时只需查表,即可得系数。
代码实现
数据结构准备:
// Half-width of the Lanczos kernel; make_lan_bounds() uses it to size each neighbourhood.
#define LANCZOS_SIZE 3.0f
// A single Lanczos coefficient (one filter tap).
typedef struct lanc_coef_t
{
float weight;// Lanczos kernel value for this tap (normalized by make_lanc_coef_nodes)
int pos; // source position of the tap; may be an x or a y index
}lanc_coef;
// Lanczos coefficient vector: contiguous storage holding all coefficients of one axis.
typedef struct lanc_coef_vec_t
{
lanc_coef* coefs; // array of `size` coefficients, allocated by create_lanc_coef_vec()
int size; // number of entries in coefs
}lanc_coef_vec;
// All Lanczos coefficients that belong to one interpolated (destination) point.
typedef struct lanc_coef_node_t
{
lanc_coef* p;// address of the first coefficient of this point (points into a lanc_coef_vec)
int n;// number of non-zero Lanczos coefficients stored for this point
}lanc_coef_node;
// Vector of coefficient nodes: one node per interpolated point along an axis.
typedef struct lanc_coef_node_vec_t
{
lanc_coef_node* nds; // array of `size` nodes, allocated by create_lanc_coef_node_vec()
int size; // number of nodes
}lanc_coef_node_vec;
// Position of one interpolated point and the extent of its neighbourhood in the source.
typedef struct lanc_bound_t
{
float c; // centre: destination index divided by the scale factor
int l; // first source index of the neighbourhood, floor(c - half width)
int r; // last source index of the neighbourhood, ceil(c + half width)
}lanc_bound;
// Vector of neighbourhood bounds for every interpolated point along one axis.
typedef struct lanc_bound_vec_t
{
lanc_bound* lbs; // array of `size` bounds, allocated by create_bound_vec()
int size; // number of bounds
int total; // sum of (r - l + 1) over all bounds, i.e. the coefficient slots needed
float scale; // scale applied to kernel arguments; make_lan_bounds() caps it at 1.0
}lanc_bound_vec;
// One-dimensional float buffer (resize_using_lanczos() uses it to hold one source row).
typedef struct real_vec_t
{
float* data; // `size` floats, zero-initialized by create_real_vec()
int size; // number of elements in data
}real_vec;
// Number of scanline buffer slots held by a sampler.
#define MAX_SCAN_BUF_SIZE 16384
// Streaming resampler state: source rows are pushed with put_line() and
// finished destination rows are pulled with get_line().
typedef struct lanc_sampler_t
{
int intermediate_w; // width of buffered scanlines: source width when X resampling is delayed, else destination width
int resample_src_w; // source width (src->cols)
int resample_src_h; // source height (src->rows)
int resample_dst_w; // destination width (dst->cols)
int resample_dst_h; // destination height (dst->rows)
float* dst_buf; // one destination row, resample_dst_w floats
float* tmp_buf; // intermediate row; only allocated when delay_x_resample is set
int delay_x_resample; // 1: resample Y first and X afterwards; 0: resample X as rows arrive
int* src_y_count; // per source row: number of Y taps that still reference it
uint8_t* src_y_flag; // per source row: 1 while the row is held in a scanline slot
int scan_buf_y[MAX_SCAN_BUF_SIZE]; // source row stored in each slot, -1 when the slot is free
float* scan_buf_l[MAX_SCAN_BUF_SIZE]; // per-slot line buffer, allocated lazily by put_line()
int cur_src_y; // index of the next source row expected by put_line()
int cur_dst_y; // index of the next destination row produced by get_line()
}lanc_sampler;
相关数据结构创建与释放:
// Allocate a real_vec of `size` floats; returns NULL on failure or negative size.
real_vec* create_real_vec(int size);
// Free the real_vec referenced by *_rv and set *_rv to NULL.
void destroy_real_vec(real_vec** _rv);
/*
 * Allocate a real_vec holding `size` zero-initialized floats.
 *
 * Returns NULL when `size` is negative or when an allocation fails.
 * The caller owns the result and releases it with destroy_real_vec().
 */
real_vec* create_real_vec(int size)
{
    if (size < 0)
        return NULL;

    real_vec* rv = (real_vec*)calloc(1, sizeof(real_vec));
    if (!rv)
        return NULL;

    /* Pass the element count and element size separately so that calloc
     * itself rejects a byte count that would overflow size_t. */
    rv->data = (float*)calloc((size_t)size, sizeof(float));
    if (!rv->data)
    {
        destroy_real_vec(&rv);
        return NULL;
    }

    rv->size = size;
    return rv;
}
/*
 * Release a real_vec together with its data buffer and reset the caller's
 * pointer to NULL. Safe to call with a NULL argument or a NULL *_rv.
 */
void destroy_real_vec(real_vec** _rv)
{
    if (_rv == NULL || *_rv == NULL)
        return;

    free((*_rv)->data);
    free(*_rv);
    *_rv = NULL;
}
/*
 * Release a coefficient vector and its coefficient array, then set the
 * caller's pointer to NULL. A NULL argument is ignored.
 */
void destroy_lanc_coef_vev(lanc_coef_vec** _lcv)
{
    if (_lcv == NULL)
        return;

    if (*_lcv != NULL)
    {
        free((*_lcv)->coefs);
        free(*_lcv);
    }
    *_lcv = NULL;
}
/*
 * Allocate a coefficient vector with room for `size` zero-initialized
 * coefficients. Returns NULL if either allocation fails.
 */
lanc_coef_vec* create_lanc_coef_vec(int size)
{
    lanc_coef_vec* vec = (lanc_coef_vec*)calloc(1, sizeof(lanc_coef_vec));
    if (vec == NULL)
        return NULL;

    vec->coefs = (lanc_coef*)calloc(size, sizeof(lanc_coef));
    if (vec->coefs == NULL)
    {
        destroy_lanc_coef_vev(&vec);
        return NULL;
    }

    vec->size = size;
    return vec;
}
/*
 * Release a bound vector and its bound array, then set the caller's
 * pointer to NULL. A NULL argument is ignored.
 */
void destroy_lanc_bound_vec(lanc_bound_vec** _lbv)
{
    if (_lbv == NULL)
        return;

    if (*_lbv != NULL)
    {
        free((*_lbv)->lbs);
        free(*_lbv);
    }
    *_lbv = NULL;
}
/*
 * Allocate a bound vector with `size` zero-initialized entries.
 * Returns NULL if either allocation fails.
 */
lanc_bound_vec* create_bound_vec(int size)
{
    lanc_bound_vec* vec = (lanc_bound_vec*)calloc(1, sizeof(lanc_bound_vec));
    if (vec == NULL)
        return NULL;

    vec->lbs = (lanc_bound*)calloc(size, sizeof(lanc_bound));
    if (vec->lbs == NULL)
    {
        destroy_lanc_bound_vec(&vec);
        return NULL;
    }

    vec->size = size;
    return vec;
}
/*
 * Release a node vector and its node array, then set the caller's pointer
 * to NULL. Only the node array is freed here; the coefficient storage the
 * nodes point into is not touched. A NULL argument is ignored.
 */
void destroy_lanc_coef_node_vev(lanc_coef_node_vec** _cnv)
{
    if (_cnv == NULL)
        return;

    if (*_cnv != NULL)
    {
        free((*_cnv)->nds);
        free(*_cnv);
    }
    *_cnv = NULL;
}
/*
 * Allocate a node vector with `size` zero-initialized nodes.
 *
 * Returns NULL if either allocation fails. The caller releases the result
 * with destroy_lanc_coef_node_vev().
 */
lanc_coef_node_vec* create_lanc_coef_node_vec(int size)
{
    lanc_coef_node_vec* cnv = NULL;
    cnv = (lanc_coef_node_vec*)calloc(1, sizeof(lanc_coef_node_vec));
    if (!cnv)
        return cnv;

    lanc_coef_node* nds = (lanc_coef_node*)calloc(size, sizeof(lanc_coef_node));
    if (!nds)
    {
        /* Use the destructor that matches this type; the coefficient-vector
         * destructor takes a lanc_coef_vec** and is not type-compatible. */
        destroy_lanc_coef_node_vev(&cnv);
        return NULL;
    }

    cnv->nds = nds;
    cnv->size = size;
    return cnv;
}
Lanczos3滤波核函数实现:
static double sinc(double x)
{
x = (x * M_PI);
if ((x < 0.01f) && (x > -0.01f))
return 1.0f + x*x*(-1.0f / 6.0f + x*x*1.0f / 120.0f);
return sin(x) / x;
}
/*
 * Flush values whose magnitude is below a small epsilon to exactly zero;
 * everything else is narrowed to float unchanged.
 */
static float clean(double t)
{
    const float EPSILON = .0000125f;

    return (fabs(t) < EPSILON) ? 0.0f : (float)t;
}
/*
 * Lanczos-3 kernel: sinc(t) * sinc(t / 3) for |t| < 3, otherwise 0.
 * Results with a tiny magnitude are flushed to zero by clean().
 */
static float lanczos_coef(float t)
{
    const float dist = (t < 0.0f) ? -t : t;

    if (dist < 3.0f)
        return clean(sinc(dist) * sinc(dist / 3.0f));

    return (0.0f);
}
生成x或y方向上每个插值点的邻域范围:c表示当前插值点的位置,l表示邻域的左侧(对y方向来说是上侧),r表示邻域的右侧(对y方向来说是下侧)。
/*
 * Fill every entry of `bv` with the centre and neighbourhood of the
 * corresponding destination index.
 *
 * The half width is LANCZOS_SIZE when scale > 1 and LANCZOS_SIZE / scale
 * otherwise. bv->total receives the sum of all neighbourhood lengths and
 * bv->scale receives the scale capped at 1.0.
 */
static void make_lan_bounds(lanc_bound_vec* bv, float scale)
{
    if (bv == NULL)
        return;

    const float half_width = (scale > 1.0f) ? LANCZOS_SIZE : (LANCZOS_SIZE / scale);
    const int count = bv->size;
    int slots = 0;

    for (int idx = 0; idx < count; ++idx)
    {
        lanc_bound entry;

        entry.c = (float)idx / scale;
        entry.l = (int)floor(entry.c - half_width);
        entry.r = (int)ceil(entry.c + half_width);
        slots += (entry.r - entry.l + 1);
        bv->lbs[idx] = entry;
    }

    bv->total = slots;
    bv->scale = (scale >= 1.0f) ? 1.0f : scale;
}
计算每个插值点Lanczos核函数值:
/*
 * Build the coefficient node of every destination index along one axis.
 *
 * cv       - backing storage; each node receives a slice of cv->coefs
 * bv       - neighbourhood bounds produced by make_lan_bounds()
 * nv       - output nodes, one per bound
 * img_size - source size along this axis; tap positions are clamped to
 *            [0, img_size - 1]
 *
 * For each index the kernel values are normalized so they sum to one, only
 * the non-zero taps are stored, and any remaining rounding error is added
 * to the largest tap. On failure a message is printed and the function
 * returns, leaving the remaining nodes untouched.
 */
static void make_lanc_coef_nodes(lanc_coef_vec* cv, lanc_bound_vec* bv, lanc_coef_node_vec* nv, int img_size)
{
    if (!cv || !bv || !nv)
        return;

    int i, j;
    int size = bv->size;
    lanc_coef* next = cv->coefs;

    for (i = 0; i < size; i++)
    {
        lanc_bound bound = bv->lbs[i];
        int l = bound.l;
        int r = bound.r;
        float c = bound.c;
        float s = 0.0f;
        float coef;

        /* First pass: accumulate the raw kernel values for normalization. */
        for (j = l; j <= r; j++)
        {
            coef = lanczos_coef((c - (float)j) * bv->scale);
            s += coef;
        }
        float norm = 1.0f / s;

        lanc_coef_node node = nv->nds[i];
        node.n = 0;
        node.p = next;
        next += (r - l + 1);

        s = 0.0f;
        /* Seed with the lowest finite float. FLT_MIN is the smallest
         * POSITIVE value, so it would hide a maximum that is not above it. */
        float max_coef = -FLT_MAX;
        int max_k = -1;

        /* Second pass: store the normalized non-zero taps, track their sum
         * and remember where the largest one lives. */
        for (j = l; j <= r; j++)
        {
            coef = lanczos_coef((c - (float)j) * bv->scale);
            coef *= norm;
            if (coef == 0.0f)
                continue;

            /* Clamp taps that fall outside the image to the nearest edge. */
            int pos = j;
            if (j < 0)
                pos = 0;
            if (j >= img_size)
                pos = img_size - 1;

            int k = node.n++;
            node.p[k].pos = pos;
            node.p[k].weight = coef;
            s += coef;
            if (max_coef < coef)
            {
                max_coef = coef;
                max_k = k;
            }
        }

        if (max_k == -1 || node.n == 0)
        {
            printf("make_lanc_coef_nodes fail\n");
            return;
        }

        /* Push the rounding residue into the largest tap so the weights
         * sum to one. */
        if (s != 1.0f)
        {
            node.p[max_k].weight += (1.0 - s);
        }
        nv->nds[i] = node;
    }
}
整合上面的方法,生成x和y方向的插值系数:
/*
 * Build the X and Y coefficient tables used to resample `src` to the size
 * of `dst`.
 *
 * On success *_xcnv and *_ycnv receive the new node vectors (one node per
 * destination column / row); the caller releases them with
 * destroy_lanc_coef_node_vev(). On any failure the outputs are left
 * untouched and every intermediate allocation is released.
 *
 * NOTE(review): the coefficient arrays the nodes point into stay allocated
 * after this function returns; nothing in this file frees them later.
 */
void make_lanc_coef(uint8_2d* src, uint8_2d* dst, lanc_coef_node_vec** _xcnv, lanc_coef_node_vec** _ycnv)
{
    lanc_coef_vec* xcv = NULL;
    lanc_coef_vec* ycv = NULL;
    lanc_bound_vec* xbv = NULL;
    lanc_bound_vec* ybv = NULL;
    lanc_coef_node_vec* xcnv = NULL;
    lanc_coef_node_vec* ycnv = NULL;
    int dw, dh;
    int sw, sh;

    if (!src || !dst || !_xcnv || !_ycnv)
        return;

    dw = dst->cols;
    dh = dst->rows;
    sw = src->cols;
    sh = src->rows;
    /* The scale factors divide by the source size. */
    if (sw <= 0 || sh <= 0)
        return;

    xbv = create_bound_vec(dw);
    ybv = create_bound_vec(dh);
    xcnv = create_lanc_coef_node_vec(dw);
    ycnv = create_lanc_coef_node_vec(dh);
    if (!xbv || !ybv || !xcnv || !ycnv)
        goto cleanup;

    make_lan_bounds(xbv, (float)dw / (float)sw);
    make_lan_bounds(ybv, (float)dh / (float)sh);

    xcv = create_lanc_coef_vec(xbv->total);
    ycv = create_lanc_coef_vec(ybv->total);
    if (!xcv || !ycv)
        goto cleanup;

    make_lanc_coef_nodes(xcv, xbv, xcnv, sw);
    make_lanc_coef_nodes(ycv, ybv, ycnv, sh);

    /* Hand the node vectors to the caller. */
    *_xcnv = xcnv;
    *_ycnv = ycnv;
    xcnv = NULL;
    ycnv = NULL;

    /* The nodes keep pointing into the coefficient arrays, so only the
     * small wrapper structs are released here. */
    free(xcv);
    free(ycv);
    xcv = NULL;
    ycv = NULL;

cleanup:
    destroy_lanc_bound_vec(&xbv);
    destroy_lanc_bound_vec(&ybv);
    destroy_lanc_coef_vev(&xcv);
    destroy_lanc_coef_vev(&ycv);
    destroy_lanc_coef_node_vev(&xcnv);
    destroy_lanc_coef_node_vev(&ycnv);
}
创建采样器:
/*
 * Release a sampler and everything it owns, then set the caller's pointer
 * to NULL. A NULL argument or a NULL *_sampler is accepted.
 */
void destroy_lanc_sampler(lanc_sampler** _sampler)
{
    if (!_sampler)
        return;

    lanc_sampler* sampler = *_sampler;
    if (sampler)
    {
        int i;

        /* put_line() allocates the scanline slots lazily; release every
         * slot that was handed a buffer, otherwise they leak. */
        for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
            free(sampler->scan_buf_l[i]);

        free(sampler->src_y_count);
        free(sampler->src_y_flag);
        free(sampler->dst_buf);
        free(sampler->tmp_buf);
        free(sampler);
    }
    *_sampler = NULL;
}
// Range check for an x or y index: asserts 0 <= v < h and passes v through.
static inline int resampler_range_check(int v, int h)
{
    (void)h; // keeps h "used" when assert() compiles to nothing
    assert((v >= 0) && (v < h));
    return v;
}
// Count the stored coefficients of the first k nodes; used to estimate the
// number of multiplies along an axis. Returns 0 when cnv is NULL.
//
// Declared static inline: a plain `inline` definition in C99/C11 does not
// provide an external definition, so a call that is not inlined fails to link.
static inline int count_ops(lanc_coef_node_vec* cnv, int k)
{
    int i, t = 0;

    if (!cnv)
        return 0;

    for (i = 0; i < k; i++)
        t += cnv->nds[i].n;
    return (t);
}
/*
 * Create the streaming sampler that resamples `src` to the size of `dst`.
 *
 * xcnv / ycnv are the coefficient tables for the X and Y axes. The sampler
 * counts how many Y taps reference every source row, marks all scanline
 * slots as free and decides whether X resampling runs as rows arrive or is
 * delayed until after Y resampling.
 *
 * Returns NULL when an argument is NULL or an allocation fails. The caller
 * releases the result with destroy_lanc_sampler().
 */
lanc_sampler* create_lanc_sampler(uint8_2d* src, uint8_2d* dst, lanc_coef_node_vec* xcnv, lanc_coef_node_vec* ycnv)
{
    /* Both tables are dereferenced below, so they are required as well. */
    if (!src || !dst || !xcnv || !ycnv)
        return NULL;

    lanc_sampler* sampler = NULL;
    sampler = (lanc_sampler*)calloc(1, sizeof(lanc_sampler));
    if (!sampler)
        return sampler;

    sampler->resample_src_w = src->cols;
    sampler->resample_src_h = src->rows;
    sampler->resample_dst_w = dst->cols;
    sampler->resample_dst_h = dst->rows;

    sampler->dst_buf = (float*)calloc(dst->cols, sizeof(float));
    if (!sampler->dst_buf)
    {
        destroy_lanc_sampler(&sampler);
        return NULL;
    }
    sampler->src_y_count = (int*)calloc(sampler->resample_src_h, sizeof(int));
    if (!sampler->src_y_count)
    {
        destroy_lanc_sampler(&sampler);
        return NULL;
    }
    sampler->src_y_flag = (uint8_t*)calloc(sampler->resample_src_h, sizeof(uint8_t));
    if (!sampler->src_y_flag)
    {
        destroy_lanc_sampler(&sampler);
        return NULL;
    }

    /* Count, per source row, how many destination-row taps reference it. */
    int i, j;
    for (i = 0; i < sampler->resample_dst_h; i++)
        for (j = 0; j < ycnv->nds[i].n; j++)
            sampler->src_y_count[resampler_range_check(ycnv->nds[i].p[j].pos, sampler->resample_src_h)]++;

    /* Every scanline slot starts out free and without a buffer. */
    for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
    {
        sampler->scan_buf_y[i] = -1;
        sampler->scan_buf_l[i] = NULL;
    }

    {
        // Determine which axis to resample first by comparing the number of multiplies required
        // for each possibility.
        int x_ops = count_ops(xcnv, dst->cols);
        int y_ops = count_ops(ycnv, dst->rows);
        // Hack 10/2000: Weight Y axis ops a little more than X axis ops.
        // (Y axis ops use more cache resources.)
        int xy_ops = x_ops * sampler->resample_src_h +
            (4 * y_ops * sampler->resample_dst_w) / 3;
        int yx_ops = (4 * y_ops * sampler->resample_src_w) / 3 +
            x_ops * sampler->resample_dst_h;
#if TEST
        printf("src: %i %i\n", sampler->resample_src_w, sampler->resample_src_h);
        printf("dst: %i %i\n", sampler->resample_dst_w, sampler->resample_dst_h);
        printf("x_ops: %i\n", x_ops);
        printf("y_ops: %i\n", y_ops);
        printf("xy_ops: %i\n", xy_ops);
        printf("yx_ops: %i\n", yx_ops);
#endif
        // Pick the cheaper resampling order from the operation counts above.
        // Now check which resample order is better. In case of a tie, choose the order
        // which buffers the least amount of data.
        if ((xy_ops > yx_ops) ||
            ((xy_ops == yx_ops) && (sampler->resample_src_w < sampler->resample_dst_w))
            )
        {
            sampler->delay_x_resample = 1;
            sampler->intermediate_w = sampler->resample_src_w;
        }
        else
        {
            sampler->delay_x_resample = 0;
            sampler->intermediate_w = sampler->resample_dst_w;
        }
#if TEST
        printf("delaying: %d\n", sampler->delay_x_resample);
#endif
    }

    /* The intermediate row is only needed when X resampling is delayed. */
    if (sampler->delay_x_resample)
    {
        sampler->tmp_buf = (float*)malloc(sampler->intermediate_w * sizeof(float));
        if (!sampler->tmp_buf)
        {
            destroy_lanc_sampler(&sampler);
            return NULL;
        }
    }
    return sampler;
}
插值x方向,读入一行,输出一行:
/*
 * Resample one row along X: every destination sample is the weighted sum
 * of the source samples listed in its coefficient node.
 * Does nothing when xcnv is NULL.
 */
static void lanc_sample_x(float* src, float* dst, lanc_sampler* sampler, lanc_coef_node_vec* xcnv)
{
    if (xcnv == NULL)
        return;

    const int out_w = sampler->resample_dst_w;

    for (int x = 0; x < out_w; ++x)
    {
        const lanc_coef_node* node = &xcnv->nds[x];
        float acc = 0.0f;

        for (int k = 0; k < node->n; ++k)
            acc += src[node->p[k].pos] * node->p[k].weight;

        dst[x] = acc;
    }
}
生成一行最终结果:
/*
 * Feed the next source row (buf->data) into the sampler.
 *
 * Rows that no destination row references are skipped. Otherwise the row is
 * stored in a free scanline slot, either as-is (X resampling delayed) or
 * already resampled along X. When no slot is free or the slot buffer cannot
 * be allocated the function returns without consuming the row.
 */
static void put_line(real_vec* buf, lanc_sampler* sampler, lanc_coef_node_vec* xcnv)
{
    int i;

    if (!buf || !sampler || !xcnv)
        return;
    if (sampler->cur_src_y >= sampler->resample_src_h)
        return;

    /* Does this source line contribute
     * to any destination line? if not,
     * exit now.
     */
    if (!sampler->src_y_count[resampler_range_check(sampler->cur_src_y, sampler->resample_src_h)])
    {
        sampler->cur_src_y++;
        return;
    }

    /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */
    for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
        if (sampler->scan_buf_y[i] == -1)
            break;

    /* If the buffer is full, exit with an error. */
    if (i >= MAX_SCAN_BUF_SIZE)
    {
        return;
    }

    /* Does this slot have any memory allocated to it? Allocate BEFORE the
     * slot is marked as occupied: if the allocation fails, the row must not
     * be flagged as available, otherwise lanc_sample_y() would later read
     * through a NULL line pointer. */
    if (!sampler->scan_buf_l[i])
    {
        sampler->scan_buf_l[i] = (float*)calloc(sampler->intermediate_w, sizeof(float));
        if (!sampler->scan_buf_l[i])
        {
            return;
        }
    }

    sampler->src_y_flag[resampler_range_check(sampler->cur_src_y, sampler->resample_src_h)] = 1;
    sampler->scan_buf_y[i] = sampler->cur_src_y;

    // Resampling on the X axis first?
    if (sampler->delay_x_resample)
    {
        assert(sampler->intermediate_w == sampler->resample_src_w);
        // Y-X resampling order
        memcpy(sampler->scan_buf_l[i], buf->data, sampler->intermediate_w * sizeof(float));
    }
    else
    {
        assert(sampler->intermediate_w == sampler->resample_dst_w);
        // X-Y resampling order
        lanc_sample_x(buf->data, sampler->scan_buf_l[i], sampler, xcnv);
    }
    sampler->cur_src_y++;
}
// Write the first dst_x source samples, each multiplied by weight, into Ptmp
// (overwrites the destination).
static void scale_y_mov(float* Ptmp, const float* Psrc, float weight, int dst_x)
{
    for (int k = 0; k < dst_x; ++k)
        Ptmp[k] = Psrc[k] * weight;
}
// Accumulate the first dst_x source samples, each multiplied by weight, onto Ptmp.
static void scale_y_add(float* Ptmp, const float* Psrc, float weight, int dst_x)
{
    int k = 0;

    while (k < dst_x)
    {
        Ptmp[k] += Psrc[k] * weight;
        ++k;
    }
}
截取范围:
/* Limit a sample to the closed range [0, 1]. */
static inline float clamp_sample(float f)
{
    if (f < 0.0f)
        return 0.0f;
    if (f > 1.0f)
        return 1.0f;
    return f;
}
/* Clamp the first n samples of Pdst to [0, 1] in place. */
static void clamp(float* Pdst, int n)
{
    for (int k = 0; k < n; ++k)
        Pdst[k] = clamp_sample(Pdst[k]);
}
插值y方向:
/*
 * Produce destination row sampler->cur_dst_y in sampler->dst_buf.
 *
 * The buffered source rows listed in the row's Y coefficient node are
 * weighted and summed. When X resampling was delayed, the sum is built in
 * tmp_buf and then resampled along X into dst_buf; otherwise it is built
 * directly in dst_buf. The finished row is clamped to [0, 1].
 * Returns immediately when any argument is NULL.
 */
static void lanc_sample_y(lanc_sampler* sampler, lanc_coef_node_vec* xcnv,lanc_coef_node_vec* ycnv)
{
int i, j;
float* Psrc;
if (!sampler || !ycnv || !xcnv)return;
/* Y coefficients (contributors) of the destination row being generated. */
lanc_coef_node node = ycnv->nds[sampler->cur_dst_y];
/* Accumulate into tmp_buf when an X pass still follows, else straight into dst_buf. */
float* Ptmp = sampler->delay_x_resample ? sampler->tmp_buf : sampler->dst_buf;
assert(Ptmp);
/* Process each contributor. */
for (i = 0; i < node.n; i++)
{
/* locate the contributor's location in the scan
* buffer -- the contributor must always be found!
*/
for (j = 0; j < MAX_SCAN_BUF_SIZE; j++)
if (sampler->scan_buf_y[j] == node.p[i].pos)
break;
assert(j < MAX_SCAN_BUF_SIZE);
Psrc = sampler->scan_buf_l[j];
/* The first contributor overwrites the accumulator, the rest add to it. */
if (!i)
scale_y_mov(Ptmp, Psrc, node.p[i].weight, sampler->intermediate_w);
else
scale_y_add(Ptmp, Psrc, node.p[i].weight, sampler->intermediate_w);
/* If this source line doesn't contribute to any
* more destination lines then mark the scanline buffer slot
* which holds this source line as free.
* (The max. number of slots used depends on the Y
* axis sampling factor and the scaled filter width.)
*/
if (--sampler->src_y_count[resampler_range_check(node.p[i].pos, sampler->resample_src_h)] == 0)
{
sampler->src_y_flag[resampler_range_check(node.p[i].pos, sampler->resample_src_h)] = 0;
sampler->scan_buf_y[j] = -1;
}
}
/* Now generate the destination line */
if (sampler->delay_x_resample) // Was X resampling delayed until after Y resampling?
{
assert(sampler->dst_buf != Ptmp);
lanc_sample_x(Ptmp, sampler->dst_buf,sampler, xcnv);
}
else
{
assert(sampler->dst_buf == Ptmp);
}
/* Limit the finished row to the [0, 1] range. */
clamp(sampler->dst_buf, sampler->resample_dst_w);
}
获取一行结果:
/*
 * Try to produce the next destination row.
 *
 * Returns sampler->dst_buf holding the row, or NULL when all destination
 * rows have been generated, when a source row the next destination row
 * needs has not been buffered yet, or when sampler / ycnv is NULL.
 */
static float* get_line(lanc_sampler* sampler, lanc_coef_node_vec* xcnv,lanc_coef_node_vec* ycnv)
{
    if (sampler == NULL || ycnv == NULL)
        return NULL;

    /* All destination rows are done. */
    if (sampler->cur_dst_y == sampler->resample_dst_h)
        return NULL;

    /* Every contributing source row must already sit in the scan buffer. */
    const lanc_coef_node* node = &ycnv->nds[sampler->cur_dst_y];
    const int taps = node->n;
    for (int k = 0; k < taps; ++k)
    {
        const int src_row = resampler_range_check(node->p[k].pos, sampler->resample_src_h);
        if (!sampler->src_y_flag[src_row])
            return NULL;
    }

    lanc_sample_y(sampler, xcnv, ycnv);
    sampler->cur_dst_y++;
    return sampler->dst_buf;
}
整合所有方法:
/*
 * Resize the 8-bit plane `src` into `dst` with a Lanczos-3 filter.
 *
 * The source is converted to floats in [0, 1], pushed through the streaming
 * sampler one row at a time, and every finished row is scaled back to 8 bit
 * and written to dst->arr. If a NULL argument is given or any table, buffer
 * or sampler cannot be created, the function returns without writing to dst.
 */
void resize_using_lanczos(uint8_2d* src, uint8_2d* dst)
{
    lanc_coef_node_vec* xcnv = NULL;
    lanc_coef_node_vec* ycnv = NULL;
    lanc_sampler* sampler = NULL;
    real_2d* s = NULL;
    real_vec* row_buf = NULL;
    int rows, cols;
    int row = 0;
    int i;
    size_t row_bytes;

    if (!src || !dst)
        return;

    /* The float copy of the destination that used to be allocated here was
     * never read or written, so it has been dropped. */
    s = uint8_to_real(src->data, src->cols, src->rows);
    row_buf = create_real_vec(src->cols);
    if (!s || !row_buf)
        goto cleanup;

    make_lanc_coef(src, dst, &xcnv, &ycnv);
    /* The tables stay NULL when they could not be built; the sampler
     * dereferences them, so stop here. */
    if (!xcnv || !ycnv)
        goto cleanup;

    normalize_real_2d(s, 255.0f);

    sampler = create_lanc_sampler(src, dst, xcnv, ycnv);
    if (!sampler)
        goto cleanup;

    rows = src->rows;
    cols = src->cols;
    row_bytes = (size_t)cols * sizeof(float);
    for (i = 0; i < rows; i++)
    {
        memcpy(row_buf->data, s->arr[i], row_bytes);
        put_line(row_buf, sampler, xcnv);

        /* Drain every destination row that became available. */
        for (;;)
        {
            float* out = get_line(sampler, xcnv, ycnv);
            if (!out)
                break;

            for (int x = 0; x < dst->cols; x++)
            {
                dst->arr[row][x] = (uint8_t)(out[x] * 255.0f);
            }
            row++;
        }
    }

cleanup:
    destroy_lanc_coef_node_vev(&xcnv);
    destroy_lanc_coef_node_vev(&ycnv);
    destroy_lanc_sampler(&sampler);
    destroy_real_vec(&row_buf);
    destroy_real_2d(&s);
}
测试:
/*
 * Demo driver: split `img` into three planes, resize each plane by `factor`
 * with resize_using_lanczos(), merge the results and save them as
 * "resize_lancos3.jpg". Does nothing when img is NULL.
 */
void test_lancos_resize(dai_image* img, float factor)
{
    if (img == NULL)
        return;

    /* Source planes. */
    uint8_2d* r = NULL;
    uint8_2d* g = NULL;
    uint8_2d* b = NULL;
    split_img_data(img, &r, &g, &b);

    /* Destination size; the fractional part is truncated. */
    const int w = img->width;
    const int h = img->height;
    const int w1 = (int)(factor * w);
    const int h1 = (int)(factor * h);

    /* Destination planes. */
    uint8_2d* r1 = create_uint8_2d(h1, w1);
    uint8_2d* g1 = create_uint8_2d(h1, w1);
    uint8_2d* b1 = create_uint8_2d(h1, w1);

    resize_using_lanczos(b, b1);
    resize_using_lanczos(g, g1);
    resize_using_lanczos(r, r1);

    dai_image* img1 = NULL;
    merge_img_data(r1, g1, b1, &img1);
    if (img1 != NULL)
    {
        img1->type = EJPEG;
        dai_save_image("resize_lancos3.jpg", img1);
        dai_destroy_image(&img1);
    }

    destroy_uint8_2d(&r);
    destroy_uint8_2d(&g);
    destroy_uint8_2d(&b);
    destroy_uint8_2d(&r1);
    destroy_uint8_2d(&g1);
    destroy_uint8_2d(&b1);
}