非线性滤波器在通常情况下没有特定的转移函数。一类比较重要的非线性滤波就是统计排序滤波器,统计排序滤波器即对窗口内的像素值进行排序并通过多路选择器选择使用排序后的值,例如中值滤波、最大/最小值滤波等。排序滤波器或者其组合,可以在很多图像处理的场合得到应用。用接近中间位置的排序值作为输出,进行图像的平滑滤波,能得到很好的噪声平滑性质,中值滤波对去除椒盐噪声十分有用,而形态学滤波中主要用到的算子就是最大/最小值滤波。
统计排序滤波的数学定义如下:设 r r r为处理窗口的半径,设 I ( x , y ) I(x,y) I(x,y)为输入像素值, g ( x , y ) g(x,y) g(x,y)为输出像素值,则有如下定义:
g ( x , y ) = S o r t ( I ( x + i , y + j ) , n ) , − r ≤ i ≤ r , − r ≤ j ≤ r , 0 ≤ n < ( 2 r + 1 ) 2 g(x,y)=Sort(I(x+i,y+j),n),-r\leq i \leq r,-r\leq j \leq r,0\leq n <(2r+1)^2 g(x,y)=Sort(I(x+i,y+j),n),−r≤i≤r,−r≤j≤r,0≤n<(2r+1)2
上式中, S o r t Sort Sort算子代表对 i i i和 j j j的有效区域进行排序运算,同时输出排序结果的n个值。由数学定义不难看出,排序滤波器主要完成对图像当前窗口内的所有像素进行排序,同时按照指定输出排序结果。若令 n = ( 2 r + 1 ) 2 / 2 n = (2r+1)^2/2 n=(2r+1)2/2,则上式则变成中值滤波器,若排序结果按升序排列, n = 0 n=0 n=0,则为最小值滤波器。同样,若 n = ( 2 r + 1 ) 2 − 1 n = (2r+1)^2-1 n=(2r+1)2−1,则为最大值滤波器。
基于以上介绍,关于中值滤波如何在FPGA中实现,相信已经有了大致的思绪,关键点在于:
`timescale 1ns / 1ps
module VIP_Matrix_Generate_5X5_8bit(
//global clock
input clk, //cmos video pixel clock
input rst_n, //global reset
//Image data prepred to be processd
input per_frame_vsync, //Prepared Image data vsync valid signal
input per_frame_href, //Prepared Image data href vaild signal
input per_frame_clken, //Prepared Image data output/capture enable clock
input [7:0] per_img_Y, //Prepared Image brightness input
//Image data has been processd
output matrix_frame_vsync, //Prepared Image data vsync valid signal
output matrix_frame_href, //Prepared Image data href vaild signal
output matrix_frame_clken, //Prepared Image data output/capture enable clock
output reg [7:0] matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15, //5X5 Matrix output
output reg [7:0] matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25,
output reg [7:0] matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35,
output reg [7:0] matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45,
output reg [7:0] matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55
);
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//sync row3_data with per_frame_clken & row1_data & raw2_data
wire [7:0] row1_data; //frame data of the 1th row
wire [7:0] row2_data; //frame data of the 2th row
wire [7:0] row3_data; //frame data of the 3th row
wire [7:0] row4_data; //frame data of the 4th row
reg [7:0] row5_data; //frame data of the 5th row
//***********************************************************************此处移位寄存器落后一个时钟周期
always@(posedge clk or negedge rst_n)
begin
if(!rst_n)
row5_data <= 0;
else
begin
if(per_frame_clken) //将输入的信号用像素使能时钟同步一拍,以保证数据与Shift_RAM输出的数据保持同步
row5_data <= per_img_Y;
else
row5_data <= row5_data;
end
end
//借助4个移位寄存器进行串联
c_shift_ram_0 c_shift_ram_row4 (
.D(row5_data), // input wire [7 : 0] D
.CLK(clk), // input wire CLK
.CE(per_frame_clken), // input wire CE
.Q(row4_data) // output wire [7 : 0] Q
);
c_shift_ram_0 c_shift_ram_row3 (
.D(row4_data), // input wire [7: 0] D
.CLK(clk), // input wire CLK
.CE(per_frame_clken), // input wire CE
.Q(row3_data) // output wire [7 : 0] Q
);
c_shift_ram_0 c_shift_ram_row2 (
.D(row3_data), // input wire [7 : 0] D
.CLK(clk), // input wire CLK
.CE(per_frame_clken), // input wire CE
.Q(row2_data) // output wire [7 : 0] Q
);
c_shift_ram_0 c_shift_ram_row1 (
.D(row2_data), // input wire [7 : 0] D
.CLK(clk), // input wire CLK
.CE(per_frame_clken), // input wire CE
.Q(row1_data) // output wire [7 : 0] Q
);
//------------------------------------------
//lag 2 clocks signal sync
reg [1:0] per_frame_vsync_r;
reg [1:0] per_frame_href_r;
reg [1:0] per_frame_clken_r;
always@(posedge clk or negedge rst_n)
begin
if(!rst_n)
begin
per_frame_vsync_r <= 0;
per_frame_href_r <= 0;
per_frame_clken_r <= 0;
end
else
begin
per_frame_vsync_r <= {per_frame_vsync_r[0], per_frame_vsync};
per_frame_href_r <= {per_frame_href_r[0], per_frame_href};
per_frame_clken_r <= {per_frame_clken_r[0], per_frame_clken};
end
end
//为了简化运算,放弃掉第1、2行的边缘数据
//为了简化运算,放弃掉每一行的前两个像素点
//为什么要将移位寄存器的读使能延迟一个时钟周期呢?要搞清楚我要传入矩阵的数据在何时为有效数据,我需要的是row5_data,而他只落后于per_img_Y一个时钟周期
wire read_frame_href = per_frame_href_r[0]; //RAM read href sync signal
wire read_frame_clken = per_frame_clken_r[0]; //RAM read enable
//为什么要将矩阵的读使能延迟两个时钟周期呢?要搞清楚我的矩阵在何时为有效数据,只要matrix_p55传入第一个数据,便为有效数据,而他落后于row5_data一个时钟周期
assign matrix_frame_vsync = per_frame_vsync_r[1];
assign matrix_frame_href = per_frame_href_r[1];
assign matrix_frame_clken = per_frame_clken_r[1];
//---------------------------------------------------------------------------
//---------------------------------------------------
/***********************************************该处得到5x5矩阵又落后了一个时钟周期
(1) Read data from Shift_RAM
(2) Caculate the Sobel
(3) Steady data after Sobel generate
************************************************/
always@(posedge clk or negedge rst_n)
begin
if(!rst_n) begin
{matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15} <= 40'd0;
{matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25} <= 40'd0;
{matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35} <= 40'd0;
{matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45} <= 40'd0;
{matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55} <= 40'd0;
end
else if(read_frame_href) begin
if(read_frame_clken) begin//Shift_RAM data read clock enable
{matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15} <= {matrix_p12, matrix_p13,matrix_p14, matrix_p15,row1_data}; //1th shift input
{matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25} <= {matrix_p22, matrix_p23,matrix_p24, matrix_p25,row2_data}; //2th shift input
{matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35} <= {matrix_p32, matrix_p33,matrix_p34, matrix_p35,row3_data}; //3th shift input
{matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45} <= {matrix_p42, matrix_p43,matrix_p44, matrix_p45,row4_data}; //4th shift input
{matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55} <= {matrix_p52, matrix_p53,matrix_p54, matrix_p55,row5_data}; //5th shift input
end
else begin
{matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15} <= {matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15};
{matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25} <= {matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25};
{matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35} <= {matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35};
{matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45} <= {matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45};
{matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55} <= {matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55};
end
end
else begin
{matrix_p11, matrix_p12, matrix_p13,matrix_p14, matrix_p15} <= 40'd0;
{matrix_p21, matrix_p22, matrix_p23,matrix_p24, matrix_p25} <= 40'd0;
{matrix_p31, matrix_p32, matrix_p33,matrix_p34, matrix_p35} <= 40'd0;
{matrix_p41, matrix_p42, matrix_p43,matrix_p44, matrix_p45} <= 40'd0;
{matrix_p51, matrix_p52, matrix_p53,matrix_p54, matrix_p55} <= 40'd0;
end
end
endmodule
目前我们得到了一个 5 × 5 5\times5 5×5大小的窗口,接下来就是排序算法的设计,如何获取窗口的中间值呢?
由于在FPGA的图像处理领域,中值滤波的处理窗口不会太大,消耗的资源也不会太大,因此,在选择排序方法时优先考虑时间开销比较小的算法,故采用并行全比较排序算法,其基本原理就是在同一时刻完成所有数据与其他数据的比较结果,因此其时间复杂度最小,仅需一个时钟即可完成排序工作,很适合FPGA流水线处理。现假定要对 n n n个数据 d 0 , d 1 , d 2 , . . . , d n d0,d1,d2,...,dn d0,d1,d2,...,dn进行排序,那么进行并行排序的步骤如下。
(1)同时得到这 n n n个数,即对这 n n n个数进行对齐。
(2)同时将这 n n n个数分别与其他书做比较,并记录比较结果。不妨规定:若当前数大于其他数,则将结果记为1;若当前数小于等于其他数,则将结果记为0.
(3)计算每个数第(2)步的所有结果之和:由于一共是 n n n个数目,因此,必然有 n − 1 n-1 n−1个比较结果。
(4)第(3)步结果的值即为排序结果
但是这样做,会有一个缺点就是假设对5个数进行排序,而这5个数完全一致,那么只能得到最大值,而没法得到中值或者其他序列的输出,因此对于相同的数值,必须找出其“异性点”进行区别对待。一个明显的“异性”便是各个数值的输入次序。现做如下规定:
(1)当前数目大于本数据之前输入数据时,结果为1,小于或等于时记为0。
(2)当前数目大于等于本数据之后输入数据时,结果记为1,小于时记为0。
如此一来,便不会出现上述情况。下面就是对窗口内一行5个数据进行排序的代码展示:
`timescale 1ns / 1ps
//该模块实现对5X5模块中的一行数据,也就是5个数据进行排序,输出最大值、次最大值,中间值,次最小值,最小值
//该模块实现了消耗6个CLK,得到5个数据的中位数
module VIP_Sort_1d(
input clk,
input rst_n,
input din_valid,
input [7:0] din_1,din_2,din_3,din_4,din_5,
output dout_valid,
output [7:0] dout_max,dout_max_sec,dout_mid,dout_min_sec,dout_min
);
parameter KSZ = 5 ; //需要排列的数据个数
parameter MAX_NUM = 8;
parameter OUT_ID = (KSZ >> 1); // 待输出的排序ID
reg [MAX_NUM-1:0] cmp_sum[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r2[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r3[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r4[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r5[0:KSZ-1];
reg [7:0] din[0:KSZ-1]; //定义一个存储空间,将输入数据缓存起来
reg [7:0] dout_temp_mid;
reg [7:0] dout_temp_max;
reg [7:0] dout_temp_max_sec;
reg [7:0] dout_temp_min;
reg [7:0] dout_temp_min_sec;
always @ (posedge clk or negedge rst_n) begin //消耗了1个CLK
if(!rst_n) begin
din[0] <= 8'd0;
din[1] <= 8'd0;
din[2] <= 8'd0;
din[3] <= 8'd0;
din[4] <= 8'd0;
end
else if(din_valid == 1'b1)begin
din[0] <= din_1;
din[1] <= din_2;
din[2] <= din_3;
din[3] <= din_4;
din[4] <= din_5;
end
end
reg [39:0] din_1_r,din_2_r,din_3_r,din_4_r,din_5_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
din_1_r <= 40'd0;
din_2_r <= 40'd0;
din_3_r <= 40'd0;
din_4_r <= 40'd0;
din_5_r <= 40'd0;
end
else begin
din_1_r <= {din_1_r[31:0],din_1};
din_2_r <= {din_2_r[31:0],din_2};
din_3_r <= {din_3_r[31:0],din_3};
din_4_r <= {din_4_r[31:0],din_4};
din_5_r <= {din_5_r[31:0],din_5};
end
end
//关键比较代码如下:
//将第一个数据与其他数据做比较,结果存放在cmp_result[0]中
reg cmp_result[0:KSZ-1][0:KSZ-1]; //比较中间结果信号
generate
begin:xhdl1
genvar i;
for(i = 1;i <= KSZ - 1;i = i + 1)
begin:CMP_1st
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_result[0][i] <= 1'b0;
cmp_result[0][0] <= 1'b0;
end
else
cmp_result[0][i] <= (din[0] >= din[i])?1'b1:1'b0;
end
end
end
endgenerate
//其他数据的比较电路
generate
begin:xhdl4
genvar i;
for(i = 2;i <= KSZ;i = i + 1) //除了第一个数据的总共KSZ-1个数据
begin:CMP_Others
begin: xhdl2
genvar j;
for(j = 1;j <= i - 1;j = j + 1)
begin:CMP_Previous //与本数据之前的数据作比较
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_result[i-1][j-1] <= 1'b0;
cmp_result[i-1][i-1] <= 1'b0;
end
else
cmp_result[i-1][j-1] <= (din[i - 1] > din[j - 1])?1'b1:1'b0;
end
end
end
begin:xhdl3
genvar j;
for(j = i + 1;j <= KSZ;j = j + 1)
begin:CMP_After //与本数据之后的数据作比较
always @ (posedge clk or negedge rst_n) begin
if(!rst_n)
cmp_result[i-1][j-1] <= 1'b0;
else
cmp_result[i-1][j-1] <= (din[i - 1] >= din[j - 1])?1'b1:1'b0;
end
end
end
end
end
endgenerate
//将数据有效信号延迟6个CLK
reg [5:0] din_valid_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n)
din_valid_r <= 6'd0;
else
din_valid_r <= {din_valid_r[4:0],din_valid};
end
//将比较结果进行相加
generate
begin:xhdl5
genvar i;
for(i = 0;i <= KSZ - 1;i = i + 1)
begin:CMP_r_sum
if(KSZ == 5)
begin:sum_ksz_5
always @(posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_sum[i] <= {MAX_NUM{1'b0}};
cmp_sum_r[i] <= {MAX_NUM{1'b0}};
cmp_sum_r2[i] <= {MAX_NUM{1'b0}};
cmp_sum_r3[i] <= {MAX_NUM{1'b0}};
cmp_sum_r4[i] <= {MAX_NUM{1'b0}};
cmp_sum_r5[i] <= {MAX_NUM{1'b0}};
end
else begin
if(din_valid_r[1] == 1'b1) begin
cmp_sum_r[i] <= (cmp_result[i][0])+(cmp_result[i][4]);
cmp_sum_r2[i] <= (cmp_result[i][1])+(cmp_result[i][3]);
cmp_sum_r3[i] <= (cmp_result[i][2]);
end
if(din_valid_r[2] == 1'b1) begin
cmp_sum_r4[i] <= cmp_sum_r[i] + cmp_sum_r2[i];
cmp_sum_r5[i] <= cmp_sum_r3[i] ;
end
if(din_valid_r[3] == 1'b1)
cmp_sum[i] <= cmp_sum_r4[i] + cmp_sum_r5[i];
end
end
end
end
end
if(KSZ == 5)
begin:dout_ksz_5
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
dout_temp_mid <= 8'd0;
dout_temp_min_sec<= 8'd0;
dout_temp_max <= 8'd0;
dout_temp_max_sec<= 8'd0;
dout_temp_min <= 8'd0;
end
else begin
if(din_valid_r[4] == 1'b1) begin
if(cmp_sum[0] == OUT_ID + 2) //求解最大值
dout_temp_max <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID + 2)
dout_temp_max <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID + 2)
dout_temp_max <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID + 2)
dout_temp_max <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID + 2)
dout_temp_max <= din_5_r[39:32];
if(cmp_sum[0] == OUT_ID + 1) //求解次最大值
dout_temp_max_sec <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID + 1)
dout_temp_max_sec <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID + 1)
dout_temp_max_sec <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID + 1)
dout_temp_max_sec <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID + 1)
dout_temp_max_sec <= din_5_r[39:32];
if(cmp_sum[0] == OUT_ID) //求解中间值
dout_temp_mid <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID)
dout_temp_mid <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID)
dout_temp_mid <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID)
dout_temp_mid <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID)
dout_temp_mid <= din_5_r[39:32];
if(cmp_sum[0] == OUT_ID - 1) //求解次最小值
dout_temp_min_sec <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID - 1)
dout_temp_min_sec <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID - 1)
dout_temp_min_sec <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID - 1)
dout_temp_min_sec <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID - 1)
dout_temp_min_sec <= din_5_r[39:32];
if(cmp_sum[0] == OUT_ID - 2) //求解最小值
dout_temp_min <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID - 2)
dout_temp_min <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID - 2)
dout_temp_min <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID - 2)
dout_temp_min <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID - 2)
dout_temp_min <= din_5_r[39:32];
end
end
end
end
endgenerate
assign dout_max = dout_temp_max;
assign dout_max_sec = dout_temp_max_sec;
assign dout_mid = dout_temp_mid;
assign dout_min_sec = dout_temp_min_sec;
assign dout_min = dout_temp_min;
assign dout_valid = din_valid_r[5];
endmodule
对窗口内每行数据都调用此模块,我们可以得到每行数据的中值,但是仅仅是这5个中值不能够代表整个窗口数据的中值,我们还需要找到每行数据的次最大值中的最小值,以及次最小值中的最大值与这5个中值一起组成7个数据,找出这7个数据的中值即为整个窗口的中值。我们仅需要对上述模块进行改写,便能实现对7个数据进行比较,代码改写如下:
`timescale 1ns / 1ps
//该模块实现了消耗6个CLK,得到7个数据的中位数
module VIP_Sort_2d(
input clk,
input rst_n,
input din_valid,
input [7:0] din_1,din_2,din_3,din_4,din_5,din_6,din_7,
output dout_valid,
output [7:0] dout_mid
);
parameter KSZ = 7 ; //需要排列的数据个数
parameter MAX_NUM = 8;
parameter OUT_ID = (KSZ >> 1); // 待输出的排序ID
reg [MAX_NUM-1:0] cmp_sum[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r2[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r3[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r4[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r5[0:KSZ-1];
reg [MAX_NUM-1:0] cmp_sum_r6[0:KSZ-1];
reg [7:0] din[0:KSZ-1]; //定义一个存储空间,将输入数据缓存起来
reg [7:0] dout_temp_mid;
always @ (posedge clk or negedge rst_n) begin //消耗了1个CLK
if(!rst_n) begin
din[0] <= 8'd0;
din[1] <= 8'd0;
din[2] <= 8'd0;
din[3] <= 8'd0;
din[4] <= 8'd0;
din[5] <= 8'd0;
din[6] <= 8'd0;
end
else if(din_valid == 1'b1)begin
din[0] <= din_1;
din[1] <= din_2;
din[2] <= din_3;
din[3] <= din_4;
din[4] <= din_5;
din[5] <= din_6;
din[6] <= din_7;
end
end
reg [39:0] din_1_r,din_2_r,din_3_r,din_4_r,din_5_r,din_6_r,din_7_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
din_1_r <= 40'd0;
din_2_r <= 40'd0;
din_3_r <= 40'd0;
din_4_r <= 40'd0;
din_5_r <= 40'd0;
din_6_r <= 40'd0;
din_7_r <= 40'd0;
end
else begin
din_1_r <= {din_1_r[31:0],din_1};
din_2_r <= {din_2_r[31:0],din_2};
din_3_r <= {din_3_r[31:0],din_3};
din_4_r <= {din_4_r[31:0],din_4};
din_5_r <= {din_5_r[31:0],din_5};
din_6_r <= {din_6_r[31:0],din_6};
din_7_r <= {din_7_r[31:0],din_7};
end
end
//关键比较代码如下:
//将第一个数据与其他数据做比较,结果存放在cmp_result[0]中
reg cmp_result[0:KSZ-1][0:KSZ-1]; //比较中间结果信号
generate
begin:xhdl1
genvar i;
for(i = 1;i <= KSZ - 1;i = i + 1)
begin:CMP_1st
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_result[0][i] <= 1'b0;
cmp_result[0][0] <= 1'b0;
end
else
cmp_result[0][i] <= (din[0] >= din[i])?1'b1:1'b0;
end
end
end
endgenerate
//其他数据的比较电路
generate
begin:xhdl4
genvar i;
for(i = 2;i <= KSZ;i = i + 1) //除了第一个数据的总共KSZ-1个数据
begin:CMP_Others
begin: xhdl2
genvar j;
for(j = 1;j <= i - 1;j = j + 1)
begin:CMP_Previous //与本数据之前的数据作比较
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_result[i-1][j-1] <= 1'b0;
cmp_result[i-1][i-1] <= 1'b0;
end
else
cmp_result[i-1][j-1] <= (din[i - 1] > din[j - 1])?1'b1:1'b0;
end
end
end
begin:xhdl3
genvar j;
for(j = i + 1;j <= KSZ;j = j + 1)
begin:CMP_After //与本数据之后的数据作比较
always @ (posedge clk or negedge rst_n) begin
if(!rst_n)
cmp_result[i-1][j-1] <= 1'b0;
else
cmp_result[i-1][j-1] <= (din[i - 1] >= din[j - 1])?1'b1:1'b0;
end
end
end
end
end
endgenerate
//将数据有效信号延迟6个CLK
reg [5:0] din_valid_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n)
din_valid_r <= 6'd0;
else
din_valid_r <= {din_valid_r[4:0],din_valid};
end
//将比较结果进行相加
generate
begin:xhdl5
genvar i;
for(i = 0;i <= KSZ - 1;i = i + 1)
begin:CMP_r_sum
if(KSZ == 7)
begin:sum_ksz_5
always @(posedge clk or negedge rst_n) begin
if(!rst_n) begin
cmp_sum[i] <= {MAX_NUM{1'b0}};
cmp_sum_r[i] <= {MAX_NUM{1'b0}};
cmp_sum_r2[i] <= {MAX_NUM{1'b0}};
cmp_sum_r3[i] <= {MAX_NUM{1'b0}};
cmp_sum_r4[i] <= {MAX_NUM{1'b0}};
cmp_sum_r5[i] <= {MAX_NUM{1'b0}};
cmp_sum_r6[i] <= {MAX_NUM{1'b0}};
end
else begin
if(din_valid_r[1] == 1'b1) begin
cmp_sum_r[i] <= (cmp_result[i][0])+(cmp_result[i][6]);
cmp_sum_r2[i] <= (cmp_result[i][1])+(cmp_result[i][5]);
cmp_sum_r3[i] <= (cmp_result[i][2])+(cmp_result[i][4]);
cmp_sum_r4[i] <= (cmp_result[i][3]);
end
if(din_valid_r[2] == 1'b1) begin
cmp_sum_r5[i] <= cmp_sum_r[i] + cmp_sum_r2[i];
cmp_sum_r6[i] <= cmp_sum_r3[i]+ cmp_sum_r4[i];
end
if(din_valid_r[3] == 1'b1)
cmp_sum[i] <= cmp_sum_r5[i] + cmp_sum_r6[i];
end
end
end
end
end
if(KSZ == 7)
begin:dout_ksz_5
always @ (posedge clk or negedge rst_n) begin
if(!rst_n)
dout_temp_mid <= 8'd0;
else begin
if(din_valid_r[4] == 1'b1) begin
if(cmp_sum[0] == OUT_ID)
dout_temp_mid <= din_1_r[39:32];
if(cmp_sum[1] == OUT_ID)
dout_temp_mid <= din_2_r[39:32];
if(cmp_sum[2] == OUT_ID)
dout_temp_mid <= din_3_r[39:32];
if(cmp_sum[3] == OUT_ID)
dout_temp_mid <= din_4_r[39:32];
if(cmp_sum[4] == OUT_ID)
dout_temp_mid <= din_5_r[39:32];
if(cmp_sum[5] == OUT_ID)
dout_temp_mid <= din_6_r[39:32];
if(cmp_sum[6] == OUT_ID)
dout_temp_mid <= din_7_r[39:32];
end
end
end
end
endgenerate
assign dout_mid = dout_temp_mid;
assign dout_valid = din_valid_r[5];
endmodule
对以上代码进行拼接,我们便能实现基于FPGA图像处理的中值滤波算法,中值滤波模块代码如下:
`timescale 1ns / 1ps
//5X5的中值滤波
module VIP_8Bit_Medfilt(
input clk ,
input rst_n ,
input per_frame_vsync, //Prepared Image data vsync valid signal
input per_frame_href, //Prepared Image data href vaild signal
input per_frame_clken, //Prepared Image data output/capture enable clock
input [7:0] per_img_8Bit, //Prepared Image Bit flag outout(1: Value, 0:inValid)
//Image data has been processd
output post_frame_vsync, //Processed Image data vsync valid signal
output post_frame_href, //Processed Image data href vaild signal
output post_frame_clken, //Processed Image data output/capture enable clock
output [7:0] post_img_8Bit //Processed Image Bit flag outout(1: Value, 0:inValid)
);
//调用5X5的卷积核,消耗2个CLK
wire matrix_frame_vsync; //Prepared Image data vsync valid signal
wire matrix_frame_href; //Prepared Image data href vaild signal
wire matrix_frame_clken; //Prepared Image data output/capture enable clock
wire [7:0] matrix_p11, matrix_p12, matrix_p13, matrix_p14, matrix_p15; //5X5 Matrix output
wire [7:0] matrix_p21, matrix_p22, matrix_p23, matrix_p24, matrix_p25;
wire [7:0] matrix_p31, matrix_p32, matrix_p33, matrix_p34, matrix_p35;
wire [7:0] matrix_p41, matrix_p42, matrix_p43, matrix_p44, matrix_p45;
wire [7:0] matrix_p51, matrix_p52, matrix_p53, matrix_p54, matrix_p55;
VIP_Matrix_Generate_5X5_8bit VIP_Matrix_Generate_5X5_8bit_inst(
//global clock
.clk (clk), //cmos video pixel clock
.rst_n(rst_n), //global reset
//Image data prepred to be processd
.per_frame_vsync(per_frame_vsync), //Prepared Image data vsync valid signal
.per_frame_href (per_frame_href), //Prepared Image data href vaild signal
.per_frame_clken(per_frame_clken), //Prepared Image data output/capture enable clock
.per_img_Y(per_img_8Bit), //Prepared Image brightness input
//Image data has been processd
.matrix_frame_vsync(matrix_frame_vsync), //Prepared Image data vsync valid signal
.matrix_frame_href (matrix_frame_href), //Prepared Image data href vaild signal
.matrix_frame_clken(matrix_frame_clken), //Prepared Image data output/capture enable clock
.matrix_p11(matrix_p11),.matrix_p12(matrix_p12),.matrix_p13(matrix_p13),.matrix_p14(matrix_p14),.matrix_p15(matrix_p15), //3X3 Matrix output
.matrix_p21(matrix_p21),.matrix_p22(matrix_p22),.matrix_p23(matrix_p23),.matrix_p24(matrix_p24),.matrix_p25(matrix_p25),
.matrix_p31(matrix_p31),.matrix_p32(matrix_p32),.matrix_p33(matrix_p33),.matrix_p34(matrix_p34),.matrix_p35(matrix_p35),
.matrix_p41(matrix_p41),.matrix_p42(matrix_p42),.matrix_p43(matrix_p43),.matrix_p44(matrix_p44),.matrix_p45(matrix_p45),
.matrix_p51(matrix_p51),.matrix_p52(matrix_p52),.matrix_p53(matrix_p53),.matrix_p54(matrix_p54),.matrix_p55(matrix_p55)
);
wire dout_valid;
wire [7:0] row1_max,row1_mid,row1_min;
wire [7:0] row2_max,row2_mid,row2_min;
wire [7:0] row3_max,row3_mid,row3_min;
wire [7:0] row4_max,row4_mid,row4_min;
wire [7:0] row5_max,row5_mid,row5_min;
//获取5X5模板中每一行的次最大值、中间值、和次最小值,消耗6个CLK
VIP_Sort_1d VIP_Sort_1d_row1(
.clk (clk),
.rst_n(rst_n),
.din_valid(matrix_frame_clken),
.din_1(matrix_p11),.din_2(matrix_p12),.din_3(matrix_p13),.din_4(matrix_p14),.din_5(matrix_p15),
.dout_valid(dout_valid),
.dout_max(),.dout_max_sec(row1_max),.dout_mid(row1_mid),.dout_min_sec(row1_min),.dout_min()
);
VIP_Sort_1d VIP_Sort_1d_row2(
.clk (clk),
.rst_n(rst_n),
.din_valid(matrix_frame_clken),
.din_1(matrix_p21),.din_2(matrix_p22),.din_3(matrix_p23),.din_4(matrix_p24),.din_5(matrix_p25),
.dout_valid(),
.dout_max(),.dout_max_sec(row2_max),.dout_mid(row2_mid),.dout_min_sec(row2_min),.dout_min()
);
VIP_Sort_1d VIP_Sort_1d_row3(
.clk (clk),
.rst_n(rst_n),
.din_valid(matrix_frame_clken),
.din_1(matrix_p31),.din_2(matrix_p32),.din_3(matrix_p33),.din_4(matrix_p34),.din_5(matrix_p35),
.dout_valid(),
.dout_max(),.dout_max_sec(row3_max),.dout_mid(row3_mid),.dout_min_sec(row3_min),.dout_min()
);
VIP_Sort_1d VIP_Sort_1d_row4(
.clk (clk),
.rst_n(rst_n),
.din_valid(matrix_frame_clken),
.din_1(matrix_p41),.din_2(matrix_p42),.din_3(matrix_p43),.din_4(matrix_p44),.din_5(matrix_p45),
.dout_valid(),
.dout_max(),.dout_max_sec(row4_max),.dout_mid(row4_mid),.dout_min_sec(row4_min),.dout_min()
);
VIP_Sort_1d VIP_Sort_1d_row5(
.clk (clk),
.rst_n(rst_n),
.din_valid(matrix_frame_clken),
.din_1(matrix_p51),.din_2(matrix_p52),.din_3(matrix_p53),.din_4(matrix_p54),.din_5(matrix_p55),
.dout_valid(),
.dout_max(),.dout_max_sec(row5_max),.dout_mid(row5_mid),.dout_min_sec(row5_min),.dout_min()
);
//寻找每一行次最大值中的最小值,次最小值中的最大值,消耗6个CLK
wire dout_valid_col;
wire [7:0] dout_max_min;
VIP_Sort_1d VIP_Sort_1d_col1(
.clk (clk),
.rst_n(rst_n),
.din_valid(dout_valid),
.din_1(row1_max),.din_2(row2_max),.din_3(row3_max),.din_4(row4_max),.din_5(row5_max),
.dout_valid(dout_valid_col),
.dout_max(),.dout_max_sec(),.dout_mid(),.dout_min_sec(),.dout_min(dout_max_min)
);
wire [7:0] dout_min_max;
VIP_Sort_1d VIP_Sort_1d_col2(
.clk (clk),
.rst_n(rst_n),
.din_valid(dout_valid),
.din_1(row1_min),.din_2(row2_min),.din_3(row3_min),.din_4(row4_min),.din_5(row5_min),
.dout_valid(),
.dout_max(dout_min_max),.dout_max_sec(),.dout_mid(),.dout_min_sec(),.dout_min()
);
//此时row1_mid、row2_mid、row3_mid、row4_mid、row5_mid落后于dout_max_min、dout_min_max6个CLK
//为了保持同步,将中间值也延迟6个CLK
reg [47:0] row1_mid_r,row2_mid_r,row3_mid_r,row4_mid_r,row5_mid_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
row1_mid_r <= 48'd0;
row2_mid_r <= 48'd0;
row3_mid_r <= 48'd0;
row4_mid_r <= 48'd0;
row5_mid_r <= 48'd0;
end
else begin
row1_mid_r <= {row1_mid_r[39:0],row1_mid};
row2_mid_r <= {row2_mid_r[39:0],row2_mid};
row3_mid_r <= {row3_mid_r[39:0],row3_mid};
row4_mid_r <= {row4_mid_r[39:0],row4_mid};
row5_mid_r <= {row5_mid_r[39:0],row5_mid};
end
end
wire [7:0] row1_mid_r2,row2_mid_r2,row3_mid_r2,row4_mid_r2,row5_mid_r2;
assign row1_mid_r2 = row1_mid_r[47:40];
assign row2_mid_r2 = row2_mid_r[47:40];
assign row3_mid_r2 = row3_mid_r[47:40];
assign row4_mid_r2 = row4_mid_r[47:40];
assign row5_mid_r2 = row5_mid_r[47:40];
//计算采集到的7个数据的中位数,消耗6个CLK
wire [7:0] dout;
VIP_Sort_2d VIP_Sort_2d_inst(
.clk (clk),
.rst_n(rst_n),
.din_valid(dout_valid_col),
.din_1(dout_max_min),.din_2(dout_min_max),.din_3(row1_mid_r2),.din_4(row2_mid_r2),.din_5(row3_mid_r2),.din_6(row4_mid_r2),.din_7(row5_mid_r2),
.dout_valid(dout_valid_1),
.dout_mid(dout)
);
//为了保持同步,将输入控制信号也延时2+6+6+6 = 20个CLK
reg [19:0] post_frame_vsync_r;
reg [19:0] post_frame_href_r;
reg [19:0] post_frame_clken_r;
always @ (posedge clk or negedge rst_n) begin
if(!rst_n) begin
post_frame_vsync_r <= 20'd0;
post_frame_href_r <= 20'd0;
post_frame_clken_r <= 20'd0;
end
else begin
post_frame_vsync_r <= {post_frame_vsync_r[18:0],per_frame_vsync};
post_frame_href_r <= {post_frame_href_r[18:0] ,per_frame_href };
post_frame_clken_r <= {post_frame_clken_r[18:0],per_frame_clken};
end
end
assign post_frame_vsync = post_frame_vsync_r[19];
assign post_frame_href = post_frame_href_r [19];
assign post_frame_clken = post_frame_clken_r[19];
assign post_img_8Bit = dout;
endmodule
现在,我们来仿真观察效果如何
原图(加入噪声密度:0.3的椒盐噪声):
处理之后的图:
可以看到效果还是非常明显的,但是仔细观察经过中值滤波后的图像就会发现,在整幅图像的左边缘和下边缘会存在“黑边”,这是由于代码设计的缺陷所导致的,为了兼顾代码的简单可操作,同时保证效果,舍弃了边缘数据,感兴趣的同学可以继续研究,如何让算法更完善,效果更好。
参考文献:牟新刚,周晓,郑晓亮.基于FPGA的数字图像处理原理及应用;
阮秋琦.数字图像处理(MATLAB版)(第二版)》(本科教学版);
CrazyBingo.基于VIP_Board Big的FPGA入门进阶及图像处理算法开发教程;
韩玉鑫,王晓凯,陆金旺.实时彩色图像自适应中值滤波算法的FPGA实现[J].计算测量与控制.2022.30(7):173-180;
穆向阳,雷妍.一种中值滤波快速系统的FPGA实现[J].西安石油大学学报.2022.6(24):1-8;