sobel图像边缘检测算法的Python及Verilog验证

引言

      本文是之前AI加速器项目的一个补充,对应该项目中PL(Programmable Logic,可编程逻辑)部分的工作,主要介绍一种Sobel图像边缘检测算法的Python及Verilog实现。

一、sobel图像边缘检测算法的Python实现

      Sobel图像边缘检测算法的核心是对图像做卷积操作。本文使用的是下面这个3*3的卷积核(即Sobel算子的水平方向梯度核,用于检测竖直方向的边缘):

$$
G_x = \begin{bmatrix} -1 & 0 & 1 \\ -2 & 0 & 2 \\ -1 & 0 & 1 \end{bmatrix}
$$

      图片的卷积操作简单来说就是让卷积核在图像上逐点滑动,将对应位置的像素与卷积核系数相乘后再累加。具体资料各位可以上网查找,关于原理方面的介绍非常多,本文不再赘述。在本文中,利用Python对Lenna图做了一个简单的边缘提取操作:一是为Verilog提供测试数据,二是得到参考结果,用来与Verilog仿真结果进行比较,以判断Verilog仿真结果是否正确。

  • Python实现代码
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 15 14:44:19 2020
@author: lenovo
"""
import matplotlib.pyplot as plt # plt 用于显示图片
import matplotlib.image as mpimg # mpimg 用于读取图片
#import skimage
import tensorflow as tf
#import numpy as np
# 有符号数转十六进制补码,返回值为字符串
def int_to_hex(num):
    """Format an integer as a hex string (including the ``0x`` prefix).

    Negative inputs are first wrapped to their low 8 bits (``num & 0xff``),
    i.e. the two's-complement byte; zero and positive inputs are passed to
    ``hex`` unchanged, whatever their magnitude.
    """
    value = (num & 0xff) if num < 0 else num
    return hex(value)

image = mpimg.imread('lenna.jpg')  # load the Lenna test image
# NOTE(review): the reshape below requires exactly 512*512 values, i.e. a
# single-channel 512x512 image -- confirm the input file matches.
#image = skimage.transform.resize(image, (64,64))  # optionally shrink the image
#image = (image * 255).astype(np.uint8)  # rescale to the 0..255 range

# Optional (disabled): dump the pixels for the Verilog testbench, one hex
# byte per line (512*512 = 262144 lines).  The file is opened once, in 'w'
# mode, inside a `with` block so that it is always closed and re-running the
# script does not append a second copy of the image.
# with open('lenna.txt', mode='w') as fp:
#     for i in range(512):
#         for j in range(512):
#             fp.write(int_to_hex(image[i][j])[2:4] + "\n")

# Show the original picture
plt.figure(1)
plt.imshow(image)
plt.title('before filter')
plt.axis('off')  # hide the axes

# Convert to a float tensor with layout [batch, height, width, channels]
image = tf.convert_to_tensor(image, dtype=tf.float32)
image = tf.reshape(image, [1, 512, 512, 1])

# 3x3 kernel, laid out as [height, width, in_channels, out_channels].
# Named `sobel_kernel` rather than `filter` to avoid shadowing the builtin.
sobel_kernel = tf.constant([[-1.0, 0, 1.0], [-2.0, 0, 2.0], [-1.0, 0, 1.0]],
                           shape=[3, 3, 1, 1], dtype=tf.float32)
# Stride 1 with no padding: a 512x512 input yields a 510x510 output
conv = tf.nn.conv2d(image, sobel_kernel, strides=[1, 1, 1, 1], padding='VALID')
# Drop the batch and channel dimensions
conv = tf.reshape(conv, shape=[510, 510])

# Show the filtered picture
plt.figure(2)
plt.imshow(conv.numpy())
plt.title('after filter')
plt.axis('off')  # hide the axes

# Without this call no window appears when the file is run as a plain script.
plt.show()
  • 结果展示

sobel图像边缘检测算法的Python及Verilog验证_第1张图片

sobel图像边缘检测算法的Python及Verilog验证_第2张图片

二、sobel图像边缘检测算法的Verilog实现

      本节主要介绍Sobel图像边缘检测算法的Verilog实现,仿真平台采用Modelsim。FPGA具有并行处理能力,运行速度很快,因此在大规模图像和信号处理方面有非常广阔的应用,受到了业界的青睐。

  • 顶层文件
// 3x3 convolution of a streamed image with the fixed kernel
//     [-1 0 1; -2 0 2; -1 0 1]
//
// Operation (one frame per reset):
//   1. While `start` is high, one pixel per clock is taken from `conv_in`
//      in row-major order and stored in conv_iArrayData.
//   2. After IMG_H*IMG_W pixels the (IMG_H-2)x(IMG_W-2) results are computed,
//      one per clock, and stored in conv_oArrayData; `finish` is then set.
//   3. While `finish` is high and `start` is low, the results are streamed
//      out on `conv_out` in row-major order, one per clock.
//   `finish` stays high until rst_n is asserted, so a reset is required
//   before another frame can be processed.
//
// Parameters (defaults reproduce the original 512x512 configuration):
//   IMG_H, IMG_W    input image size in pixels, each >= 3
//   CNT_W           width of the row/column counters; 2**CNT_W must be
//                   >= max(IMG_H, IMG_W)
//   PIXEL_UNSIGNED  1: the 8 bits of conv_in are an unsigned pixel 0..255
//                      (what a $readmemh file of 00..ff bytes contains);
//                   0: legacy behaviour, the bits are two's complement
//                      (-128..127), so pixels above 127 become negative.
module conv #(
	parameter IMG_H          = 512,
	parameter IMG_W          = 512,
	parameter CNT_W          = 10,
	parameter PIXEL_UNSIGNED = 1
) (
	//system signals
	input		rst_n					,// asynchronous reset, active low
	input		clk						,// clock
	input		start					,// hold high while pixels are streamed in
	input		wire  signed [7:0]conv_in,// pixel input, one per clock
	output		reg  finish				,// set once all results are computed
	output		reg  signed [19:0]conv_out// result output, one per clock after finish
);
localparam OUT_H = IMG_H - 2;// result rows    (3x3 kernel, no padding)
localparam OUT_W = IMG_W - 2;// result columns (3x3 kernel, no padding)

// Intermediate values
wire signed  [8:0] conv_temp[0:8];// 3x3 window, extended to 9 bits so 0..255 fits as signed
wire signed [19:0] conv_mul[0:8];
wire signed [19:0] conv_result;

// Kernel [-1,0,1;-2,0,2;-1,0,1] as constants.  Driving it from an
// always @(negedge rst_n) block left it at X until reset had fallen once
// and is not a synthesizable register description.
wire signed [3:0] conv_core[0:8];
assign conv_core[0] = -4'sd1;
assign conv_core[1] =  4'sd0;
assign conv_core[2] =  4'sd1;
assign conv_core[3] = -4'sd2;
assign conv_core[4] =  4'sd0;
assign conv_core[5] =  4'sd2;
assign conv_core[6] = -4'sd1;
assign conv_core[7] =  4'sd0;
assign conv_core[8] =  4'sd1;

// Extend a stored pixel to 9 signed bits: zero-extend when pixels are
// unsigned, sign-extend for the legacy signed interpretation.
function signed [8:0] pixel_ext;
	input [7:0] pixel;
	begin
		pixel_ext = (PIXEL_UNSIGNED != 0) ? {1'b0, pixel} : {pixel[7], pixel};
	end
endfunction

// Load the input image, row by row
reg 	[CNT_W-1:0]ini_count	;// input row counter
reg 	[CNT_W-1:0]inj_count	;// input column counter
reg     conv_cal_start      ;// set once the whole image has been stored
reg  signed  [7:0]conv_iArrayData[0:IMG_H-1][0:IMG_W-1];// input image storage
always @ (posedge clk or negedge rst_n) begin
	if(!rst_n)begin
		ini_count <= {CNT_W{1'b0}};
		inj_count <= {CNT_W{1'b0}};
		conv_cal_start <= 1'b0;
	end
	else if(finish)begin
		// Frame done: clear synchronously instead of using `finish`
		// as an asynchronous reset.
		ini_count <= {CNT_W{1'b0}};
		inj_count <= {CNT_W{1'b0}};
		conv_cal_start <= 1'b0;
	end
	else if(start && !conv_cal_start && (ini_count < IMG_H))
	begin
		// !conv_cal_start: once the image is complete, extra cycles of
		// `start` must not overwrite the last pixel.
		if (inj_count < IMG_W - 1)
			inj_count <= inj_count + 1'b1;
		else if(ini_count < IMG_H - 1)
		begin
			ini_count <= ini_count + 1'b1;
			inj_count <= {CNT_W{1'b0}};
		end
		else
			conv_cal_start <= 1'b1;// last pixel is being stored in this cycle
		conv_iArrayData[ini_count][inj_count] <= conv_in;
	end
end

// Run the convolution, one output position per clock, row by row
reg 	[CNT_W-1:0]convi_count	;// result row counter
reg 	[CNT_W-1:0]convj_count	;// result column counter
reg  signed  [19:0]conv_oArrayData[0:OUT_H-1][0:OUT_W-1];// result storage
always @ (posedge clk or negedge rst_n) begin
	if(!rst_n)begin
		convi_count <= {CNT_W{1'b0}};
		convj_count <= {CNT_W{1'b0}};
		finish <= 1'b0;
	end
	else if(conv_cal_start && !finish && (convi_count < OUT_H))
	begin
		if (convj_count < OUT_W - 1)
			convj_count <= convj_count + 1'b1;
		else if(convi_count < OUT_H - 1)
		begin
			convi_count <= convi_count + 1'b1;
			convj_count <= {CNT_W{1'b0}};
		end
		else
			finish <= 1'b1;// last result is being stored in this cycle
		conv_oArrayData[convi_count][convj_count] <= conv_result;
	end
end

// Combinational datapath
// 3x3 window whose top-left corner is (convi_count, convj_count)
assign conv_temp[0] = pixel_ext(conv_iArrayData[convi_count+0][convj_count+0]);
assign conv_temp[1] = pixel_ext(conv_iArrayData[convi_count+0][convj_count+1]);
assign conv_temp[2] = pixel_ext(conv_iArrayData[convi_count+0][convj_count+2]);
assign conv_temp[3] = pixel_ext(conv_iArrayData[convi_count+1][convj_count+0]);
assign conv_temp[4] = pixel_ext(conv_iArrayData[convi_count+1][convj_count+1]);
assign conv_temp[5] = pixel_ext(conv_iArrayData[convi_count+1][convj_count+2]);
assign conv_temp[6] = pixel_ext(conv_iArrayData[convi_count+2][convj_count+0]);
assign conv_temp[7] = pixel_ext(conv_iArrayData[convi_count+2][convj_count+1]);
assign conv_temp[8] = pixel_ext(conv_iArrayData[convi_count+2][convj_count+2]);
// Element-wise products; both operands are signed, so the multiplication
// is signed and evaluated at the 20-bit width of the target.
assign conv_mul[0] = conv_core[0]*conv_temp[0];
assign conv_mul[1] = conv_core[1]*conv_temp[1];
assign conv_mul[2] = conv_core[2]*conv_temp[2];
assign conv_mul[3] = conv_core[3]*conv_temp[3];
assign conv_mul[4] = conv_core[4]*conv_temp[4];
assign conv_mul[5] = conv_core[5]*conv_temp[5];
assign conv_mul[6] = conv_core[6]*conv_temp[6];
assign conv_mul[7] = conv_core[7]*conv_temp[7];
assign conv_mul[8] = conv_core[8]*conv_temp[8];
// Sum of the nine products
assign conv_result = conv_mul[0] + conv_mul[1] + conv_mul[2] +
                     conv_mul[3] + conv_mul[4] + conv_mul[5] +
                     conv_mul[6] + conv_mul[7] + conv_mul[8];

// Stream the results out, row by row
reg 	[CNT_W-1:0]outi_count	;// output row counter
reg 	[CNT_W-1:0]outj_count	;// output column counter
always @ (posedge clk or negedge rst_n) begin
	if(!rst_n)begin
		outi_count <= {CNT_W{1'b0}};
		outj_count <= {CNT_W{1'b0}};
		conv_out   <= 20'sd0;
	end
	else if(start)begin
		// Clear synchronously instead of using `start` as an
		// asynchronous reset.
		outi_count <= {CNT_W{1'b0}};
		outj_count <= {CNT_W{1'b0}};
	end
	else if(finish && (outi_count < OUT_H))begin
		if(outj_count < OUT_W - 1)
			outj_count <= outj_count + 1'b1;
		else if(outi_count < OUT_H - 1)
		begin
			outi_count <= outi_count + 1'b1;
			outj_count <= {CNT_W{1'b0}};
		end
		// After the last position the counters hold, so the final
		// result stays on conv_out.
		conv_out <= conv_oArrayData[outi_count][outj_count];
	end
end
endmodule
  •  测试文件
`timescale 1ns/1ps
// Testbench for conv: loads the image from lenna.txt, streams it into the
// DUT, waits for `finish`, captures the streamed results and writes them to
// result.txt (one decimal value per line), then ends the simulation.
module conv_tb ();
// Image geometry; must match the size the conv instance is built for.
localparam IMG_H = 512;
localparam IMG_W = 512;
localparam OUT_H = IMG_H - 2;
localparam OUT_W = IMG_W - 2;

// input
reg clk;
reg rst_n;
reg start;// load/start signal for the DUT
reg signed[7:0]conv_in;// pixel driven into the DUT

// output
wire finish;
wire  signed [19:0]conv_out;// result stream from the DUT

integer i;
integer j;
// Input image as a one-dimensional memory in row-major order: Verilog
// $readmemh takes a 1-D memory (multi-dimensional targets need
// SystemVerilog).  Unsigned, because the file holds bytes 00..ff.
reg         [7:0]myimage[0:IMG_H*IMG_W-1];
reg  signed [19:0]image_result[0:OUT_H-1][0:OUT_W-1];// captured results
integer out_file;// file handle for the result file

// Device under test
conv demo(.rst_n(rst_n),
		  .clk(clk),
		  .start(start),
		  .conv_in(conv_in),
		  .finish(finish),
		  .conv_out(conv_out));

initial
begin
	$display("step1:Load  Data");
  	$readmemh("C:/Users/lenovo/Desktop/conv_verilog/lenna.txt",myimage);
  	for(i = 0; i < IMG_H; i = i + 1)
  		for (j = 0; j < IMG_W; j = j + 1)
    		$display("%d",myimage[i*IMG_W + j]);
    // Clock and reset initialisation
	clk = 1'b0;
	start = 1'b0;
	rst_n = 1'b1;
	#5 rst_n = 1'b0;
	#5 rst_n = 1'b1;
	   start = 1'b1;
	// Stream the image into the DUT.  conv_in changes on multiples of 10 ns
	// and the clock rises at 5, 15, 25, ... ns, so every pixel is stable
	// around the edge that samples it.
	$display("step2:Write Data to conv_iArrayData");
	for(i = 0; i < IMG_H; i = i + 1)
  		for(j = 0; j < IMG_W; j = j + 1)
  		begin
      		conv_in = myimage[i*IMG_W + j];
      		#10;
  		end
  	start = 1'b0;// all pixels written
  	// Wait for the convolution to complete
  	$display("step3:Convolution");
  	while(!finish) #10;
  	#10;// the first result is on conv_out one clock after finish rises
  	// Open the result file and stop with a message if that fails
  	out_file = $fopen("C:/Users/lenovo/Desktop/conv_verilog/result.txt","w");
  	if(out_file == 0)
  	begin
  		$display("ERROR: cannot open result file");
  		$finish;
  	end
  	$display("step4:write result");
  	// Capture one result per clock, row by row
  	for(i = 0; i < OUT_H; i = i + 1)
  		for(j = 0; j < OUT_W; j = j + 1)
  		begin
      		image_result[i][j] = conv_out;
      		#10;
  		end
  	for(i = 0; i < OUT_H; i = i + 1)
  		for(j = 0; j < OUT_W; j = j + 1)begin
  			$display("%d",image_result[i][j]);
  			$fwrite(out_file,"%d\n",$signed(image_result[i][j]));
  		end
  	$fclose(out_file);// flush and release the file

	$display("done");
	$finish;// the clock free-runs, so the simulation must be ended explicitly
end
always #5 clk = ~clk;
endmodule
  • 仿真波形

sobel图像边缘检测算法的Python及Verilog验证_第3张图片

verilog仿真通过后将结果输出到.txt文件中,我们在Python中进行读取验证,与Python结果进行比较。

# -*- coding: utf-8 -*-
"""
Created on Sun Aug 16 15:46:04 2020
@author: lenovo
"""
import matplotlib.pyplot as plt # plt 用于显示图片
import numpy as np

# Read the result file written by the Verilog testbench: one decimal value
# per line.  The values are right-aligned with leading spaces, so the default
# whitespace splitting is used instead of a single-space delimiter.
result = np.genfromtxt("result.txt")
# np.reshape raises ValueError unless exactly 510*510 values were read
result = np.reshape(result, [510, 510])

# Show the filtered picture
plt.imshow(result)
plt.title('after fpga')
plt.axis('off')  # hide the axes

# Without this call no window appears when the file is run as a plain script.
plt.show()
  •    Verilog 仿真结果

sobel图像边缘检测算法的Python及Verilog验证_第4张图片

     Verilog仿真结果图片与Python结果图片有些许不同,但大体一致。需要注意的是,整数乘加运算本身并不会引入量化误差,这种不同更可能来自两边对数据的解释方式不一致(例如:像素在Python中按0~255的无符号值参与运算,而在Verilog中若按8位有符号数处理,大于127的像素值就会变成负数)。但总的来说,结果可以接受。

 

 

你可能感兴趣的:(图像处理)