1>以fft256为运算核心,linux驱动将待计算的fft数据写入RAM,
2>告知mkg_core进行reset 和start,
3>mkg_core控制master,通过DMA形式,读取ram中的数据,
4>送给fft256,
5>然后获得fft256的运算结果,
6>写回ram,
7>最后驱动读取计算结果并打印。
注意:
这次的RAM是自己实现的一块独立的专用的RAM,不是使用的外部的DDR SDRAM。
2>mkg_wb_slave模块
3>mkg_core模块
4>FFT256模块
5>mkg_wb_master模块
/*
 * mkg_wb_ram.v
 *
 * rill create 2013-04-18
 * [email protected]
 *
 * Wishbone slave holding two 256-word memories: `Data` (FFT input) and
 * `Result` (FFT output).  Every access is answered with a single-cycle ACK
 * on the clock edge after the request is seen in Idle.
 *
 * Notes on the behaviour of this revision:
 *  - Reads inside the data window do NOT return `Data`; they return a sample
 *    from Wave_ROM256, rounded to `nb` bits and packed as
 *    {6'b0, real, 6'b0, imag}.  `Data` is therefore write-only here.
 *  - With the default parameters the result window overlaps the data window;
 *    the data window is decoded first and wins for overlapping addresses.
 *  - Addresses outside both windows are never acknowledged.
 *  - wb_err_o and wb_rty_o are only ever driven to 0.
 */
//`include "mkg_defines.v"
module mkg_wb_ram
(
    wb_clk,
    wb_rst,

    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,

    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o
);

input               wb_clk;
input               wb_rst;

input       [31:0]  wb_adr_i;
input               wb_stb_i;
input               wb_cyc_i;
input       [2:0]   wb_cti_i;
input       [1:0]   wb_bte_i;
input       [31:0]  wb_dat_i;
input       [3:0]   wb_sel_i;
input               wb_we_i;

output reg  [31:0]  wb_dat_o;
output reg          wb_ack_o;
output reg          wb_err_o;
output reg          wb_rty_o;

// Width of each stimulus sample handed to the FFT.
parameter nb = 10;

// Address map.
parameter my_ram_adr     = 8'h98;
parameter data_adr_start = 32'h9800_0000;
parameter data_adr_end   = 32'h9800_03fc;
parameter rslt_adr_start = 32'h9800_0100;
parameter rslt_adr_end   = 32'h9801_03fc;
parameter error_code     = 32'habcd_dcba;
parameter Numb           = 256;

// One-hot state encoding.
parameter Idle       = 5'b00001;
parameter Read_Data  = 5'b00010;
parameter Read_Rslt  = 5'b00100;
parameter Write_Data = 5'b01000;
parameter Write_Rslt = 5'b10000;

reg [31:0] Data   [Numb-1:0];
reg [31:0] Result [Numb-1:0];

reg [4:0] state, next_state;

// ---------------------------------------------------------------------------
// Stimulus ROM: 16-bit samples are cut down to their top `nb` bits; negative
// samples get +1 added when nb != 16.
// ---------------------------------------------------------------------------
wire [7:0]    word_idx = wb_adr_i[9:2];   // word address shared by ROM and RAMs
wire [15:0]   rom_re;
wire [15:0]   rom_im;

wire [nb-1:0] re_top = rom_re[15:15-nb+1];
wire [nb-1:0] im_top = rom_im[15:15-nb+1];
wire [nb-1:0] re_rnd;
wire [nb-1:0] im_rnd;

assign re_rnd = (rom_re[15] && (nb != 16)) ? (re_top + 1) : re_top;
assign im_rnd = (rom_im[15] && (nb != 16)) ? (im_top + 1) : im_top;

Wave_ROM256 my_rom
(
    .ADDR    (word_idx),
    .DATA_RE (rom_re),
    .DATA_IM (rom_im)
);

// ---------------------------------------------------------------------------
// Request decode.
// ---------------------------------------------------------------------------
wire bus_req   = wb_stb_i && wb_cyc_i;
wire in_data   = (wb_adr_i >= data_adr_start) && (wb_adr_i <= data_adr_end);
wire in_result = (wb_adr_i >= rslt_adr_start) && (wb_adr_i <= rslt_adr_end);

// State register.
always @(posedge wb_clk)
begin
    if (wb_rst)
        state <= Idle;
    else
        state <= next_state;
end

// Next-state logic: every access state lasts exactly one cycle, so anything
// other than "Idle with a decodable request" goes back to Idle.
always @(*)
begin
    next_state = Idle;

    if ((state == Idle) && bus_req)
    begin
        case ({wb_we_i, in_data, in_result})
            3'b010, 3'b011: next_state = Read_Data;   // data window has priority
            3'b001:         next_state = Read_Rslt;
            3'b110, 3'b111: next_state = Write_Data;
            3'b101:         next_state = Write_Rslt;
            default:        next_state = Idle;
        endcase
    end
end

// Memory write ports (no writes while in reset).
always @(posedge wb_clk)
begin
    if (!wb_rst)
    begin
        if (next_state == Write_Data)
            Data[word_idx] <= wb_dat_i;

        if (next_state == Write_Rslt)
            Result[word_idx] <= wb_dat_i;
    end
end

// Wishbone response registers.
always @(posedge wb_clk)
begin
    if (wb_rst)
    begin
        wb_dat_o <= 0;
        wb_ack_o <= 0;
        wb_err_o <= 0;
        wb_rty_o <= 0;
    end
    else
    begin
        case (next_state)
            Write_Data,
            Write_Rslt:
            begin
                wb_ack_o <= 1'b1;
            end

            Read_Data:
            begin
                // Stored input is not read back: wb_dat_o <= Data[wb_adr_i[9:2]];
                wb_dat_o <= {6'b0, re_rnd, 6'b0, im_rnd};
                wb_ack_o <= 1'b1;
            end

            Read_Rslt:
            begin
                wb_dat_o <= Result[word_idx];
                wb_ack_o <= 1'b1;
            end

            // Idle and any unexpected encoding: release the bus.
            default:
            begin
                wb_dat_o <= 0;
                wb_ack_o <= 0;
                wb_err_o <= 0;
                wb_rty_o <= 0;
            end
        endcase
    end
end

endmodule

/************** EOF ****************/
ram的reset timing:
ram的work timing:
/*
 * mkg_wb_slave.v
 *
 * rill create 2013-04-18
 *
 * Wishbone slave giving software control of mkg_core.
 *
 *  - Any read returns the registered copy of status_i (the address is not
 *    decoded on reads).
 *  - A write to offset 0x0 (wb_adr_i[8:0] == 0) is stored in reg_config and
 *    then interpreted:
 *      * value 1      : enable_o and reset_o are driven high while the FSM
 *                       steps through the four s_config* sub-states;
 *      * other values : enable_o is raised and the value is presented on
 *                       config_o until the FSM is back in idle.
 *  - Writes to any other offset are acknowledged and ignored.
 *
 * wb_err_o and wb_rty_o are tied low.
 */
//`include "mkg_defines.v"
module mkg_wb_slave
(
    wb_clk,
    wb_rst,

    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,

    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o,

    //internal signals
    status_i,
    reset_o,
    enable_o,
    config_o
);

input               wb_clk;
input               wb_rst;

input       [31:0]  wb_adr_i;
input               wb_stb_i;
input               wb_cyc_i;
input       [2:0]   wb_cti_i;
input       [1:0]   wb_bte_i;
input       [31:0]  wb_dat_i;
input       [3:0]   wb_sel_i;
input               wb_we_i;

output reg  [31:0]  wb_dat_o;
output reg          wb_ack_o;
output              wb_err_o;
output              wb_rty_o;

//internal signals
input       [31:0]  status_i;
output reg          reset_o;
output reg          enable_o;
output reg  [31:0]  config_o;

//==local defines
parameter s_idle   = 3'b000;
parameter s_read   = 3'b001;
parameter s_write  = 3'b010;
parameter s_config = 3'b100;

reg [2:0] state = s_idle;

//reset pulse stretching sub-states
parameter s_config1 = 3'b000;
parameter s_config2 = 3'b001;
parameter s_config3 = 3'b010;
parameter s_config4 = 3'b100;

reg [2:0] config_status = s_config1;

reg [31:0] reg_config; //index:0x0, write
reg [31:0] reg_status; //index:0x4, read

//==local logic
assign wb_err_o = 0;
assign wb_rty_o = 0;

// Register the core status so reads see a stable value.
always @(posedge wb_clk)
begin
    if (wb_rst)
    begin
        reg_status <= 32'h0;
    end
    else
    begin
        reg_status <= status_i;
    end
end

// Wishbone interface & core config logic.
always @(posedge wb_clk)
begin
    if (wb_rst)
    begin
        state         <= s_idle;
        // Sub-state, ACK and read data must also be initialised by reset,
        // otherwise ACK can stay asserted for the whole reset period.
        config_status <= s_config1;
        wb_dat_o      <= 32'h0;
        wb_ack_o      <= 1'b0;
        reset_o       <= 1'b0;
        enable_o      <= 1'b0;
        config_o      <= 1'b0;
        reg_config    <= 32'h0;
    end
    else
    begin
        case (state)
            s_idle:   begin task_idle();   end
            s_read:   begin task_read();   end
            s_write:  begin task_write();  end
            s_config: begin task_config(); end
            default:  begin state <= s_idle; end
        endcase
    end
end

// Idle: drop every output, then dispatch on a new bus request.
task automatic task_idle;
begin
    wb_dat_o <= 1'b0; //output initial
    wb_ack_o <= 1'b0;
    reset_o  <= 1'b0;
    enable_o <= 1'b0;
    config_o <= 1'b0;

    if (wb_stb_i && wb_cyc_i && wb_we_i)
    begin
        state <= s_write;
    end
    else if (wb_stb_i && wb_cyc_i && !wb_we_i)
    begin
        state <= s_read;
    end
    else
    begin
        state <= s_idle;
    end
end
endtask

// Read: return the status register and acknowledge.
task automatic task_read;
begin
    wb_dat_o <= reg_status;
    wb_ack_o <= 1'b1;
    state    <= s_idle;
end
endtask

// Write: only offset 0 carries a command; everything else is just ACKed.
task automatic task_write;
begin
    case (wb_adr_i[8:0])
        9'h0:
        begin
            reg_config <= wb_dat_i; //32'h1;
            wb_ack_o   <= 1'b1;
            state      <= s_config;
        end

        default:
        begin
            wb_ack_o <= 1'b1;
            state    <= s_idle;
        end
    endcase
end
endtask

// Config: turn the stored command into control strobes for mkg_core.
task automatic task_config;
begin
    if (32'h1 == reg_config) //reset mkg_core
    begin
        enable_o <= 1'b1;
        reset_o  <= 1'b1;

        case (config_status)
            s_config1: begin config_status <= s_config2; end
            s_config2: begin config_status <= s_config3; end
            s_config3: begin config_status <= s_config4; end
            s_config4:
            begin
                config_status <= s_config1;
                state         <= s_idle;
            end
            // Illegal encoding: recover instead of holding reset_o forever.
            default:
            begin
                config_status <= s_config1;
                state         <= s_idle;
            end
        endcase
    end
    else
    begin
        enable_o <= 1'b1;
        config_o <= reg_config;
        state    <= s_idle;
    end
end
endtask

endmodule

/************** EOF ****************/
reset仿真:
config仿真:
有点复杂,请阅读rtl code。
/*
 * mkg_core.v
 *
 * rill create 2013-04-18
 *
 * Sequencer between the Wishbone master and the FFT256 core.
 *
 * Top-level flow (core_status):
 *   s_idle         : wait for start_flag (bit 0 of s_config_i while
 *                    s_enable_i is high)
 *   s_read_ram     : fetch 256 words from 0x9800_0000 + 4*i via the master
 *                    and feed bits [25:16] / [9:0] to the FFT as real / imag
 *   s_write_buffer : capture 256 FFT output samples into result_buffer,
 *                    indexed by fft_addr_i
 *   s_write_ram    : write result_buffer[i] to 0x9800_0400 + 4*i via the
 *                    master
 *   s_done         : stay here until the next reset
 *
 * The FSM is reset only when rst and s_enable_i are high together.
 * s_status_o exposes {core_status, read_ram_status, write_buffer_status,
 * write_ram_status} for debugging.
 */
//`include "mkg_defines.v"
module mkg_core
(
    clk,
    rst,

    s_enable_i,
    s_config_i,
    s_status_o,

    fft_ovf1_i,
    fft_ovf2_i,
    fft_ready_i,
    fft_addr_i,
    fft_dor_i,
    fft_doi_i,
    fft_en_o,
    fft_start_o,
    fft_shift_o,
    fft_dr_o,
    fft_di_o,

    m_ack_write_i,
    m_ack_read_i,
    m_dat_i,
    m_write_o,
    m_read_o,
    m_addr_o,
    m_dat_o
);

input               clk;
input               rst;

input               s_enable_i;
input       [31:0]  s_config_i;
output reg  [31:0]  s_status_o;

input               fft_ovf1_i;
input               fft_ovf2_i;
input               fft_ready_i;
input       [7:0]   fft_addr_i;
input       [13:0]  fft_dor_i;
input       [13:0]  fft_doi_i;
output reg          fft_en_o;
output reg          fft_start_o;
output reg  [3:0]   fft_shift_o;
output reg  [9:0]   fft_dr_o;
output reg  [9:0]   fft_di_o;

input               m_ack_write_i;
input               m_ack_read_i;
input       [31:0]  m_dat_i;
output reg          m_write_o;
output reg          m_read_o;
output reg  [31:0]  m_addr_o;
output reg  [31:0]  m_dat_o;

//===local fsm status defines========
//=core_status define
parameter s_idle         = 8'b0000_0000;
parameter s_read_ram     = 8'b0000_0001;
parameter s_write_buffer = 8'b0000_0010;
parameter s_write_ram    = 8'b0000_0100;
parameter s_done         = 8'b0000_1000;

reg [7:0] core_status = s_idle;

//=read_ram_status define
parameter s_read_ram_start           = 8'b0000_0000;
parameter s_read_ram_wait_master_ack = 8'b0000_0001;
parameter s_read_ram_write_fft       = 8'b0000_0010;
parameter s_read_ram_change_index    = 8'b0000_0100;

reg [7:0] read_ram_status = s_read_ram_start;

//=write_buffer_status define
parameter s_write_buffer_wait_ready = 8'b0000_0000;
parameter s_write_buffer_write      = 8'b0000_0001;

reg [7:0] write_buffer_status = s_write_buffer_wait_ready;

//=write_ram_status define
parameter s_write_ram_start           = 8'b0000_0000;
parameter s_write_ram_wait_master_ack = 8'b0000_0001;
parameter s_write_ram_change_index    = 8'b0000_0010;

reg [7:0] write_ram_status = s_write_ram_start;

//===local reg defines========
reg         start_flag;
reg [15:0]  read_index;
reg [15:0]  buffer_index;
// One bit wider than the 8-bit word address so the counter can actually
// reach 256.  An 8-bit counter wraps at 255, which made the
// "write_index < 256" test always true and s_done unreachable.
reg [8:0]   write_index;
reg [31:0]  result_buffer[255:0];
reg [31:0]  dat_read;

//============local logic====
// Debug interface: return the FSM states to the slave.
always @(posedge clk)
begin
    s_status_o <= {core_status, read_ram_status, write_buffer_status, write_ram_status};
end

// Latch the start command coming from the Linux driver via the slave.
always @(posedge clk)
begin
    if (rst)
    begin
        start_flag <= 1'b0;
    end
    else
    begin
        if (s_enable_i)
        begin
            start_flag <= s_config_i & 32'b1; // only bit 0 is kept
        end
        else
        begin
            start_flag <= 1'b0;
        end
    end
end

// Top core FSM.
always @(posedge clk)
begin
    if (rst & s_enable_i)
    begin
        task_reset();
    end
    else
    begin
        case (core_status)
            s_idle:         begin task_idle();         end
            s_read_ram:     begin task_read_ram();     end
            s_write_buffer: begin task_write_buffer(); end //wait fft ready then store results
            s_write_ram:    begin task_write_ram();    end
            s_done:         begin task_done();         end
            default:        begin core_status <= s_idle; end
        endcase
    end
end

//===reset task action===
task automatic task_reset; //reset initial
begin
    core_status         <= s_idle;                    //core status init
    read_ram_status     <= s_read_ram_start;          //read ram status init
    write_buffer_status <= s_write_buffer_wait_ready; //write buffer status init
    write_ram_status    <= s_write_ram_start;         //write ram status init

    read_index   <= 16'h0; //index init
    buffer_index <= 16'h0;
    write_index  <= 9'h0;

    fft_en_o    <= 1'b0; //fft init
    fft_start_o <= 1'b0;
    fft_dr_o    <= 1'b0;
    fft_di_o    <= 1'b0;
    fft_shift_o <= 4'b0;

    m_write_o <= 1'b0; //master init
    m_read_o  <= 1'b0;
    m_addr_o  <= 32'b0;
    m_dat_o   <= 32'b0;

    task_result_buffer_init(); //buffer init
end
endtask //task_reset

//=====top tasks================================
task automatic task_idle; //wait start flag
begin
    task_reset(); //just for debug

    // Assigned after task_reset() so this value wins.
    if (start_flag)
    begin
        core_status <= s_read_ram;
    end
    else
    begin
        core_status <= s_idle;
    end
end
endtask //task_idle

task automatic task_read_ram; //read data from ram then write it into fft
begin
    if (fft_ready_i) //monitor ready signal
    begin
        core_status         <= s_write_buffer;
        write_buffer_status <= s_write_buffer_write;
    end
    else
    begin
        if (read_index < 16'd256)
        begin
            case (read_ram_status)
                s_read_ram_start:
                begin
                    m_addr_o        <= {8'h98, 6'b0, read_index, 2'b0};
                    m_read_o        <= 1'b1; //enable master read
                    fft_en_o        <= 1'b0; //debug add
                    read_ram_status <= s_read_ram_wait_master_ack;
                end

                s_read_ram_wait_master_ack:
                begin
                    m_read_o <= 1'b0; //clear master read enable
                    m_addr_o <= 32'b0;

                    if (16'b0 == read_index)
                    begin
                        fft_start_o <= 1'b1; //start fft
                    end

                    if (m_ack_read_i)
                    begin
                        dat_read        <= m_dat_i; //sample data from master
                        read_ram_status <= s_read_ram_write_fft;
                    end
                    else
                    begin
                        read_ram_status <= s_read_ram_wait_master_ack; //wait
                    end
                end

                s_read_ram_write_fft:
                begin
                    fft_en_o        <= 1'b1;
                    fft_dr_o        <= dat_read[25:16];
                    fft_di_o        <= dat_read[9:0];
                    read_ram_status <= s_read_ram_change_index;
                end

                s_read_ram_change_index:
                begin
                    if (16'b0 == read_index)
                    begin
                        fft_start_o <= 1'b0; //clear start fft
                    end

                    fft_en_o        <= 1'b0;
                    read_index      <= read_index + 1'b1;
                    read_ram_status <= s_read_ram_start;
                end

                default:
                begin
                    read_ram_status <= s_read_ram_start;
                end
            endcase
        end
        else
        begin
            // All 256 samples delivered: keep the FFT clocked and go wait
            // for its results.
            read_ram_status <= s_read_ram_start;
            core_status     <= s_write_buffer;
            fft_en_o        <= 1'b1;
        end
    end
end
endtask //task_read_ram

task automatic task_write_buffer; //read fft output data & write it into buffer
begin
    if (buffer_index < 16'd256)
    begin
        case (write_buffer_status)
            s_write_buffer_wait_ready:
            begin
                if (fft_ready_i)
                begin
                    write_buffer_status <= s_write_buffer_write;
                end
                else
                begin
                    write_buffer_status <= s_write_buffer_wait_ready;
                end
            end

            s_write_buffer_write:
            begin
                result_buffer[fft_addr_i] <= {2'b00, fft_dor_i, 2'b00, fft_doi_i};
                buffer_index              <= buffer_index + 1'b1;
            end

            default:
            begin
                write_buffer_status <= s_write_buffer_wait_ready;
            end
        endcase
    end
    else
    begin
        write_buffer_status <= s_write_buffer_wait_ready;
        core_status         <= s_write_ram;
    end
end
endtask //task_write_buffer

task automatic task_write_ram; //write buffered fft results to ram via master interface
begin
    if (write_index < 9'd256)
    begin
        case (write_ram_status)
            s_write_ram_start:
            begin
                m_dat_o          <= result_buffer[write_index[7:0]];
                m_addr_o         <= {8'h98, 14'b1, write_index[7:0], 2'b00};
                m_write_o        <= 1'b1; //enable master write
                write_ram_status <= s_write_ram_wait_master_ack;
            end

            s_write_ram_wait_master_ack:
            begin
                m_dat_o   <= 32'b0; //clear master write enable
                m_write_o <= 1'b0;
                m_addr_o  <= 32'b0;

                if (m_ack_write_i)
                begin
                    write_ram_status <= s_write_ram_change_index;
                end
                else
                begin
                    write_ram_status <= s_write_ram_wait_master_ack; //wait
                end
            end

            s_write_ram_change_index:
            begin
                write_index      <= write_index + 1'b1;
                write_ram_status <= s_write_ram_start;
            end

            default:
            begin
                write_ram_status <= s_write_ram_start;
            end
        endcase
    end
    else
    begin
        write_ram_status <= s_write_ram_start;
        core_status      <= s_done;
    end
end
endtask //task_write_ram

task automatic task_done; //self loop until reset
begin
    core_status <= s_done;
end
endtask //task_done
//=================================top tasks end==================

//==1===sub tasks==reset==============================
// Clears only the first eight entries of result_buffer.  Nonblocking
// assignments are used so the memory is driven the same way as in
// task_write_buffer.
task automatic task_result_buffer_init;
begin
    result_buffer[0] <= 32'h0;
    result_buffer[1] <= 32'h0;
    result_buffer[2] <= 32'h0;
    result_buffer[3] <= 32'h0;
    result_buffer[4] <= 32'h0;
    result_buffer[5] <= 32'h0;
    result_buffer[6] <= 32'h0;
    result_buffer[7] <= 32'h0;
end
endtask //task_result_buffer_init

endmodule

/************** EOF ****************/
4>mkg_wb_slave和mkg_core的联合仿真
fft256的reset和start的timing:
fft256的ready timing:
reset和start信号:
ready信号:
write_ram信号:
done信号:
与mkg_wb_slave类似。
2>mkg_wb_master的rtl编码
`timescale 1ns/1ps

/*
 * mkg_wb_master
 *
 * Wishbone master performing single (classic) read or write cycles on
 * request.
 *
 * Internal handshake:
 *   write_i / read_i : request, sampled while the FSM is in Idle.  If both
 *                      are asserted together no cycle is started.
 *   addr_i / dat_i   : captured on the clock edge that enters *_Ready.
 *   ack_write_o      : high while in W_Done (write acknowledged by slave).
 *   ack_read_o       : high while in R_Done; dat_o then carries wb_dat_i as
 *                      sampled on the edge that entered R_Done.
 *
 * wb_err_i and wb_rty_i are not evaluated: the FSM waits in *_Wait until
 * wb_ack_i is seen.  wb_cti_o / wb_bte_o are only ever driven to 0.
 */
module mkg_wb_master
(
    wb_clk,
    wb_rst,

    wb_adr_o,
    wb_dat_o,
    wb_sel_o,
    wb_we_o,
    wb_cyc_o,
    wb_stb_o,
    wb_cti_o,
    wb_bte_o,

    wb_dat_i,
    wb_ack_i,
    wb_err_i,
    wb_rty_i,

    //internal signals
    write_i,
    read_i,
    addr_i,
    dat_i,
    ack_write_o,
    ack_read_o,
    dat_o
);

//wishbone interface
input               wb_clk;
input               wb_rst;

input               wb_ack_i;
input               wb_err_i;
input               wb_rty_i;
input       [31:0]  wb_dat_i;

output reg  [31:0]  wb_adr_o;
output reg  [31:0]  wb_dat_o;
output reg          wb_cyc_o;
output reg          wb_stb_o;
output reg  [3:0]   wb_sel_o;
output reg          wb_we_o;
output reg  [2:0]   wb_cti_o;
output reg  [1:0]   wb_bte_o;

//internal signals
input               write_i;
input               read_i;
input       [31:0]  addr_i;
input       [31:0]  dat_i;
output reg          ack_write_o;
output reg          ack_read_o;
output reg  [31:0]  dat_o;

parameter Idle    = 12'b0000_0000_0001;
//parameter R_Idle  = 12'b0000_0000_0010;
parameter R_Ready = 12'b0000_0000_0100;
parameter R_Wait  = 12'b0000_0000_1000;
parameter R_Done  = 12'b0000_0001_0000;
//parameter W_Idle  = 12'b0000_0010_0000;
parameter W_Ready = 12'b0000_0100_0000;
parameter W_Wait  = 12'b0000_1000_0000;
parameter W_Done  = 12'b0001_0000_0000;

reg [11:0] state, next_state;

// State register.
always @(posedge wb_clk)
    if (wb_rst)
    begin
        state <= Idle;
    end
    else
    begin
        state <= next_state;
    end

// Next-state logic.
always @(*)
begin
    case (state)
        Idle:
        begin
            if (write_i && !read_i)
            begin
                next_state = W_Ready;
            end
            else if (!write_i && read_i)
            begin
                next_state = R_Ready;
            end
            else
                next_state = Idle;
        end

        W_Ready:
        begin
            next_state = W_Wait;
        end

        W_Wait:
        begin
            if (wb_ack_i)
                next_state = W_Done;
            else
                next_state = W_Wait;
        end

        W_Done:
        begin
            next_state = Idle;
        end

        R_Ready:
        begin
            next_state = R_Wait;
        end

        R_Wait:
        begin
            if (wb_ack_i)
                next_state = R_Done;
            else
                next_state = R_Wait;
        end

        R_Done:
        begin
            next_state = Idle;
        end

        // Without a default, next_state is a latch and an illegal one-hot
        // code can never leave; fall back to Idle instead.
        default:
        begin
            next_state = Idle;
        end
    endcase
end

// Registered outputs, looked up by the state being entered.
always @(posedge wb_clk)
    if (wb_rst)
    begin
        wb_we_o     <= 0;
        wb_cyc_o    <= 0;
        wb_stb_o    <= 0;
        wb_adr_o    <= 0;
        wb_dat_o    <= 0;
        wb_sel_o    <= 0;
        wb_cti_o    <= 0;
        wb_bte_o    <= 0;
        ack_write_o <= 0;
        ack_read_o  <= 0;
        dat_o       <= 0;
    end
    else
    begin
        case (next_state)
            Idle:
            begin
                wb_we_o     <= 0;
                wb_cyc_o    <= 0;
                wb_stb_o    <= 0;
                wb_adr_o    <= 0;
                wb_dat_o    <= 0;
                wb_sel_o    <= 0;
                wb_cti_o    <= 0;
                wb_bte_o    <= 0;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            W_Ready:
            begin
                wb_we_o     <= 1'b1;
                wb_cyc_o    <= 1'b1;
                wb_stb_o    <= 1'b1;
                wb_adr_o    <= addr_i;
                wb_dat_o    <= dat_i;
                // Full 32-bit transfer: mark every byte lane valid so slaves
                // that honour SEL actually store the data.
                wb_sel_o    <= 4'b1111;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            W_Wait:
            begin
                // Hold the bus signals until the slave acknowledges.
                wb_we_o     <= wb_we_o;
                wb_cyc_o    <= wb_cyc_o;
                wb_stb_o    <= wb_stb_o;
                wb_adr_o    <= wb_adr_o;
                wb_dat_o    <= wb_dat_o;
                wb_sel_o    <= wb_sel_o;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            W_Done:
            begin
                wb_we_o     <= 0;
                wb_cyc_o    <= 0;
                wb_stb_o    <= 0;
                wb_adr_o    <= 0;
                wb_dat_o    <= 0;
                wb_sel_o    <= 0;
                ack_write_o <= 1'b1;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            R_Ready:
            begin
                wb_we_o     <= 0;
                wb_cyc_o    <= 1'b1;
                wb_stb_o    <= 1'b1;
                wb_adr_o    <= addr_i;
                wb_dat_o    <= 0;
                wb_sel_o    <= 4'b1111;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            R_Wait:
            begin
                wb_we_o     <= wb_we_o;
                wb_cyc_o    <= wb_cyc_o;
                wb_stb_o    <= wb_stb_o;
                wb_adr_o    <= wb_adr_o;
                wb_dat_o    <= wb_dat_o;
                wb_sel_o    <= wb_sel_o;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end

            R_Done:
            begin
                wb_we_o     <= 0;
                wb_cyc_o    <= 0;
                wb_stb_o    <= 0;
                wb_adr_o    <= 0;
                wb_dat_o    <= 0;
                wb_sel_o    <= 0;
                ack_write_o <= 0;
                ack_read_o  <= 1'b1;
                dat_o       <= wb_dat_i; // read data is valid together with ACK
            end

            default:
            begin
                wb_we_o     <= 0;
                wb_cyc_o    <= 0;
                wb_stb_o    <= 0;
                wb_adr_o    <= 0;
                wb_dat_o    <= 0;
                wb_sel_o    <= 0;
                wb_cti_o    <= 0;
                wb_bte_o    <= 0;
                ack_write_o <= 0;
                ack_read_o  <= 0;
                dat_o       <= 0;
            end
        endcase
    end

endmodule

/************** EOF ****************/
wishbone信号,与slave类似。
见下图:
mkg_test模块:
见下图:
mkg_test模块:
mkg_top模块:
现在把code list如下:
ip_mkg.c:
/*
 * rill mkg driver
 *
 * Character-device driver for the mkg FFT256 accelerator.  The module maps
 * two I/O windows (the data/result RAM and the control core), registers a
 * character device and runs a one-shot self test at load time.
 */
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <asm/uaccess.h> /* get_user and put_user */
//#include <linux/clk.h>
//#include <linux/ioport.h>
#include <asm/io.h> /* ioremap */
#include <linux/platform_device.h> /* cleanup_module */
#include <linux/delay.h>
#include <asm-generic/io.h>
#include "ip_mkg.h"

/* Kernel virtual addresses of the two windows; set up in init_module(). */
void __iomem *g_mkg_mem_base = NULL;
void __iomem *g_mkg_core_base = NULL;

/*
 * open(): the windows are mapped once at module load, so opening the device
 * only checks that the mappings exist.  (Mapping again on every open leaked
 * a mapping per open and overwrote the module-level pointers.)
 */
static int device_open(struct inode *inode, struct file *file)
{
	if (NULL == g_mkg_mem_base) {
		printk(KERN_ERR "mkg mem open ioremap error!\n");
		return -ENODEV;
	}
	printk("mkg mem ioremap addr:%p!\n", g_mkg_mem_base);

	if (NULL == g_mkg_core_base) {
		printk(KERN_ERR "mkg core open ioremap error!\n");
		return -ENODEV;
	}
	printk("mkg core ioremap addr:%p!\n", g_mkg_core_base);

	return 0;
}

/* release(): nothing to undo, the mappings live as long as the module. */
static int device_release(struct inode *inode, struct file *file)
{
	return 0;
}

/*
 * read(): not implemented.  Nothing is copied to the user buffer, so report
 * 0 bytes (EOF) instead of claiming that one byte was transferred.
 */
static ssize_t device_read(struct file *filp, char *buffer, size_t length, loff_t *offset)
{
	return 0;
}

/*
 * write(): not implemented.  The data is discarded; report the whole buffer
 * as consumed so callers do not retry byte by byte.
 */
static ssize_t device_write(struct file *filp, const char *buffer, size_t count, loff_t *offset)
{
	//iowrite32(2,g_mkg_mem_base);
	return count;
}

/*
 * ioctl(): the register get/set commands are compiled out, so every request
 * is rejected with -ENOTTY.
 */
long device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
#if 0
	int ret_val = 0;
	unsigned int ret = 0;
	struct reg_data *new_regs;

	printk("ioctl======\n");

	switch(ioctl_num)
	{
	case IOCTL_REG_SET:
		{
			new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
			if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0)
			{
				kfree(new_regs);
				printk(KERN_ERR " error copy line_datafrom user.\n");
				return -1;
			}
			//iowrite16(new_regs->value,g_mkg_mem_base+new_regs->addr);
			kfree(new_regs);
		}
		break;

	case IOCTL_REG_GET:
		{
			new_regs = (struct reg_data*)kmalloc(sizeof(struct reg_data), GFP_KERNEL);
			if((ret_val = copy_from_user(new_regs, (struct reg_data*)ioctl_param, sizeof(struct reg_data))) != 0)
			{
				kfree(new_regs);
				printk(KERN_ERR " error copy line_datafrom user.\n");
				return -1;
			}
			//ret = ioread16(g_mkg_mem_base+new_regs->addr);
			kfree(new_regs);
			return ret;
		}
		break;
	}
#endif
	return -ENOTTY;
}

struct file_operations our_file_ops = {
	.unlocked_ioctl = device_ioctl,
	.read = device_read,
	.write = device_write,
	.open = device_open,
	.release = device_release,
	.owner = THIS_MODULE,
};

/*
 * Self test, run once from init_module():
 *   1. write 256 input words to the data RAM (offset 0x000),
 *   2. write the control word 0x01000000 to the core at offset 0x4,
 *   3. wait 100 ms,
 *   4. read 256 result words from offset 0x400 and the cycle counter from
 *      offset 0x800, and print them.
 *
 * Both tables are static: 2 KB of locals is too much for the kernel stack,
 * and several values do not fit a signed int, hence u32.
 */
void test(void)
{
	static const u32 write_data[256] = {
		0x69016901, 0x8b014401, 0xa7011a01, 0xbd01ee00, 0xcd01bf00, 0xd7018f00, 0xdb015e00, 0xd8012e00,
		0xcf010000, 0xc101d403, 0xad01ab03, 0x94018603, 0x78016503, 0x58014803, 0x35013203, 0x11012003,
		0xec001403, 0xc7000e03, 0xa3000c03, 0x80001003, 0x5f001903, 0x42002503, 0x28003603, 0x12004803,
		0x00005e03, 0xf3037403, 0xe9038b03, 0xe403a303, 0xe303b903, 0xe603ce03, 0xec03e203, 0xf503f203,
		0x00000000, 0x0d000a00, 0x1c001200, 0x2b001700, 0x3a001800, 0x49001600, 0x56001100, 0x62000900,
		0x6c000000, 0x7400f503, 0x7900e803, 0x7c00db03, 0x7b00cd03, 0x7800c003, 0x7300b303, 0x6b00a803,
		0x61009f03, 0x56009703, 0x4a009203, 0x3c008f03, 0x2f008e03, 0x21009003, 0x15009503, 0x09009c03,
		0x0000a403, 0xf803af03, 0xf203ba03, 0xef03c603, 0xee03d303, 0xef03e003, 0xf303ec03, 0xf903f703,
		0x00000000, 0x09000700, 0x14000d00, 0x20001100, 0x2d001200, 0x3a001100, 0x46000e00, 0x52000800,
		0x5c000000, 0x6400f703, 0x6b00eb03, 0x7000df03, 0x7200d103, 0x7100c403, 0x6e00b603, 0x6900aa03,
		0x61009f03, 0x58009503, 0x4d008d03, 0x40008803, 0x33008503, 0x25008403, 0x18008703, 0x0b008c03,
		0x00009403, 0xf7039e03, 0xef03aa03, 0xea03b703, 0xe803c603, 0xe903d503, 0xee03e403, 0xf603f303,
		0x00000000, 0x0e000b00, 0x1e001400, 0x32001a00, 0x47001d00, 0x5d001c00, 0x75001700, 0x8c000d00,
		0xa2000000, 0xb800ee03, 0xcb00d803, 0xdb00be03, 0xe700a103, 0xf0008003, 0xf4005d03, 0xf2003903,
		0xec001403, 0xe000ef02, 0xcf00cb02, 0xb800a802, 0x9b008802, 0x7a006c02, 0x55005302, 0x2c003f02,
		0x00003102, 0xd2032802, 0xa2032502, 0x71032902, 0x41033302, 0x12034302, 0xe6025902, 0xbc027502,
		0x97029702, 0x7502bc02, 0x5902e602, 0x43021203, 0x33024103, 0x29027103, 0x2502a203, 0x2802d203,
		0x31020000, 0x3f022c00, 0x53025500, 0x6c027a00, 0x88029b00, 0xa802b800, 0xcb02cf00, 0xef02e000,
		0x1403ec00, 0x3903f200, 0x5d03f400, 0x8003f000, 0xa103e700, 0xbe03db00, 0xd803cb00, 0xee03b800,
		0x0000a200, 0x0d008c00, 0x17007500, 0x1c005d00, 0x1d004700, 0x1a003200, 0x14001e00, 0x0b000e00,
		0x00000000, 0xf303f603, 0xe403ee03, 0xd503e903, 0xc603e803, 0xb703ea03, 0xaa03ef03, 0x9e03f703,
		0x94030000, 0x8c030b00, 0x87031800, 0x84032500, 0x85033300, 0x88034000, 0x8d034d00, 0x95035800,
		0x9f036100, 0xaa036900, 0xb6036e00, 0xc4037100, 0xd1037200, 0xdf037000, 0xeb036b00, 0xf7036400,
		0x00005c00, 0x08005200, 0x0e004600, 0x11003a00, 0x12002d00, 0x11002000, 0x0d001400, 0x07000900,
		0x00000000, 0xf703f903, 0xec03f303, 0xe003ef03, 0xd303ee03, 0xc603ef03, 0xba03f203, 0xaf03f803,
		0xa4030000, 0x9c030900, 0x95031500, 0x90032100, 0x8e032f00, 0x8f033c00, 0x92034a00, 0x97035600,
		0x9f036100, 0xa8036b00, 0xb3037300, 0xc0037800, 0xcd037b00, 0xdb037c00, 0xe8037900, 0xf5037400,
		0x00006c00, 0x09006200, 0x11005600, 0x16004900, 0x18003a00, 0x17002b00, 0x12001c00, 0x0a000d00,
		0x00000000, 0xf203f503, 0xe203ec03, 0xce03e603, 0xb903e303, 0xa303e403, 0x8b03e903, 0x7403f303,
		0x5e030000, 0x48031200, 0x36032800, 0x25034200, 0x19035f00, 0x10038000, 0x0c03a300, 0x0e03c700,
		0x1403ec00, 0x20031101, 0x32033501, 0x48035801, 0x65037801, 0x86039401, 0xab03ad01, 0xd403c101,
		0x0000cf01, 0x2e00d801, 0x5e00db01, 0x8f00d701, 0xbf00cd01, 0xee00bd01, 0x1a01a701, 0x44018b01
	};
	static u32 read_rslt[256];
	u32 temp = 0;
	int i;

	printk("<----ip_mkg test start---->\n");

	/* Pre-fill with a marker so untouched entries are easy to spot. */
	for (i = 0; i < 256; i++)
		read_rslt[i] = 0x98766789;
	printk("<----the initialization of result --->\n");

	for (i = 0; i < 256; i++)
		iowrite32(write_data[i], g_mkg_mem_base + (i * 4));
	printk("<----write orignal data --->\n");

	iowrite32(0x01000000, g_mkg_core_base + 0x4);
	printk("<---write control data --->\n");

	/*
	 * Alternative to the fixed delay below: poll the status word.
	 *
	 * while(1)
	 * {
	 *	temp=ioread32(g_mkg_mem_base+0x804);
	 *	if(temp==0x10101010)
	 *		break;
	 *	printk("<-------my core status:0x%x--->\n",temp);
	 *	mdelay(1);
	 * }
	 * printk("<----waiting ends ---->\n");
	 */
	mdelay(100);
	printk("<----delay ends ---->\n");

	/* Fetch everything first, print afterwards. */
	for (i = 0; i < 256; i++)
		read_rslt[i] = ioread32(g_mkg_mem_base + 0x00000400 + (i * 4));
	printk("<----read rslt from ram ---->\n");

	temp = ioread32(g_mkg_mem_base + 0x00000800);
	printk("<-------my clock cnt:0x%x--->\n", temp);

	for (i = 0; i < 256; i++)
		printk("====mem read addr==0x%x==mem value:0x%x==\n", i, read_rslt[i]);
}

/*
 * Module entry: register the character device, claim and map both windows,
 * then run the self test.  Every failure path releases what was acquired
 * before it and returns a negative errno.
 */
int init_module(void)
{
	int ret_val;

	//=== Allocate character device
	ret_val = register_chrdev(MAJOR_NUM, DEVICE_NAME, &our_file_ops);
	if (ret_val < 0)
	{
		printk(KERN_ALERT " device %s failed(%d)\n", DEVICE_NAME, ret_val);
		return ret_val;
	}

	/* request_mem_region() itself reports a busy region (NULL return). */
	if (NULL == request_mem_region(MKG_MEM_BASE, MKG_MEM_LEN, "ip_mkg"))
	{
		printk(KERN_ERR "mkg check_mem_region bussy error!\n");
		ret_val = -EBUSY;
		goto err_unregister;
	}

	if (NULL == request_mem_region(MKG_CORE_BASE, MKG_CORE_LEN, "ip_mkg"))
	{
		printk(KERN_ERR "mkg check_mem_region bussy error!\n");
		ret_val = -EBUSY;
		goto err_release_mem;
	}

	//===ioremap mkg registers
	g_mkg_mem_base = ioremap(MKG_MEM_BASE, MKG_MEM_LEN);
	if (NULL == g_mkg_mem_base)
	{
		printk(KERN_ERR "mkg mem ioremap error!\n");
		ret_val = -ENOMEM;
		goto err_release_core;
	}

	g_mkg_core_base = ioremap(MKG_CORE_BASE, MKG_CORE_LEN);
	if (NULL == g_mkg_core_base)
	{
		printk(KERN_ERR "mkg core ioremap error!\n");
		ret_val = -ENOMEM;
		goto err_unmap_mem;
	}

	printk("mkg module init done!\n");

	test();

	return 0;

err_unmap_mem:
	iounmap(g_mkg_mem_base);
	g_mkg_mem_base = NULL;
err_release_core:
	release_mem_region(MKG_CORE_BASE, MKG_CORE_LEN);
err_release_mem:
	release_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
err_unregister:
	unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
	return ret_val;
}

/* Module exit: undo init_module() in reverse order. */
void cleanup_module(void)
{
	if (g_mkg_core_base)
	{
		iounmap(g_mkg_core_base);
		g_mkg_core_base = NULL;
	}
	if (g_mkg_mem_base)
	{
		iounmap(g_mkg_mem_base);
		g_mkg_mem_base = NULL;
	}

	release_mem_region(MKG_MEM_BASE, MKG_MEM_LEN);
	release_mem_region(MKG_CORE_BASE, MKG_CORE_LEN);
	unregister_chrdev(MAJOR_NUM, DEVICE_NAME);
}

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Rill zhen:[email protected]");
ip_mkg.h
#ifndef __IP_MKG_H__
#define __IP_MKG_H__

/* Character device identity. */
#define MAJOR_NUM       102
#define DEVICE_NAME     "ip_mkg"

/* Physical window of the data/result RAM (length in bytes). */
#define MKG_MEM_BASE    0x98000000
#define MKG_MEM_LEN     0xC00

/* Physical window of the control core (length in bytes). */
#define MKG_CORE_BASE   0x97000000
#define MKG_CORE_LEN    0x40

/* ioctl command numbers. */
#define IOCTL_REG_SET   0
#define IOCTL_REG_GET   1

/* Argument block exchanged with the register ioctls. */
struct reg_data
{
	unsigned short addr;    /* register address */
	int value;              /* register value */
};

#endif
前面只是仿真,要想让它work,还要挂到arbiter_dbus上才行。
请参考:
http://blog.csdn.net/rill_zhen/article/details/8722664
和
http://blog.csdn.net/rill_zhen/article/details/8558463
我将可以work的整个工程也传上来了:
要想让一个工程能够work,道理很简单,但是有很多细节需要做,上面的内容是在仿真之后写的,
在做了很多工作后才能在板子上work,有些地方与上面的内容稍微有些改动。
这个工程经过了用modelsim的仿真,在xilinx ZYNQ板子上用chipscope的仿真,经过ORPSoC的altera 的FPGA的板级验证。
主要做了如下优化:
1,将内部的mem,改为调用altera的库,减少le的使用量。
2,修改arbiter_dbus的仲裁策略为轮转。
3,对mkg_core模块进行了优化。
由于上传文件大小有限,我分成了两部分:
工程的第一部分:
http://download.csdn.net/detail/rill_zhen/5435013
工程的第二部分:
http://download.csdn.net/detail/rill_zhen/5435107
此外还有对应的linux的driver:
http://download.csdn.net/detail/rill_zhen/5435175
验证如下图:
可以与之前仿真时的fft计算结果对比,看到和仿真的结果相同。
注意:此次驱动读取计算结果采用的是延时/轮询的形式,如果采用中断方式,请参考:
http://blog.csdn.net/rill_zhen/article/details/8894856
前面介绍的都是硬件实现FFT运算,如果将这个IP作为一个硬件加速器来用的话,它和软件实现的加速效果如何呢?下面我们就做一个比较。
同样的计算量,分别统计硬件用时和软件用时,加速比=软件用时 / 硬件用时
1》硬件:在IP core内部增加一个counter,从reset开始到done结束,记录cycle数量,保存在一个寄存器中,驱动读取这个寄存器的值,并打印出来,然后根据时钟频率(50MHz),计算出硬件用时。
2》软件:两次调用gettimeofday()函数,计算差值,即软件用时。
3》具体操作步骤,请参考:
http://blog.csdn.net/rill_zhen/article/details/8700937
1》硬件:“<-------my clock cnt:0x94740200--->”,需要注意的是这个数和硬件内部的大小端不一致,需要转换一下,即正确的数值是0x00027494,十进制是160916,硬件用时(ms)=(160916/50M)*1000=3.218 ms,手动10次运行取平均值为3.12 ms.
2》软件:"Used Time:144.6",运行100次,取平均值,即软件用时为144.6 ms.
3》对比:如下图
1》硬件部分,
mkg_ram_wb.v:
/*
 * mkg_ram_wb - Wishbone slave front-end for two external 256x32 RAMs
 * (input data and FFT result) plus a cycle counter and a status word.
 *
 * Address map (default parameters):
 *   data_adr_start .. data_adr_end : data RAM   (word index = wb_adr_i[9:2])
 *   rslt_adr_start .. rslt_adr_end : result RAM (word index = wb_adr_i[9:2])
 *   status_adr                     : read-only status word
 *   cnt_adr and every other read address at or above it : cycle counter
 *
 * The cycle counter starts on the first write to data_adr_start and freezes
 * on the first write to rslt_adr_end; status reads 32'h0101_0101 once frozen.
 * Both are cleared only by wb_rst.
 *
 * All outputs are registered and decoded from next_state, so every output
 * value is visible in the same cycle in which `state` takes that value.
 * wb_sel_i, wb_cti_i and wb_bte_i are accepted but not used: every access is
 * treated as a single full-word classic cycle. wb_err_o / wb_rty_o are never
 * asserted.
 */
module mkg_ram_wb
(
    wb_clk,
    wb_rst,

    wb_dat_i,
    wb_adr_i,
    wb_sel_i,
    wb_cti_i,
    wb_bte_i,
    wb_we_i,
    wb_cyc_i,
    wb_stb_i,
    wb_dat_o,
    wb_ack_o,
    wb_err_o,
    wb_rty_o,

    data_address,
    data_out,
    data_rden,
    data_wren,
    data_q,

    rslt_address,
    rslt_out,
    rslt_rden,
    rslt_wren,
    rslt_q
);

// ---------------------------------------------------------------- ports
input               wb_clk;
input               wb_rst;

input      [31:0]   wb_adr_i;
input               wb_stb_i;
input               wb_cyc_i;
input      [2:0]    wb_cti_i;
input      [1:0]    wb_bte_i;
input      [31:0]   wb_dat_i;
input      [3:0]    wb_sel_i;
input               wb_we_i;

output reg [31:0]   wb_dat_o;
output reg          wb_ack_o;
output reg          wb_err_o;
output reg          wb_rty_o;

// Data RAM port
output reg [7:0]    data_address;
output reg [31:0]   data_out;
output reg          data_rden;
output reg          data_wren;
input      [31:0]   data_q;

// Result RAM port
output reg [7:0]    rslt_address;
output reg [31:0]   rslt_out;
output reg          rslt_rden;
output reg          rslt_wren;
input      [31:0]   rslt_q;

// ----------------------------------------------------------- parameters
parameter my_ram_adr     = 8'h98;
parameter data_adr_start = 32'h9800_0000;
parameter data_adr_end   = 32'h9800_03fc;
parameter rslt_adr_start = 32'h9800_0400;
parameter rslt_adr_end   = 32'h9800_07fc;
parameter error_code     = 32'habcd_dcba;   // value driven on wb_dat_o while a write is acked
parameter Numb           = 256;
parameter cnt_adr        = 32'h9800_0800;
parameter status_adr     = 32'h9800_0804;

// Bus FSM encoding (binary)
parameter Idle             = 5'b00000;
parameter Read_Data        = 5'b00001;
parameter Read_Rslt        = 5'b00010;
parameter Write_Data       = 5'b00011;
parameter Write_Rslt       = 5'b00100;
parameter Read_Data_Pause1 = 5'b00101;
parameter Read_Data_Pause2 = 5'b00110;
parameter Write_Data_Pause = 5'b00111;
parameter Write_Rslt_Pause = 5'b01000;
parameter Read_Rslt_Pause1 = 5'b01001;
parameter Read_Rslt_Pause2 = 5'b01010;
parameter Read_Data_Pause3 = 5'b01011;
parameter Read_Rslt_Pause3 = 5'b01100;
parameter Read_Cnt         = 5'b01101;
parameter Read_Status      = 5'b01110;
parameter Read_Cnt_Pause   = 5'b01111;
parameter Read_Cnt_Done    = 5'b10000;

// Cycle-counter FSM encoding
parameter c_idle = 2'b00;
parameter c_cnt  = 2'b01;
parameter c_done = 2'b10;

// ------------------------------------------------------------ internals
reg [4:0]  state, next_state;
reg [1:0]  c_state, c_next_state;
reg [31:0] cnt;
reg [31:0] status;
reg [1:0]  pause_cnt;

// Request / address decode shared by both FSMs.
wire req     = wb_stb_i && wb_cyc_i;
wire rd_req  = req && !wb_we_i;
wire wr_req  = req &&  wb_we_i;
wire in_data = (wb_adr_i >= data_adr_start) && (wb_adr_i <= data_adr_end);
wire in_rslt = (wb_adr_i >= rslt_adr_start) && (wb_adr_i <= rslt_adr_end);

// ------------------------------------------------- cycle-counter FSM
always @(posedge wb_clk)
    if (wb_rst) c_state <= c_idle;
    else        c_state <= c_next_state;

always @(*) begin
    case (c_state)
        c_idle:  c_next_state = (wr_req && wb_adr_i == data_adr_start) ? c_cnt  : c_idle;
        c_cnt:   c_next_state = (wr_req && wb_adr_i == rslt_adr_end)   ? c_done : c_cnt;
        c_done:  c_next_state = c_done;
        // Without a default the unused 2'b11 encoding infers a latch.
        default: c_next_state = c_idle;
    endcase
end

always @(posedge wb_clk)
    if (wb_rst) begin
        cnt    <= 0;
        status <= 0;
    end
    else begin
        case (c_next_state)
            c_idle:  begin cnt <= 0;       status <= 0;              end
            c_cnt:   begin cnt <= cnt + 1; status <= 0;              end
            c_done:  begin cnt <= cnt;     status <= 32'h0101_0101;  end
            default: begin cnt <= cnt;     status <= status;         end
        endcase
    end

// ------------------------------------------------------------ bus FSM
always @(posedge wb_clk) begin
    if (wb_rst) state <= Idle;
    else        state <= next_state;
end

always @(*) begin
    case (state)
        Idle: begin
            if      (rd_req && in_data)                 next_state = Read_Data;
            else if (rd_req && in_rslt)                 next_state = Read_Rslt;
            // status_adr lies above cnt_adr, so it must be decoded before the
            // ">= cnt_adr" catch-all; otherwise the status word is unreachable
            // and a status read returns the counter instead.
            else if (rd_req && wb_adr_i == status_adr)  next_state = Read_Status;
            else if (rd_req && wb_adr_i >= cnt_adr)     next_state = Read_Cnt;
            else if (wr_req && in_data)                 next_state = Write_Data;
            else if (wr_req && in_rslt)                 next_state = Write_Rslt;
            else                                        next_state = Idle;
        end

        // Writes: one cycle driving the RAM, one cycle acknowledging.
        Write_Data:       next_state = Write_Data_Pause;
        Write_Rslt:       next_state = Write_Rslt_Pause;
        Write_Data_Pause: next_state = Idle;
        Write_Rslt_Pause: next_state = Idle;

        // Reads: issue the RAM read, step through two idle cycles, then
        // capture the RAM output and acknowledge in Pause3.
        Read_Data:        next_state = Read_Data_Pause1;
        Read_Data_Pause1: next_state = Read_Data_Pause2;
        Read_Data_Pause2: next_state = Read_Data_Pause3;
        Read_Data_Pause3: next_state = Idle;

        Read_Rslt:        next_state = Read_Rslt_Pause1;
        Read_Rslt_Pause1: next_state = Read_Rslt_Pause2;
        Read_Rslt_Pause2: next_state = Read_Rslt_Pause3;
        Read_Rslt_Pause3: next_state = Idle;

        // Counter read: stay in Read_Cnt_Pause until pause_cnt saturates.
        Read_Cnt:         next_state = Read_Cnt_Pause;
        Read_Cnt_Pause:   next_state = (pause_cnt < 2'b11) ? Read_Cnt_Pause : Read_Cnt_Done;
        Read_Cnt_Done:    next_state = Idle;

        Read_Status:      next_state = Idle;

        default:          next_state = Idle;
    endcase
end

// ------------------------------------------------- registered outputs
always @(posedge wb_clk) begin
    if (wb_rst) begin
        wb_dat_o     <= 0;
        wb_ack_o     <= 0;
        wb_err_o     <= 0;
        wb_rty_o     <= 0;
        data_address <= 0;
        data_out     <= 0;
        data_rden    <= 0;
        data_wren    <= 0;
        rslt_address <= 0;
        rslt_out     <= 0;
        rslt_rden    <= 0;
        rslt_wren    <= 0;
        pause_cnt    <= 0;
    end
    else begin
        // Defaults for every state; the case arms below override only what
        // differs (the last non-blocking assignment wins). pause_cnt is
        // deliberately NOT defaulted: it holds its value unless an arm
        // assigns it.
        wb_dat_o     <= 0;
        wb_ack_o     <= 0;
        wb_err_o     <= 0;
        wb_rty_o     <= 0;
        data_address <= 0;
        data_out     <= 0;
        data_rden    <= 0;
        data_wren    <= 0;
        rslt_address <= 0;
        rslt_out     <= 0;
        rslt_rden    <= 0;
        rslt_wren    <= 0;

        case (next_state)
            Write_Data: begin
                data_address <= wb_adr_i[9:2];
                data_out     <= wb_dat_i;
                data_wren    <= 1'b1;
            end

            Write_Rslt: begin
                rslt_address <= wb_adr_i[9:2];
                rslt_out     <= wb_dat_i;
                rslt_wren    <= 1'b1;
            end

            Read_Data: begin
                data_address <= wb_adr_i[9:2];
                data_rden    <= 1'b1;
            end

            Read_Rslt: begin
                rslt_address <= wb_adr_i[9:2];
                rslt_rden    <= 1'b1;
            end

            Read_Data_Pause3: begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= data_q;
            end

            Read_Rslt_Pause3: begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= rslt_q;
            end

            Write_Data_Pause,
            Write_Rslt_Pause: begin
                wb_ack_o <= 1'b1;
                wb_dat_o <= error_code;
            end

            Read_Cnt: begin
                pause_cnt <= 0;
            end

            Read_Cnt_Pause: begin
                pause_cnt <= pause_cnt + 1;
            end

            Read_Cnt_Done: begin
                wb_dat_o  <= cnt;
                wb_ack_o  <= 1'b1;
                pause_cnt <= 0;
            end

            Read_Status: begin
                wb_dat_o <= status;
                wb_ack_o <= 1'b1;
            end

            default: ; // Idle and the intermediate pause states: defaults only
        endcase
    end
end

endmodule
2》软件部分代码:
fft256.c:
#include <stdio.h> #include <stdlib.h> #include <math.h> #include <sys/timeb.h> #include <time.h> typedef struct { double real; double img; } COMPLEX; typedef struct { long tv_sec; long tv_usec; } timeval; #define PI 3.14159265358979323846 void FFT(COMPLEX*,int nfft); void IFFT(COMPLEX*,int nfft); //inverse FFT void common_fft(COMPLEX*,int nfft,int isign); COMPLEX EE(COMPLEX a,COMPLEX b); int main(int argc,char* argv[]) { timeval tpstart, tpend; int i; int Nx; int NFFT; COMPLEX *x; double timeuse; int count; Nx=256; printf("Nx = %d\n",Nx); gettimeofday(&tpstart,NULL); for( count = 0; count <= 100; count++){ /* caculate NFFT as the next higer power of 2 >=Nx*/ NFFT = (int)pow(2.0,ceil(log((double)Nx)/log(2.0))); // printf("NFFT = %d \n",NFFT); /* allocate memory for NFFT complex numbers*/ x=(COMPLEX*)malloc(NFFT*sizeof(COMPLEX)); /* input test data*/ for(i=0;i<Nx;i++) { x[i].real== i; x[i].img=0.0; } /* caculate FFT */ FFT(x,NFFT); } gettimeofday(&tpend,NULL); timeuse=1000000*(tpend.tv_sec-tpstart.tv_sec)+tpend.tv_usec-tpstart.tv_usec; printf("Used Time:%lf\n",timeuse); // system("pause"); return 0; } void FFT(COMPLEX* x, int nfft) { common_fft(x,nfft,1); } void IFFT(COMPLEX* x,int nfft) { int i; common_fft(x,nfft,-1); for(i=0;i<nfft;i++) { x[i].real /= nfft; x[i].img /= nfft; } } /* fft kernel */ /* isign: 1 for FFT , -1 for IFFT */ void common_fft(COMPLEX* x,int nfft,int isign) { int i,j=0,k; COMPLEX t; for(i=0;i<nfft-1;i++) { if(i<j) { t=x[j]; x[j]=x[i]; x[i]=t; } k=nfft/2; while(k<=j) { j-=k; k/=2; } j+=k; } int stage,le,lei,ip; COMPLEX u,w; j= nfft; for(stage=1;(j=j/2)!=1;stage++); //caculate stage,which represents butterfly stages for(k=1;k<=stage;k++) { le=2<<(k-1); lei=le/2; u.real=1.0;// u,butterfly factor initial value u.img=0.0; w.real=cos(PI/lei*isign); w.img=sin(PI/lei*isign); for(j=0;j<=lei-1;j++) { for(i=j;i<=nfft-1;i+=le) { ip=i+lei; t=EE(x[ip],u); x[ip].real=x[i].real-t.real; x[ip].img=x[i].img-t.img; x[i].real=x[i].real+t.real; 
x[i].img=x[i].img+t.img; } u=EE(u,w); } } } COMPLEX EE(COMPLEX a,COMPLEX b) { COMPLEX c; c.real=a.real*b.real-a.img*b.img; c.img=a.real*b.img+a.img*b.real; return c; }
由于这个工程是一个实验性质的,目的在于说明SOC的开发的关键技术和流程,暂时没做backend flow。
如果感兴趣,可参考:
《Advanced ASIC Chip Synthesis: Using Synopsys Design Compiler, Physical Compiler and PrimeTime 》。
有中文版翻译《高级ASIC芯片综合》。