RISC-V(发音为“risk-five”)是一个基于精简指令集(RISC)原则的开源指令集架构(ISA)。RISC-V架构主要由美国加州大学伯克利分校的开发人员于2010年发明。
与大多数指令集相比,RISC-V指令集可以自由地用于任何目的,允许任何人设计、制造和销售RISC-V芯片和软件。虽然这不是第一个开源指令集,但它具有重要意义,因为其设计使其适用于现代计算设备(如仓库规模云计算机、高端移动电话和微小嵌入式系统)。设计者考虑到了这些用途中的性能与功率效率,但并没有对特定的微架构做过度的设计。该指令集还具有众多支持的软件,这解决了新指令集通常的弱点。
RISC-V官方文档规范:https://riscv.org/technical/specifications/
RISC-V中文手册:http://crva.io/documents/RISC-V-Reader-Chinese-v2p1.pdf
指令类型 | 用途 |
---|---|
R 类型指令 | 用于寄存器—寄存器操作 |
I 类型指令 | 用于短立即数和访存 load 操作 |
S 类型指令 | 用于访存 store 操作 |
B 类型指令 | 用于条件跳转操作 |
U 类型指令 | 用于长立即数操作 |
J 类型指令 | 用于无条件跳转操作 |
RISC-V的指令集是模块化的。基础模块包含RV32I(基础的32位整数指令集,32位地址空间,寄存器是32位)、RV64I(基础的64位整数指令集,64位地址空间,寄存器是64位)、RV32E(嵌入式架构,仅有16个整数寄存器)、RV128I(基础的128位整数指令集,支持128位地址空间);扩展模块包含M(支持乘法和除法指令)、A(支持原子操作指令和Load-Reserved/Store-Conditional指令)、F(单精度浮点指令)、D(双精度浮点指令)等。
Note: 所有位全部为0 是非法的 RV32I 指令
寄存器是CPU上预先定义的可以用来存储数据的位置。汇编代码并不是在内存上执行,而是在寄存器上执行,也就是说,当CPU在做add,sub时,其实是对寄存器进行操作。所以通常的汇编代码中的模式是,通过load将数据存放在寄存器中,这里的数据源可以是来自内存,也可以来自另一个寄存器,之后,ALU在寄存器上执行一些操作,操作的结果会被store在某个地方,这里的目的地可能是内存中的某个地址,也可能是另一个寄存器。这就是通常使用寄存器的方法。
Note:RV32I的寄存器x0是硬件连接至32’h0,可以作为常量数值0取出来用,但是不可以改变x0寄存器的值;
Temporary标注的寄存器可以作为临时存储使用;
其他的寄存器按相应的功能定义使用即可。
由于想学习一下如何实现RISC-V指令的CPU以及控制相关的IP,因此参考了一些资料用了几天时间敲了这样一个SoC。
工程文件结构如下所示。
[IC@IC RISC-V_SOC_demo]$ ls
asm bench rtl sim
[IC@IC RISC-V_SOC_demo]$ ls ./asm
soc_test.S
[IC@IC RISC-V_SOC_demo]$ ls ./bench
tb_riscv_soc.sv
[IC@IC RISC-V_SOC_demo]$ ls ./sim
Makefile
[IC@IC RISC-V_SOC_demo]$ ls ./rtl
bus cpu perips soc
[IC@IC RISC-V_SOC_demo]$ ls ./rtl/bus
bus_mux.v
[IC@IC RISC-V_SOC_demo]$ ls ./rtl/cpu
riscv_cpu.v
[IC@IC RISC-V_SOC_demo]$ ls ./rtl/perips
memory.v per_gpio.v per_timer.v per_uart.v
[IC@IC RISC-V_SOC_demo]$ ls ./rtl/soc
riscv_soc.v
SoC比较简单,全是数字IP,共包含6个模块:riscv_cpu、bus_mux、memory、per_uart、per_gpio和per_timer。
riscv_soc.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: riscv_soc
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// riscv_soc
//
// Top level of the demo SoC. It wires together:
//   * u_riscv_cpu : the CPU (instruction port + data port)
//   * u_bus_mux   : 1-master / 4-slave data bus multiplexer
//   * u_memory    : shared instruction/data memory
//   * u_uart, u_gpio, u_timer : memory-mapped peripherals
//
// The CPU instruction port is connected straight to the memory; only the
// data port goes through the bus multiplexer. The slave is chosen from the
// top nibble of the data address (0 = memory, 1 = UART, 2 = GPIO, 3 = timer).
//
// Parameters:
//   MEM_SIZE : forwarded to memory.SIZE and to riscv_cpu.RESET_SP
//   FIRMWARE : forwarded to memory.FIRMWARE
//-----------------------------------------------------------------------------
module riscv_soc
#(
    parameter MEM_SIZE = 8192,
    parameter FIRMWARE = ""
)
(
    input         clk_i,
    input         reset_i,
    output        lock_o,
    input         uart_rx_i,
    output        uart_tx_o,
    input  [31:0] gpio_in_i,
    output [31:0] gpio_out_o
);

    //-------------------------------------------------------------------------
    //- CPU <-> memory instruction port
    //-------------------------------------------------------------------------
    wire [31:0] cpu_iaddr_w;
    wire [31:0] cpu_irdata_w;
    wire        cpu_ird_w;

    //-------------------------------------------------------------------------
    //- CPU data port (bus master side)
    //-------------------------------------------------------------------------
    wire [31:0] cpu_daddr_w;
    wire [31:0] cpu_dwdata_w;
    wire [31:0] cpu_drdata_w;
    wire [1:0]  cpu_dsize_w;
    wire        cpu_drd_w;
    wire        cpu_dwr_w;

    riscv_cpu
    #(
        .PC_SIZE  (32),
        .RESET_SP (MEM_SIZE)
    )
    u_riscv_cpu
    (
        .clk_i    (clk_i),
        .reset_i  (reset_i),
        .lock_o   (lock_o),

        .iaddr_o  (cpu_iaddr_w),
        .irdata_i (cpu_irdata_w),
        .ird_o    (cpu_ird_w),

        .daddr_o  (cpu_daddr_w),
        .dwdata_o (cpu_dwdata_w),
        .drdata_i (cpu_drdata_w),
        .dsize_o  (cpu_dsize_w),
        .drd_o    (cpu_drd_w),
        .dwr_o    (cpu_dwr_w)
    );

    //-------------------------------------------------------------------------
    //- Slave side of the bus
    //
    // Each packed bus holds one field per slave. Slot 3 (most significant)
    // is the memory and slot 0 (least significant) is the timer.
    //-------------------------------------------------------------------------
    localparam N_SLAVES  = 4;
    localparam SLV_TIMER = 0,
               SLV_GPIO  = 1,
               SLV_UART  = 2,
               SLV_MEM   = 3;

    wire [N_SLAVES-1:0]    slv_sel_w;
    wire [32*N_SLAVES-1:0] slv_addr_w;
    wire [32*N_SLAVES-1:0] slv_wdata_w;
    wire [32*N_SLAVES-1:0] slv_rdata_w;
    wire [2*N_SLAVES-1:0]  slv_size_w;
    wire [N_SLAVES-1:0]    slv_rd_w;
    wire [N_SLAVES-1:0]    slv_wr_w;

    // Address decode on the top nibble of the CPU data address.
    assign slv_sel_w[SLV_MEM]   = (4'h0 == cpu_daddr_w[31:28]);
    assign slv_sel_w[SLV_UART]  = (4'h1 == cpu_daddr_w[31:28]);
    assign slv_sel_w[SLV_GPIO]  = (4'h2 == cpu_daddr_w[31:28]);
    assign slv_sel_w[SLV_TIMER] = (4'h3 == cpu_daddr_w[31:28]);

    bus_mux
    #(
        .N (N_SLAVES)
    )
    u_bus_mux
    (
        .clk_i     (clk_i),
        .reset_i   (reset_i),

        .ss_i      (slv_sel_w),

        .m_addr_i  (cpu_daddr_w),
        .m_wdata_i (cpu_dwdata_w),
        .m_rdata_o (cpu_drdata_w),
        .m_size_i  (cpu_dsize_w),
        .m_rd_i    (cpu_drd_w),
        .m_wr_i    (cpu_dwr_w),

        .s_addr_o  (slv_addr_w),
        .s_wdata_o (slv_wdata_w),
        .s_rdata_i (slv_rdata_w),
        .s_size_o  (slv_size_w),
        .s_rd_o    (slv_rd_w),
        .s_wr_o    (slv_wr_w)
    );

    //-------------------------------------------------------------------------
    //- Memory: instruction port from the CPU, data port from bus slot SLV_MEM
    //-------------------------------------------------------------------------
    memory
    #(
        .SIZE     (MEM_SIZE),
        .FIRMWARE (FIRMWARE)
    )
    u_memory
    (
        .clk_i    (clk_i),
        .reset_i  (reset_i),

        .iaddr_i  (cpu_iaddr_w),
        .irdata_o (cpu_irdata_w),
        .ird_i    (cpu_ird_w),

        .daddr_i  (slv_addr_w [32*SLV_MEM +: 32]),
        .dwdata_i (slv_wdata_w[32*SLV_MEM +: 32]),
        .drdata_o (slv_rdata_w[32*SLV_MEM +: 32]),
        .dsize_i  (slv_size_w [2*SLV_MEM  +: 2]),
        .drd_i    (slv_rd_w   [SLV_MEM]),
        .dwr_i    (slv_wr_w   [SLV_MEM])
    );

    //-------------------------------------------------------------------------
    //- UART on bus slot SLV_UART
    //-------------------------------------------------------------------------
    per_uart u_uart
    (
        .clk_i     (clk_i),
        .reset_i   (reset_i),

        .addr_i    (slv_addr_w [32*SLV_UART +: 32]),
        .wdata_i   (slv_wdata_w[32*SLV_UART +: 32]),
        .rdata_o   (slv_rdata_w[32*SLV_UART +: 32]),
        .size_i    (slv_size_w [2*SLV_UART  +: 2]),
        .rd_i      (slv_rd_w   [SLV_UART]),
        .wr_i      (slv_wr_w   [SLV_UART]),

        .uart_rx_i (uart_rx_i),
        .uart_tx_o (uart_tx_o)
    );

    //-------------------------------------------------------------------------
    //- GPIO on bus slot SLV_GPIO
    //-------------------------------------------------------------------------
    per_gpio u_gpio
    (
        .clk_i      (clk_i),
        .reset_i    (reset_i),

        .addr_i     (slv_addr_w [32*SLV_GPIO +: 32]),
        .wdata_i    (slv_wdata_w[32*SLV_GPIO +: 32]),
        .rdata_o    (slv_rdata_w[32*SLV_GPIO +: 32]),
        .size_i     (slv_size_w [2*SLV_GPIO  +: 2]),
        .rd_i       (slv_rd_w   [SLV_GPIO]),
        .wr_i       (slv_wr_w   [SLV_GPIO]),

        .gpio_in_i  (gpio_in_i),
        .gpio_out_o (gpio_out_o)
    );

    //-------------------------------------------------------------------------
    //- Timer on bus slot SLV_TIMER
    //-------------------------------------------------------------------------
    per_timer u_timer
    (
        .clk_i   (clk_i),
        .reset_i (reset_i),

        .addr_i  (slv_addr_w [32*SLV_TIMER +: 32]),
        .wdata_i (slv_wdata_w[32*SLV_TIMER +: 32]),
        .rdata_o (slv_rdata_w[32*SLV_TIMER +: 32]),
        .size_i  (slv_size_w [2*SLV_TIMER  +: 2]),
        .rd_i    (slv_rd_w   [SLV_TIMER]),
        .wr_i    (slv_wr_w   [SLV_TIMER])
    );

endmodule
SoC中的总线并没有采用一些标准总线,读写定义特别简单:在时钟上升沿来临时rd_en为高电平则读出数据、wr_en为高电平则写入数据,且读写地址是共用的。
bus_mux.v 实现了一主多从的总线MUX,在模块外部由地址来确定Slave Select信号。
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: bus_mux
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// bus_mux
//
// One-master / N-slave bus multiplexer.
//
//   * Address, write data and size from the master are replicated to every
//     slave unchanged.
//   * The master's read/write strobes are gated per slave by ss_i, so only
//     a selected slave sees a strobe.
//   * Read data is taken from the selected slave and registered once before
//     it is returned to the master. If several ss_i bits are set, the slave
//     with the highest index wins; if none is set the value is zero.
//
// Slave i occupies bits [32*i +: 32] of the packed 32-bit buses and
// bits [2*i +: 2] of s_size_o.
//-----------------------------------------------------------------------------
module bus_mux
#(
    parameter N = 2
)
(
    input               clk_i,
    input               reset_i,

    input  [N-1:0]      ss_i,       // slave select, one bit per slave

    input  [31:0]       m_addr_i,
    input  [31:0]       m_wdata_i,
    output [31:0]       m_rdata_o,
    input  [1:0]        m_size_i,
    input               m_rd_i,
    input               m_wr_i,

    output [32*N-1:0]   s_addr_o,
    output [32*N-1:0]   s_wdata_o,
    input  [32*N-1:0]   s_rdata_i,
    output [2*N-1:0]    s_size_o,
    output [N-1:0]      s_rd_o,
    output [N-1:0]      s_wr_o
);

    //-------------------------------------------------------------------------
    //- Master -> slaves: broadcast payload, gate the strobes
    //-------------------------------------------------------------------------
    assign s_addr_o  = {N{m_addr_i}};
    assign s_wdata_o = {N{m_wdata_i}};
    assign s_size_o  = {N{m_size_i}};
    assign s_rd_o    = ss_i & {N{m_rd_i}};
    assign s_wr_o    = ss_i & {N{m_wr_i}};

    //-------------------------------------------------------------------------
    //- Slaves -> master: pick the read data of the selected slave
    //-------------------------------------------------------------------------
    reg [31:0] picked_rdata_r;
    reg        picked_r;
    integer    slot;

    always @(*)
    begin
        // Defaults cover the "no slave selected" case and avoid a latch.
        picked_rdata_r = 32'h0;
        picked_r       = 1'b0;

        // Scan from the highest slot down and keep the first hit.
        for (slot = N-1; slot >= 0; slot = slot - 1)
            if (!picked_r && ss_i[slot])
            begin
                picked_rdata_r = s_rdata_i[32*slot +: 32];
                picked_r       = 1'b1;
            end
    end

    //-------------------------------------------------------------------------
    //- Register the read data before handing it to the master
    //-------------------------------------------------------------------------
    reg [31:0] m_rdata_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            m_rdata_r <= 32'h0;
        else
            m_rdata_r <= picked_rdata_r;
    end

    assign m_rdata_o = m_rdata_r;

endmodule
从riscv_soc.v中片选信号(mem_sel_w、uart_sel_w、gpio_sel_w、timer_sel_w)的赋值语句可以看出,数据地址的高4位(daddr_w[31:28])决定访问哪个外设,外设地址的定义如下表所示。
外设 | 地址区间 |
---|---|
Memory | 0x00000000 ~ 0x0FFFFFFF |
UART | 0x10000000 ~ 0x1FFFFFFF |
GPIO | 0x20000000 ~ 0x2FFFFFFF |
TIMER | 0x30000000 ~ 0x3FFFFFFF |
CPU采用冯·诺依曼体系结构,即指令和数据存储在同一个Memory中,指令与数据的位宽也相同。
CPU的处理流程参考了以下电路结构,但是目前CPU只实现了多周期处理,还没有pipeline起来,所以处理速度较慢,固定8个时钟周期执行一条指令,有待改进为pipeline结构。
要想实现全部RISC-V指令,工作量是相当大的,目前这个CPU只实现了以下指令,对于一些简单的功能和控制当前的SoC外设是足够的。
加载和存入指令 | 具体功能 | 指令类型 |
---|---|---|
lui | 高位立即数加载 | U |
lw | 字加载 | I |
sw | 存字 | S |
跳转指令 | 具体功能 | 指令类型 |
---|---|---|
jal | 跳转并链接 | J |
jalr | 跳转并寄存器链接 | I |
beq | 相等时分支 | B |
bgeu | 无符号大于等于时分支 | B |
bltu | 无符号小于时分支 | B |
加减指令 | 具体功能 | 指令类型 |
---|---|---|
add | 加 | R |
addi | 加立即数 | I |
sub | 减 | R |
位运算指令 | 具体功能 | 指令类型 |
---|---|---|
and | 与 | R |
or | 或 | R |
xor | 异或 | R |
andi | 与立即数 | I |
ori | 立即数取或 | I |
xori | 立即数异或 | I |
srl | 逻辑右移 | R |
sll | 逻辑左移 | R |
下面是每条指令的详细说明:
CPU代码为了方便写blog整理,我就写在一个module里了。功能就是取指、译码、执行、访存、写回,具体的不详细讲解了,注释标的比较明白,有计算机体系结构和Verilog基础就可以看懂。
riscv_cpu.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: riscv_cpu
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// riscv_cpu
//
// Multi-cycle CPU implementing a subset of RV32I:
//   lui, lw, sw, jal, jalr, beq, bgeu, bltu,
//   add, addi, sub, and, or, xor, andi, ori, xori, srl, sll
//
// Every instruction takes PHASE_MAX+1 = 8 clocks, sequenced by phase_r:
//   phase 1 : request instruction fetch (ird_o is high during phase 2)
//   phase 3 : irdata_i is decoded; operands and immediate are latched,
//             an unsupported instruction sets lock_o
//   phase 4 : ALU result, branch decision, jump target and link address are
//             latched; a load issues its bus read (drd_o high in phase 5)
//   phase 5 : PC <= PC + 4; a store issues its bus write (dwr_o high in
//             phase 6)
//   phase 6+: PC <= jump target when the branch/jump is taken
//   phase 7 : register file write back
//
// Load data is sampled from drdata_i on the clock edge that ends phase 7,
// i.e. two clock edges after the cycle in which drd_o is high. This matches
// a slave with a registered read followed by the registered read path of
// bus_mux.
//
// Once lock_o is set the CPU stays held in its reset state until reset_i.
//
// Parameters:
//   PC_SIZE  : width of the program counter register
//   RESET_SP : not referenced inside this module
//-----------------------------------------------------------------------------
module riscv_cpu
#(
    parameter PC_SIZE  = 32,
    parameter RESET_SP = 32'h2000
)
(
    input         clk_i,
    input         reset_i,
    output        lock_o,       // illegal instruction -> CPU locked

    output [31:0] iaddr_o,      // instruction fetch address
    input  [31:0] irdata_i,     // instruction fetch data
    output        ird_o,        // instruction fetch enable

    output [31:0] daddr_o,      // bus read/write address
    output [31:0] dwdata_o,     // bus write data
    input  [31:0] drdata_i,     // bus read data
    output [1:0]  dsize_o,      // bus access size
    output        drd_o,        // bus read enable
    output        dwr_o         // bus write enable
);

    //-------------------------------------------------------------------------
    //- Parameter Definitions
    //-------------------------------------------------------------------------
    localparam
        ALU_ADD   = 4'd0,
        ALU_SUB   = 4'd1,
        ALU_AND   = 4'd2,
        ALU_OR    = 4'd3,
        ALU_XOR   = 4'd4,
        ALU_SLT   = 4'd5,
        ALU_SLTU  = 4'd6,
        ALU_SHL   = 4'd7,
        ALU_SHR   = 4'd8,
        ALU_MULL  = 4'd9,
        ALU_MULH  = 4'd10,
        ALU_DIV   = 4'd11,
        ALU_REM   = 4'd12,
        ALU_NPC   = 4'd13,
        ALU_AUIPC = 4'd14;

    localparam
        BR_NONE = 3'd0,
        BR_JUMP = 3'd1,
        BR_EQ   = 3'd2,
        BR_NE   = 3'd3,
        BR_LT   = 3'd4,
        BR_GE   = 3'd5,
        BR_LTU  = 3'd6,
        BR_GEU  = 3'd7;

    localparam
        SIZE_BYTE = 2'd0,
        SIZE_HALF = 2'd1,
        SIZE_WORD = 2'd2;

    //-------------------------------------------------------------------------
    //- constant output
    //-------------------------------------------------------------------------
    assign dsize_o = SIZE_WORD; // only word accesses (lw/sw) are implemented

    //-------------------------------------------------------------------------
    //- phase loop control
    //-------------------------------------------------------------------------
    reg [3:0] phase_r;
    localparam PHASE_MAX = 4'd7;

    always @(posedge clk_i)
        if (reset_i | lock_o)
            phase_r <= 4'h0;
        else if (phase_r == PHASE_MAX)
            phase_r <= 4'h0;
        else
            phase_r <= phase_r + 1'b1;

    //-------------------------------------------------------------------------
    //- instruction read enable control
    //-------------------------------------------------------------------------
    reg [PC_SIZE-1:0] pc_r; // program counter
    reg               ird_en_r;

    // One-clock fetch strobe, asserted during the cycle after phase 1.
    always @(posedge clk_i)
        if (reset_i | lock_o)
            ird_en_r <= 1'b0;
        else if (phase_r == 4'd1)
            ird_en_r <= 1'b1;
        else
            ird_en_r <= 1'b0;

    assign ird_o = ird_en_r;

    //-------------------------------------------------------------------------
    //- Instruction Decode
    //-------------------------------------------------------------------------
    wire [31:0] instruction_w;
    assign instruction_w[31:0] = irdata_i[31:0];

    wire [6:0]  op_w;    // opcode
    wire [4:0]  rd_w;    // rd
    wire [2:0]  f3_w;    // funct3
    wire [4:0]  ra_w;    // rs1
    wire [4:0]  rb_w;    // rs2
    wire [6:0]  f7_w;    // funct7
    wire [31:0] imm_i_w; // I-type immediate
    wire [31:0] imm_s_w; // S-type immediate
    wire [31:0] imm_b_w; // B-type immediate
    wire [31:0] imm_u_w; // U-type immediate
    wire [31:0] imm_j_w; // J-type immediate

    // Field extraction for the six RISC-V instruction formats
    assign op_w[6:0] = instruction_w[6:0];
    assign rd_w[4:0] = instruction_w[11:7];
    assign f3_w[2:0] = instruction_w[14:12];
    assign ra_w[4:0] = instruction_w[19:15];
    assign rb_w[4:0] = instruction_w[24:20];
    assign f7_w[6:0] = instruction_w[31:25];

    assign imm_i_w[31:0] = { {20{instruction_w[31]}}, instruction_w[31:20] };
    assign imm_s_w[31:0] = { {20{instruction_w[31]}}, instruction_w[31:25], instruction_w[11:7] };
    assign imm_b_w[31:0] = { {19{instruction_w[31]}}, instruction_w[31], instruction_w[7], instruction_w[30:25], instruction_w[11:8], 1'b0 };
    assign imm_u_w[31:0] = { instruction_w[31:12], 12'h0 };
    assign imm_j_w[31:0] = { {11{instruction_w[31]}}, instruction_w[31], instruction_w[19:12], instruction_w[20], instruction_w[30:21], 1'b0 };

    // load & store instructions
    wire lui_w,
         lw_w,
         sw_w;
    // jump & branch instructions
    wire jal_w,
         jalr_w,
         beq_w,
         bgeu_w,
         bltu_w;
    // add & sub instructions
    wire add_w,
         sub_w,
         addi_w;
    // bit operation instructions
    wire and_w,
         or_w,
         xor_w,
         andi_w,
         ori_w,
         xori_w,
         srl_w,
         sll_w;

    assign lui_w  = (7'b0110111 == op_w);                          // load upper immediate
    assign lw_w   = (7'b0000011 == op_w) && (3'b010 == f3_w);      // load word
    assign sw_w   = (7'b0100011 == op_w) && (3'b010 == f3_w);      // store word

    assign jal_w  = (7'b1101111 == op_w);                          // jump and link
    assign jalr_w = (7'b1100111 == op_w) && (3'b000 == f3_w);      // jump and link register (funct3 = 000)
    assign beq_w  = (7'b1100011 == op_w) && (3'b000 == f3_w);      // branch if equal
    assign bgeu_w = (7'b1100011 == op_w) && (3'b111 == f3_w);      // branch if greater than or equal, unsigned
    assign bltu_w = (7'b1100011 == op_w) && (3'b110 == f3_w);      // branch if less than, unsigned

    assign add_w  = (7'b0110011 == op_w) && (3'b000 == f3_w) && (7'b0000000 == f7_w); // add
    assign sub_w  = (7'b0110011 == op_w) && (3'b000 == f3_w) && (7'b0100000 == f7_w); // sub
    assign addi_w = (7'b0010011 == op_w) && (3'b000 == f3_w);                         // add immediate

    assign and_w  = (7'b0110011 == op_w) && (3'b111 == f3_w) && (7'b0000000 == f7_w);
    assign or_w   = (7'b0110011 == op_w) && (3'b110 == f3_w) && (7'b0000000 == f7_w);
    assign xor_w  = (7'b0110011 == op_w) && (3'b100 == f3_w) && (7'b0000000 == f7_w);
    assign andi_w = (7'b0010011 == op_w) && (3'b111 == f3_w); // and immediate
    assign ori_w  = (7'b0010011 == op_w) && (3'b110 == f3_w); // or immediate
    assign xori_w = (7'b0010011 == op_w) && (3'b100 == f3_w); // xor immediate
    assign srl_w  = (7'b0110011 == op_w) && (3'b101 == f3_w) && (7'b0000000 == f7_w); // shift right logical
    assign sll_w  = (7'b0110011 == op_w) && (3'b001 == f3_w) && (7'b0000000 == f7_w); // shift left logical

    // Anything that is not one of the supported instructions is illegal.
    wire illegal_instr_w;
    assign illegal_instr_w = !( lui_w  || lw_w   || sw_w   ||
                                jal_w  || jalr_w ||
                                beq_w  || bgeu_w || bltu_w ||
                                add_w  || sub_w  || addi_w ||
                                and_w  || or_w   || xor_w  ||
                                andi_w || ori_w  || xori_w ||
                                srl_w  || sll_w );

    // Immediate selected by the current instruction's format
    wire [31:0] imm_switch_w;
    assign imm_switch_w[31:0] = (lui_w)                                                  ? imm_u_w :
                                (beq_w || bgeu_w || bltu_w)                              ? imm_b_w :
                                (lw_w || jalr_w || addi_w || xori_w || ori_w || andi_w)  ? imm_i_w :
                                (sw_w)                                                   ? imm_s_w :
                                (jal_w)                                                  ? imm_j_w : 32'h0;

    // Register indices; forced to 0 when the instruction does not use the field
    wire [4:0] rd_switch_w, ra_switch_w, rb_switch_w;
    assign rd_switch_w[4:0] = ( lui_w  || lw_w  || jal_w  || jalr_w || add_w ||
                                addi_w || sub_w || and_w  || or_w   || xor_w ||
                                andi_w || ori_w || xori_w || srl_w  || sll_w ) ? rd_w : 5'd0; // rd
    assign ra_switch_w[4:0] = ( lw_w   || sw_w  ||
                                jalr_w || beq_w || bgeu_w || bltu_w || add_w ||
                                addi_w || sub_w || and_w  || or_w   || xor_w ||
                                andi_w || ori_w || xori_w || srl_w  || sll_w ) ? ra_w : 5'd0; // rs1
    assign rb_switch_w[4:0] = ( sw_w   || beq_w || bgeu_w || bltu_w ||
                                add_w  || sub_w || and_w  || or_w   || xor_w ||
                                srl_w  || sll_w ) ? rb_w : 5'd0;                              // rs2

    // ALU operation; branches (and anything unlisted) fall through to ALU_SUB
    wire [3:0] alu_operate_w;
    assign alu_operate_w[3:0] = (add_w || addi_w || lui_w || lw_w || sw_w) ? ALU_ADD :
                                (andi_w || and_w)                          ? ALU_AND :
                                (ori_w  || or_w)                           ? ALU_OR  :
                                (xori_w || xor_w)                          ? ALU_XOR :
                                (sll_w)                                    ? ALU_SHL :
                                (srl_w)                                    ? ALU_SHR :
                                (jal_w || jalr_w)                          ? ALU_NPC : ALU_SUB;

    //-------------------------------------------------------------------------
    //- Lock control
    //-------------------------------------------------------------------------
    reg lock_r;

    // Sticky flag: set when the fetched instruction is illegal at phase 3,
    // cleared only by reset.
    always @(posedge clk_i)
        if (reset_i)
            lock_r <= 1'b0;
        else if (phase_r == 4'd3 && illegal_instr_w)
        begin
            lock_r <= 1'b1;
            $display("%t Illegal Instruction. CPU is locked.\n", $time);
        end

    assign lock_o = lock_r;

    //-------------------------------------------------------------------------
    //- Register Fetch
    //-------------------------------------------------------------------------
    reg [31:0] reg_r [31:0]; // register file
    reg [31:0] operator_a_r;
    reg [31:0] operator_b_r;

    always @(posedge clk_i)
        if (reset_i | lock_o)
        begin
            operator_a_r <= 32'h0;
            operator_b_r <= 32'h0;
        end
        else if (phase_r == 4'd3)
        begin
            operator_a_r <= reg_r[ra_switch_w]; // x[rs1]
            operator_b_r <= reg_r[rb_switch_w]; // x[rs2]
        end

    //-------------------------------------------------------------------------
    //- load immediate
    //-------------------------------------------------------------------------
    reg [31:0] imm_switch_r;

    always @(posedge clk_i)
        if (reset_i | lock_o)
            imm_switch_r <= 32'h0;
        else if (phase_r == 4'd3)
            imm_switch_r <= imm_switch_w;

    //-------------------------------------------------------------------------
    //- ALU calculate (combinational)
    //-------------------------------------------------------------------------
    reg [31:0] alu_result_r;

    always @(*)
    begin
        // Default value so that no path leaves the result unassigned.
        alu_result_r = 32'h0;

        if (reset_i | lock_o)
            alu_result_r = 32'h0;
        else
            case (alu_operate_w[3:0])
                ALU_ADD:
                    if      (add_w)  alu_result_r = operator_a_r + operator_b_r;
                    else if (addi_w) alu_result_r = operator_a_r + imm_switch_r;
                    else if (lui_w)  alu_result_r = imm_switch_r;
                    else if (lw_w)   alu_result_r = operator_a_r + imm_switch_r; // load address
                    else if (sw_w)   alu_result_r = operator_a_r + imm_switch_r; // store address
                ALU_AND:
                    if      (and_w)  alu_result_r = operator_a_r & operator_b_r;
                    else if (andi_w) alu_result_r = operator_a_r & imm_switch_r;
                ALU_OR:
                    if      (or_w)   alu_result_r = operator_a_r | operator_b_r;
                    else if (ori_w)  alu_result_r = operator_a_r | imm_switch_r;
                ALU_XOR:
                    if      (xor_w)  alu_result_r = operator_a_r ^ operator_b_r;
                    else if (xori_w) alu_result_r = operator_a_r ^ imm_switch_r;
                // RV32I shifts use only the low 5 bits of rs2 as shift amount.
                ALU_SHL: alu_result_r = operator_a_r << operator_b_r[4:0];
                ALU_SHR: alu_result_r = operator_a_r >> operator_b_r[4:0];
                ALU_NPC:
                    if      (jal_w)  alu_result_r = pc_r + imm_switch_r;
                    // JALR clears the least-significant bit of the target.
                    else if (jalr_w) alu_result_r = (operator_a_r + imm_switch_r) & ~32'h1;
                ALU_SUB:
                    // alu_operate_w selects ALU_SUB for sub and, by fall
                    // through, for the branch instructions; the latter need
                    // the branch target here because pc_jump_r is loaded from
                    // this result.
                    if      (sub_w)                      alu_result_r = operator_a_r - operator_b_r;
                    else if (beq_w || bgeu_w || bltu_w)  alu_result_r = pc_r + imm_switch_r;
                default:
                    alu_result_r = 32'h0;
            endcase
    end

    reg [31:0] alu_result_buff_r; // registered copy of the ALU result

    always @(posedge clk_i)
        if (reset_i | lock_o)
            alu_result_buff_r <= 32'h0;
        else if (phase_r == 4'd4)
            alu_result_buff_r <= alu_result_r;

    //-------------------------------------------------------------------------
    //- Branch Taken
    //-------------------------------------------------------------------------
    reg               branch_taken_r;
    reg [PC_SIZE-1:0] pc_jump_r;
    reg [31:0]        pc_link_r; // return address (PC + 4) for jal / jalr

    // pc_r still holds the address of the current instruction in phase 4, so
    // both the jump target and the link address are captured here; by the
    // time of write back (phase 7) pc_r has already been overwritten.
    always @(posedge clk_i)
        if (reset_i | lock_o)
        begin
            branch_taken_r <= 1'b0;
            pc_jump_r      <= 'h0;
            pc_link_r      <= 32'h0;
        end
        else if (phase_r == 4'd4)
        begin
            branch_taken_r <= ( jal_w || jalr_w ||
                                (beq_w  && (operator_a_r == operator_b_r)) ||
                                (bgeu_w && (operator_a_r >= operator_b_r)) ||
                                (bltu_w && (operator_a_r <  operator_b_r)) );
            pc_jump_r      <= alu_result_r;
            pc_link_r      <= pc_r + 4;
        end

    //-------------------------------------------------------------------------
    //- Memory Access
    //-------------------------------------------------------------------------
    reg        mem_rd_en_r;
    reg [31:0] mem_rd_addr_r;
    reg        mem_wr_en_r;
    reg [31:0] mem_wr_addr_r;
    reg [31:0] mem_wr_data_r;

    // Loads are issued from phase 4 (address taken directly from the ALU) so
    // that read data which needs two clock edges to come back is present on
    // drdata_i at write back. Stores are issued from phase 5. Both strobes
    // are one clock wide.
    always @(posedge clk_i)
        if (reset_i | lock_o)
        begin
            mem_rd_en_r   <= 1'b0;
            mem_rd_addr_r <= 32'h0;
            mem_wr_en_r   <= 1'b0;
            mem_wr_addr_r <= 32'h0;
            mem_wr_data_r <= 32'h0;
        end
        else if (phase_r == 4'd4 && lw_w)
        begin
            mem_rd_en_r   <= 1'b1;
            mem_rd_addr_r <= alu_result_r;      // x[rs1] + imm
            mem_wr_en_r   <= 1'b0;
        end
        else if (phase_r == 4'd5 && sw_w)
        begin
            mem_rd_en_r   <= 1'b0;
            mem_wr_en_r   <= 1'b1;
            mem_wr_addr_r <= alu_result_buff_r; // x[rs1] + imm
            mem_wr_data_r <= operator_b_r;      // x[rs2]
        end
        else
        begin
            mem_rd_en_r <= 1'b0;
            mem_wr_en_r <= 1'b0;
        end

    assign daddr_o  = lw_w ? mem_rd_addr_r : mem_wr_addr_r; // bus read/write address
    assign dwdata_o = mem_wr_data_r;                        // bus write data
    assign drd_o    = mem_rd_en_r;                          // bus read enable
    assign dwr_o    = mem_wr_en_r;                          // bus write enable

    //-------------------------------------------------------------------------
    //- PC control
    //-------------------------------------------------------------------------
    // PC starts from 0. At phase 5 it advances by 4; in any other phase it is
    // loaded with the jump target while branch_taken_r is set.
    always @(posedge clk_i)
        if (reset_i | lock_o)
            pc_r <= 'h0;
        else if (phase_r == 4'd5)
            pc_r <= pc_r + 4;
        else if (branch_taken_r)
            pc_r <= pc_jump_r;

    assign iaddr_o = pc_r;

    //-------------------------------------------------------------------------
    //- Write Back
    //-------------------------------------------------------------------------
    wire        regfile_wr_en_w;
    wire [31:0] regfile_wr_data_w;

    assign regfile_wr_en_w = ( lui_w  || lw_w  || jal_w  || jalr_w ||
                               add_w  || addi_w || sub_w ||
                               and_w  || or_w   || xor_w ||
                               andi_w || ori_w  || xori_w ||
                               srl_w  || sll_w );

    assign regfile_wr_data_w = lui_w             ? imm_switch_r :
                               lw_w              ? drdata_i     : // data returned by the bus
                               (jal_w || jalr_w) ? pc_link_r    : // address of the next instruction
                               ( add_w  || addi_w || sub_w  ||
                                 and_w  || or_w   || xor_w  ||
                                 andi_w || ori_w  || xori_w ||
                                 srl_w  || sll_w ) ? alu_result_r : 32'h0;

    integer k;

    always @(posedge clk_i)
    begin
        if (reset_i | lock_o)
        begin
            for (k = 0; k < 32; k = k + 1)
                reg_r[k] <= 32'h0;
        end
        // x0 must always read as zero, so writes with rd = x0 are dropped.
        else if (phase_r == 4'd7 && regfile_wr_en_w && (rd_switch_w != 5'd0))
            reg_r[rd_switch_w] <= regfile_wr_data_w;
    end

endmodule
由于CPU采用冯·诺依曼体系结构,因此Memory需要包含两个读数据端口和一个写数据端口。
memory.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: memory
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// memory
//
// Word-organised memory with two synchronous read ports (instruction and
// data) and one byte-strobed write port that shares its address with the
// data read port.
//
// Parameters:
//   SIZE     : memory size in bytes; the array holds SIZE/4 32-bit words.
//              The word index is taken from address bits [$clog2(SIZE)-1:2],
//              so SIZE is expected to be a power of two (at least 8) --
//              addresses above SIZE then alias into the array.
//   FIRMWARE : hex image loaded with $readmemh at time 0 when non-empty
//
// Read data on both ports is registered: it appears one clock after the
// corresponding enable and is held until the next enabled read.
//
// dsize_i selects byte / half-word / word access. Sub-word reads are
// zero-extended; sub-word writes only touch the addressed byte lane(s).
//-----------------------------------------------------------------------------
module memory
#(
    parameter SIZE     = 8192, // total number of bytes
    parameter FIRMWARE = ""
)
(
    input         clk_i,
    input         reset_i,

    input  [31:0] iaddr_i,   // instruction read address
    output [31:0] irdata_o,  // instruction read data
    input         ird_i,     // instruction read enable

    input  [31:0] daddr_i,   // data read/write address
    input  [31:0] dwdata_i,  // data write data
    output [31:0] drdata_o,  // data read data
    input  [1:0]  dsize_i,   // data access size
    input         drd_i,     // data read enable
    input         dwr_i      // data write enable
);

    //-------------------------------------------------------------------------
    localparam
        SIZE_BYTE = 2'd0,
        SIZE_HALF = 2'd1,
        SIZE_WORD = 2'd2;

    localparam
        DEPTH = $clog2(SIZE); // number of byte-address bits

    //-------------------------------------------------------------------------
    //- Define Memory
    //-------------------------------------------------------------------------
    reg [31:0] mem_r [SIZE/4-1:0];

    initial
    begin
        // Skip the load when no image name was supplied.
        if (FIRMWARE != "")
            $readmemh(FIRMWARE, mem_r);
    end

    // Word indices: SIZE/4 words need DEPTH-2 bits, i.e. address bits
    // [DEPTH-1:2].
    wire [DEPTH-3:0] iword_w;
    wire [DEPTH-3:0] dword_w;
    assign iword_w = iaddr_i[DEPTH-1:2];
    assign dword_w = daddr_i[DEPTH-1:2];

    //-------------------------------------------------------------------------
    //- instruction read out
    //-------------------------------------------------------------------------
    reg [31:0] irdata_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            irdata_r <= 32'h0;
        else if (ird_i)
        begin
            irdata_r <= mem_r[iword_w];
            $display("%t \tRead insturction: mem[%0d] = 0x%32x", $time, iword_w, mem_r[iword_w]);
        end
    end

    assign irdata_o = irdata_r;

    //-------------------------------------------------------------------------
    //- data read out
    //-------------------------------------------------------------------------
    wire [31:0] dword_data_w; // full word at the data address
    wire [7:0]  rdata_byte_w; // byte lane selected by daddr_i[1:0]
    wire [15:0] rdata_half_w; // half word selected by daddr_i[1]
    wire [31:0] drdata_w;     // read data after size selection

    assign dword_data_w = mem_r[dword_w];

    assign rdata_byte_w[7:0] = (2'b00 == daddr_i[1:0]) ? dword_data_w[7:0]   :
                               (2'b01 == daddr_i[1:0]) ? dword_data_w[15:8]  :
                               (2'b10 == daddr_i[1:0]) ? dword_data_w[23:16] : dword_data_w[31:24];

    assign rdata_half_w[15:0] = daddr_i[1] ? dword_data_w[31:16] : dword_data_w[15:0];

    assign drdata_w[31:0] = (SIZE_BYTE == dsize_i) ? { 24'b0, rdata_byte_w } :
                            (SIZE_HALF == dsize_i) ? { 16'b0, rdata_half_w } : dword_data_w;

    reg [31:0] drdata_r; // registered read data

    always @(posedge clk_i)
    begin
        if (reset_i)
            drdata_r <= 32'h0;
        else if (drd_i)
        begin
            drdata_r <= drdata_w;
            case (dsize_i)
                SIZE_BYTE: $display("%t Read memory: mem[%0d] = 0x%32x (No.%0d byte)", $time, dword_w, drdata_w, daddr_i[1:0]);
                SIZE_HALF: $display("%t Read memory: mem[%0d] = 0x%32x (No.%0d half)", $time, dword_w, drdata_w, daddr_i[1:0]);
                default:   $display("%t Read memory: mem[%0d] = 0x%32x (word)", $time, dword_w, drdata_w);
            endcase
        end
    end

    assign drdata_o = drdata_r;

    //-------------------------------------------------------------------------
    //- data write in
    //-------------------------------------------------------------------------
    wire [31:0] dwdata_w;     // write data replicated onto every lane
    wire [3:0]  wdata_byte_w; // one-hot lane strobe for a byte write
    wire [3:0]  wdata_half_w; // lane strobes for a half-word write
    wire [3:0]  dwr_strb_w;   // final byte-lane strobes

    // Replicate narrow data so the enabled lane(s) always carry it.
    assign dwdata_w[31:0] = (SIZE_BYTE == dsize_i) ? {4{dwdata_i[7:0]}}  :
                            (SIZE_HALF == dsize_i) ? {2{dwdata_i[15:0]}} : dwdata_i;

    assign wdata_byte_w[3:0] = (2'b00 == daddr_i[1:0]) ? 4'b0001 :
                               (2'b01 == daddr_i[1:0]) ? 4'b0010 :
                               (2'b10 == daddr_i[1:0]) ? 4'b0100 : 4'b1000;

    assign wdata_half_w[3:0] = daddr_i[1] ? 4'b1100 : 4'b0011;

    assign dwr_strb_w[3:0] = (SIZE_BYTE == dsize_i) ? wdata_byte_w :
                             (SIZE_HALF == dsize_i) ? wdata_half_w : 4'b1111;

    // The write port is not affected by reset_i.
    always @(posedge clk_i)
    begin
        if (dwr_strb_w[0] & dwr_i)
        begin
            mem_r[dword_w][7:0] <= dwdata_w[7:0];
            $display("%t Write memory: mem[%0d][7:0] = 0x%8x", $time, dword_w, dwdata_w[7:0]);
        end

        if (dwr_strb_w[1] & dwr_i)
        begin
            mem_r[dword_w][15:8] <= dwdata_w[15:8];
            $display("%t Write memory: mem[%0d][15:8] = 0x%8x", $time, dword_w, dwdata_w[15:8]);
        end

        if (dwr_strb_w[2] & dwr_i)
        begin
            mem_r[dword_w][23:16] <= dwdata_w[23:16];
            $display("%t Write memory: mem[%0d][23:16] = 0x%8x", $time, dword_w, dwdata_w[23:16]);
        end

        if (dwr_strb_w[3] & dwr_i)
        begin
            mem_r[dword_w][31:24] <= dwdata_w[31:24];
            $display("%t Write memory: mem[%0d][31:24] = 0x%8x", $time, dword_w, dwdata_w[31:24]);
        end
    end

endmodule
per_gpio.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: per_gpio
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// per_gpio
//
// 32-bit GPIO peripheral.
//
// Writes (wr_i high) update the output register according to addr_i[7:0]:
//   0x00  REG_OUT_WRITE : out  = wdata
//   0x04  REG_OUT_SET   : out |= wdata
//   0x08  REG_OUT_CLR   : out &= ~wdata
//   0x0c  REG_OUT_TGL   : out ^= wdata
//   other               : out  = 0
//
// Reads (rd_i high) capture gpio_in_i regardless of the address; the value
// is returned on rdata_o from the following clock.
//
// size_i is accepted but not used.
//-----------------------------------------------------------------------------
module per_gpio
(
    input         clk_i,
    input         reset_i,

    input  [31:0] addr_i,
    input  [31:0] wdata_i,
    output [31:0] rdata_o,
    input  [1:0]  size_i,
    input         rd_i,
    input         wr_i,

    input  [31:0] gpio_in_i,
    output [31:0] gpio_out_o
);

    //-------------------------------------------------------------------------
    localparam
        REG_OUT_WRITE = 8'h00,
        REG_OUT_SET   = 8'h04,
        REG_OUT_CLR   = 8'h08,
        REG_OUT_TGL   = 8'h0c,
        REG_IN_READ   = 8'h10;

    //-------------------------------------------------------------------------
    //- GPIO output register
    //-------------------------------------------------------------------------
    wire [7:0]  reg_offset_w;
    reg  [31:0] out_q_r;    // current output value
    reg  [31:0] out_next_r; // value a write at this address would store

    assign reg_offset_w = addr_i[7:0];

    always @(*)
    begin
        if (reg_offset_w == REG_OUT_WRITE)
            out_next_r = wdata_i;
        else if (reg_offset_w == REG_OUT_SET)
            out_next_r = out_q_r | wdata_i;
        else if (reg_offset_w == REG_OUT_CLR)
            out_next_r = out_q_r & ~wdata_i;
        else if (reg_offset_w == REG_OUT_TGL)
            out_next_r = out_q_r ^ wdata_i;
        else
            out_next_r = 32'h0;
    end

    always @(posedge clk_i)
    begin
        if (reset_i)
            out_q_r <= 32'h0;
        else if (wr_i)
        begin
            out_q_r <= out_next_r;
            $display("%t GPIO_OUT_DATA = 0x%32x", $time, out_next_r[31:0]);
        end
    end

    assign gpio_out_o = out_q_r;

    //-------------------------------------------------------------------------
    //- GPIO input sampling
    //-------------------------------------------------------------------------
    reg [31:0] in_sample_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            in_sample_r <= 32'h0;
        else if (rd_i)
        begin
            // every address reads the input pins
            in_sample_r <= gpio_in_i;
            $display("%t GPIO_READ_DATA = 0x%32x", $time, gpio_in_i[31:0]);
        end
    end

    assign rdata_o = in_sample_r;

endmodule
per_uart.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: per_uart
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
//-----------------------------------------------------------------------------
// per_uart
//
// Simple memory-mapped UART (8 data bits, one start bit, one stop bit).
//
// Register map, selected by addr_i[7:0]:
//   0x00  REG_CSR  : read  -> bit 0 = TX ready, bit 1 = RX ready
//   0x04  REG_BR   : write -> baud divider (low 16 bits of wdata_i);
//                    one bit lasts br_r + 1 clocks
//   0x08  REG_DATA : write -> start transmitting wdata_i[7:0]
//                    read  -> last received byte; a read with rd_i high
//                             clears RX ready
//
// rdata_o is registered and follows the address every clock: it carries the
// received byte while addr_i[7:0] == REG_DATA and the CSR value otherwise.
//
// When SIMULATOR is defined the transmitter is replaced by a $display of the
// written character, uart_tx_o is tied high and the CSR reports TX ready = 1,
// RX ready = 0.
//
// size_i is accepted but not used.
//-----------------------------------------------------------------------------
module per_uart
(
    input         clk_i,
    input         reset_i,

    input  [31:0] addr_i,
    input  [31:0] wdata_i,
    output [31:0] rdata_o,
    input  [1:0]  size_i,
    input         rd_i,
    input         wr_i,

    input         uart_rx_i,
    output        uart_tx_o
);

    // Note: this is not the most efficient UART implementation, but it is
    // simple and works for the purpose of demonstration

    //-------------------------------------------------------------------------
    localparam
        REG_CSR  = 8'h00,
        REG_BR   = 8'h04,
        REG_DATA = 8'h08;

    localparam
        BIT_CSR_TX_READY = 0,
        BIT_CSR_RX_READY = 1;

    //-------------------------------------------------------------------------
    //- Register address decode
    //-------------------------------------------------------------------------
    wire reg_csr_w;
    wire reg_br_w;
    wire reg_data_w;

    assign reg_csr_w  = (REG_CSR  == addr_i[7:0]);
    assign reg_br_w   = (REG_BR   == addr_i[7:0]);
    assign reg_data_w = (REG_DATA == addr_i[7:0]);

    //-------------------------------------------------------------------------
    //- Baud divider register
    //-------------------------------------------------------------------------
    reg [15:0] br_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            br_r <= 16'h0;
        else if (wr_i && reg_br_w)
            br_r <= wdata_i[15:0];
    end

    //-------------------------------------------------------------------------
    //- Transmitter
    //-------------------------------------------------------------------------
`ifdef SIMULATOR
    always @(posedge clk_i)
    begin
        if (wr_i && reg_data_w)
            $display("%c", wdata_i[7:0]);
    end

    assign uart_tx_o = 1'b1;
`else
    reg [15:0] tx_br_cnt_r;  // clocks elapsed in the current bit
    reg [3:0]  tx_bit_cnt_r; // bits still to send (0 = idle)
    reg [9:0]  tx_shifter_r; // {stop, data[7:0], start}, sent LSB first
    reg        tx_ready_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
        begin
            tx_br_cnt_r  <= 16'd0;
            tx_bit_cnt_r <= 4'd0;
            tx_shifter_r <= 10'h1;
            tx_ready_r   <= 1'b1;
        end
        else if (tx_bit_cnt_r)
        begin
            // Frame in progress: advance to the next bit once per bit time,
            // shifting in 1s so the line idles high afterwards.
            if (tx_br_cnt_r == br_r)
            begin
                tx_shifter_r <= { 1'b1, tx_shifter_r[9:1] };
                tx_bit_cnt_r <= tx_bit_cnt_r - 4'd1;
                tx_br_cnt_r  <= 16'd0;
            end
            else
            begin
                tx_br_cnt_r <= tx_br_cnt_r + 16'd1;
            end
        end
        else if (!tx_ready_r)
        begin
            // All bits sent: report ready again.
            tx_ready_r <= 1'b1;
        end
        else if (wr_i && reg_data_w)
        begin
            // Load a new frame: start bit (0), 8 data bits, stop bit (1).
            tx_shifter_r <= { 1'b1, wdata_i[7:0], 1'b0 };
            tx_bit_cnt_r <= 4'd10;
            tx_ready_r   <= 1'b0;
        end
    end

    assign uart_tx_o = tx_shifter_r[0];
`endif

    //-------------------------------------------------------------------------
    //- Receiver
    //-------------------------------------------------------------------------
    reg [15:0] rx_br_cnt_r;  // bit-time counter / start-bit half-bit delay
    reg [3:0]  rx_bit_cnt_r; // samples still to take (0 = not receiving)
    reg [8:0]  rx_shifter_r; // after 9 samples: {stop, data[7:0]}
    reg [7:0]  rx_data_r;    // last received byte
    reg        rx_done_r;    // one-clock pulse when a byte was stored

    always @(posedge clk_i)
    begin
        if (reset_i)
        begin
            rx_br_cnt_r  <= 16'd1;
            rx_bit_cnt_r <= 4'd0;
            rx_shifter_r <= 9'h0;
            rx_data_r    <= 8'h0; // defined value for reads before the first byte
            rx_done_r    <= 1'b0;
        end
        else if (rx_bit_cnt_r)
        begin
            // Receiving: sample the line once per bit time, newest bit on top.
            if (rx_br_cnt_r == br_r)
            begin
                rx_shifter_r <= { uart_rx_i, rx_shifter_r[8:1] };
                rx_bit_cnt_r <= rx_bit_cnt_r - 4'd1;
                rx_br_cnt_r  <= 16'd0;
            end
            else
            begin
                rx_br_cnt_r <= rx_br_cnt_r + 16'd1;
            end
        end
        else if (rx_done_r)
        begin
            rx_done_r <= 1'b0;
        end
        else if (rx_shifter_r[8])
        begin
            // Last sample (stop bit position) is high: accept the byte.
            rx_data_r    <= rx_shifter_r[7:0];
            rx_done_r    <= 1'b1;
            rx_shifter_r <= 9'h0;
        end
        else if (!uart_rx_i)
        begin
            // Line is low while idle: count down the half-bit delay loaded
            // below, then start taking 9 samples.
            if (rx_br_cnt_r)
                rx_br_cnt_r <= rx_br_cnt_r - 16'd1;
            else
                rx_bit_cnt_r <= 4'd9;
        end
        else
        begin
            // Line idle (high): keep the half-bit delay armed.
            rx_br_cnt_r <= { 1'b0, br_r[15:1] };
        end
    end

    //-------------------------------------------------------------------------
    //- RX ready flag: set by a completed byte, cleared by a data read
    //-------------------------------------------------------------------------
    reg rx_ready_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            rx_ready_r <= 1'b0;
        else if (rx_done_r)
            rx_ready_r <= 1'b1;
        else if (rd_i && reg_data_w)
            rx_ready_r <= 1'b0;
    end

    //-------------------------------------------------------------------------
    //- Control / status value
    //-------------------------------------------------------------------------
    wire [31:0] csr_w;

`ifdef SIMULATOR
    assign csr_w[BIT_CSR_TX_READY] = 1'b1;
    assign csr_w[BIT_CSR_RX_READY] = 1'b0;
`else
    assign csr_w[BIT_CSR_TX_READY] = tx_ready_r;
    assign csr_w[BIT_CSR_RX_READY] = rx_ready_r;
`endif

    assign csr_w[31:2] = 30'h0;

    //-------------------------------------------------------------------------
    //- Registered read data
    //-------------------------------------------------------------------------
    reg [31:0] reg_data_r;

    always @(posedge clk_i)
    begin
        if (reset_i)
            reg_data_r <= csr_w;
        else if (reg_data_w)
            reg_data_r <= { 24'h0, rx_data_r };
        else
            reg_data_r <= csr_w;
    end

    assign rdata_o = reg_data_r;

endmodule
per_timer.v
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: per_timer
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
// per_timer: 32-bit up-counting timer peripheral with a compare register.
//
// Register map (selected by addr_i[7:0]):
//   0x00 CSR      write: bit 0 = enable, bit 1 = disable, bit 2 = clear overflow
//                 read : bit 0 = enabled, bit 1 = !enabled, bit 2 = overflow flag
//   0x04 COUNT    current counter value (read/write)
//   0x08 COMPARE  compare value (read/write)
//
// While enabled the counter increments every clock. When the counter equals
// COMPARE (and the timer is enabled) the counter restarts from zero and the
// sticky overflow flag is set.
//
// Read data is registered: rdata_o shows the register selected by addr_i one
// clock later. Any offset other than COUNT / COMPARE returns the CSR.
// size_i and rd_i are part of the bus interface but are not used here.
module per_timer
(
    input           clk_i,
    input           reset_i,
    input   [31:0]  addr_i,
    input   [31:0]  wdata_i,
    output  [31:0]  rdata_o,
    input   [1:0]   size_i,
    input           rd_i,
    input           wr_i
);

//-----------------------------------------------------------------------------
localparam
    REG_CSR     = 8'h00,
    REG_COUNT   = 8'h04,
    REG_COMPARE = 8'h08;

localparam
    BIT_CSR_ENABLE   = 0,
    BIT_CSR_DISABLE  = 1,
    BIT_CSR_OVERFLOW = 2;

//-----------------------------------------------------------------------------
// Address decode (low byte of the address only).
wire reg_csr_w;
wire reg_count_w;
wire reg_compare_w;
wire timer_overflow_w;

assign reg_csr_w     = (REG_CSR     == addr_i[7:0]);
assign reg_count_w   = (REG_COUNT   == addr_i[7:0]);
assign reg_compare_w = (REG_COMPARE == addr_i[7:0]);

//-----------------------------------------------------------------------------
// Enable flag. A CSR write with the ENABLE bit set wins over the DISABLE bit
// when both are written together.
reg timer_enabled_r;

always @(posedge clk_i)
begin
    if(reset_i)
        timer_enabled_r <= 1'b0;
    else if(wr_i && reg_csr_w && wdata_i[BIT_CSR_ENABLE])
        timer_enabled_r <= 1'b1;
    else if(wr_i && reg_csr_w && wdata_i[BIT_CSR_DISABLE])
        timer_enabled_r <= 1'b0;
end

//-----------------------------------------------------------------------------
// Counter. Priority: reset, software write, wrap on compare match, increment.
reg [31:0] timer_count_r;

always @(posedge clk_i)
begin
    if(reset_i)
        timer_count_r <= 32'h0;
    else if(wr_i && reg_count_w)
        timer_count_r <= wdata_i;
    else if(timer_overflow_w)
        timer_count_r <= 32'h0;
    else if(timer_enabled_r)
        timer_count_r <= timer_count_r + 32'd1;
end

//-----------------------------------------------------------------------------
// Compare value.
reg [31:0] timer_compare_r;

always @(posedge clk_i)
begin
    if(reset_i)
        timer_compare_r <= 32'h0;
    else if(wr_i && reg_compare_w)
        timer_compare_r <= wdata_i;
end

//-----------------------------------------------------------------------------
// Sticky overflow flag. A CSR write with the OVERFLOW bit set clears it and
// takes priority over a compare match in the same cycle.
reg timer_overflow_r;

always @(posedge clk_i)
begin
    if(reset_i)
        timer_overflow_r <= 1'b0;
    else if(wr_i && reg_csr_w && wdata_i[BIT_CSR_OVERFLOW])
        timer_overflow_r <= 1'b0;
    else if(timer_overflow_w)
        timer_overflow_r <= 1'b1;
end

// Compare match; only meaningful while the timer is running.
assign timer_overflow_w = timer_enabled_r && (timer_count_r == timer_compare_r);

//-----------------------------------------------------------------------------
// CSR read value.
wire [31:0] csr_w;

assign csr_w[BIT_CSR_ENABLE]   = timer_enabled_r;
assign csr_w[BIT_CSR_DISABLE]  = !timer_enabled_r;
assign csr_w[BIT_CSR_OVERFLOW] = timer_overflow_r;
assign csr_w[31:3]             = 29'h0;

//-----------------------------------------------------------------------------
// Registered read mux. Previously every read returned the CSR, which made
// COUNT and COMPARE write-only; they can now be read back. The CSR remains the
// default for its own address and for any unmapped offset.
reg [31:0] reg_data_r;

always @(posedge clk_i)
begin
    if(reg_count_w)
        reg_data_r <= timer_count_r;
    else if(reg_compare_w)
        reg_data_r <= timer_compare_r;
    else
        reg_data_r <= csr_w;
end

//-----------------------------------------------------------------------------
assign rdata_o = reg_data_r;

endmodule
SoC验证相比于ASIC或单个IP的验证要更复杂,不仅需要搭建testbench产生外部激励,也要生成指令(存入Memory)对CPU的运转进行测试。
tb_riscv_soc.sv :在testbench中直接将要执行的指令load进入Memory中。
//`timescale 1ns / 1ps
// Company:
// Engineer:
//
// Create Date: 2021/03/13
// Author Name: Sniper
// Module Name: tb_riscv_soc
// Project Name:
// Target Devices:
// Tool Versions:
// Description:
//
// Dependencies:
//
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
//
// Testbench for riscv_soc.
// Generates clock and reset, preloads a 24-word test program directly into the
// SoC memory array, dumps all signals to a VCD file and stops at time 5000.
module tb_riscv_soc;

    // Parameters forwarded to the DUT.
    parameter MEM_SIZE = 8192;
    parameter FIRMWARE = "";

    // DUT connections.
    reg         clk;
    reg         reset;
    wire        lock;
    reg         uart_rx;
    wire        uart_tx;
    reg  [31:0] gpio_in;
    wire [31:0] gpio_out;

    // Initial values and reset sequence: reset is held for 100 time units and
    // released on the following rising clock edge.
    initial
    begin
        clk     = 1;
        reset   = 1;
        uart_rx = 1'b1;             // UART line idle (high)
        gpio_in = 32'hABCD5678;     // constant pattern presented on the GPIO inputs
        #100;
        @(posedge clk);
        reset <= 0;
    end

    // Free-running clock, period of 10 time units.
    always
    begin
        #5;
        clk = ~clk;
    end

    // Test program image; element 0 is the word at byte address 0.
    localparam INSTR_CNT = 30'd24;

    wire [0:INSTR_CNT-1] [31:0] instr_rom_cell = {
        32'h000062b3, // 0x00000000  or   t0, zero, zero
        32'h200002b7, // 0x00000004  lui  t0, 0x20000
        32'h12345337, // 0x00000008  lui  t1, 0x12345
        32'h67836313, // 0x0000000c  ori  t1, t1, 0x678
        32'h0062a023, // 0x00000010  sw   t1, (t0)
        32'h00006e33, // 0x00000014  or   t3, zero, zero
        32'h008e6e13, // 0x00000018  ori  t3, t3, 0x008
        32'h00006333, // 0x0000001c  or   t1, zero, zero
        32'h0fe36313, // 0x00000020  ori  t1, t1, 0x0FE
        32'h01c31333, // 0x00000024  sll  t1, t1, t3
        32'h0dc36313, // 0x00000028  ori  t1, t1, 0x0DC
        32'h0062a023, // 0x0000002c  sw   t1, (t0)
        32'h0002a383, // 0x00000030  lw   t2, (t0)
        32'h000062b3, // 0x00000034  or   t0, zero, zero
        32'h00006333, // 0x00000038  or   t1, zero, zero
        32'h000063b3, // 0x0000003c  or   t2, zero, zero
        32'h00006e33, // 0x00000040  or   t3, zero, zero
        32'h000062b3, // 0x00000044  or   t0, zero, zero
        32'h0082e293, // 0x00000048  ori  t0, t0, 0x008
        32'h12345337, // 0x0000004c  lui  t1, 0x12345
        32'h67836313, // 0x00000050  ori  t1, t1, 0x678
        32'h0062a023, // 0x00000054  sw   t1, (t0)
        32'h0002a383, // 0x00000058  lw   t2, (t0)
        32'hfa5ff06f  // 0x0000005c  jal  zero, main
    };

    // Copy the program into the memory model through a hierarchical reference
    // and echo each word that was written.
    integer idx;

    initial
    begin
        for (idx = 0; idx < INSTR_CNT; idx = idx + 1)
        begin
            u_riscv_soc.u_memory.mem_r[idx] = instr_rom_cell[idx];
            $display("mem[%0d] = %32x", idx, u_riscv_soc.u_memory.mem_r[idx]);
        end
    end

    // Device under test.
    riscv_soc
    #(
        .MEM_SIZE (MEM_SIZE),
        .FIRMWARE (FIRMWARE)
    )
    u_riscv_soc
    (
        .clk_i      (clk),
        .reset_i    (reset),
        .lock_o     (lock),
        .uart_rx_i  (uart_rx),
        .uart_tx_o  (uart_tx),
        .gpio_in_i  (gpio_in),
        .gpio_out_o (gpio_out)
    );

    // Waveform dump of the complete testbench hierarchy.
    initial
    begin
        $dumpfile("tb_riscv_soc.vcd");
        $dumpvars(0, tb_riscv_soc);
    end

    // End of simulation.
    initial
    begin
        #5000;
        $finish;
    end

endmodule
Makefile
# Makefile
# RTL simulation of the RISC-V SoC demo with VCS; waveforms are viewed in DVE.
# -------------------------
# target file : source file
# [TAB]command
#
# Every recipe line below must start with a TAB character, not spaces.

# RTL sources of the design.
RTL += ../rtl/soc/riscv_soc.v
RTL += ../rtl/cpu/riscv_cpu.v
RTL += ../rtl/bus/bus_mux.v
RTL += ../rtl/perips/per_gpio.v
RTL += ../rtl/perips/per_uart.v
RTL += ../rtl/perips/per_timer.v
RTL += ../rtl/perips/memory.v

# Testbench and the waveform file it writes.
TB := ../bench/tb_riscv_soc.sv
CURVE := ./tb_riscv_soc.vcd

# None of the targets produces a file of the same name; declare them phony so
# that a stray file called "run", "clean", ... can never suppress a recipe.
.PHONY: run compile simulate run_dve clean

# ---------- run common simulation ----------
run: compile simulate

# Compile design and testbench into ./simv (log in com.log).
compile:
	vcs -sverilog -debug_all -timescale=1ns/1ns $(RTL) $(TB) -l com.log

# Run the compiled simulation (log in run.log).
simulate:
	./simv -l run.log

# Open the dumped waveform in DVE (runs in the background).
run_dve:
	dve -vpd $(CURVE) &

# Remove everything generated by compile / simulate.
clean:
	rm -rf csrc DVEfiles simv.daidir coverage *.vdb *.key *.vcd *.vpd *.log simv
汇编指令如下所示,主要测试了GPIO读写、Memory读写。
# SoC test program: exercises GPIO write/read and a memory write/read, then
# jumps back to the start. The program is placed at address 0x0, so the n-th
# instruction (counting from 0) is located at byte address 4*n.
.org 0x0
.global _start
_start:
main:
# GPIO peripheral test: set t0 = 0x20000000, the address of the GPIO peripheral
or t0, zero,zero # clear t0
lui t0, 0x20000 # upper 20 bits of t0 = 0x20000
lui t1, 0x12345 # upper 20 bits of t1 = 0x12345
ori t1, t1, 0x678 # lower 12 bits of t1 = 0x678
sw t1, (t0) # store t1 to the address in t0, i.e. GPIO outputs 0x12345678
or t3, zero,zero # clear t3
ori t3, t3, 0x008 # lower 12 bits of t3 = 0x008
or t1, zero,zero # clear t1
ori t1, t1, 0x0FE # lower 12 bits of t1 = 0x0FE
sll t1, t1, t3 # t1 = t1 << t3 (= t1 << 8)
ori t1, t1, 0x0DC # t1 = t1 | 0x00000DC
sw t1, (t0) # store t1 to the address in t0, i.e. GPIO outputs 0x0000FEDC
lw t2, (t0) # read the GPIO input value into t2
or t0, zero,zero # clear t0
or t1, zero,zero # clear t1
or t2, zero,zero # clear t2
or t3, zero,zero # clear t3
# Memory read/write test: set t0 = 0x00000008, which lies in the memory address range
or t0, zero,zero # clear t0
ori t0, t0, 0x008 # lower 12 bits of t0 = 0x008
lui t1, 0x12345 # upper 20 bits of t1 = 0x12345
ori t1, t1, 0x678 # lower 12 bits of t1 = 0x678
# NOTE: address 0x8 also holds the third instruction of this program
# (lui t1, 0x12345), so the store below overwrites it with data.
sw t1, (t0) # store t1 to the address in t0, i.e. write 0x12345678 to memory
lw t2, (t0) # load the word at memory address (t0) into t2
return:
jal zero, main # end of program: jump to main and run again
汇编程序生成机器指令,使用到了一个软件来转换指令的格式,相关的RISC-V汇编软件网上应该也有很多。将保存下来的指令流(Verilog)添加到tb_riscv_soc.sv的56行位置即可。
在sim文件夹下包含一个Makefile,可以在sim路径下使用以下命令进行RTL仿真。
make run
make run_dve
make clean
结果如下所示:
[IC@IC sim]$ make run
vcs -sverilog -debug_all -timescale=1ns/1ns ../rtl/soc/riscv_soc.v ../rtl/cpu/riscv_cpu.v ../rtl/bus/bus_mux.v ../rtl/perips/per_gpio.v ../rtl/perips/per_uart.v ../rtl/perips/per_timer.v ../rtl/perips/memory.v ../bench/tb_riscv_soc.sv -l com.log
...
130 Read insturction: mem[0] = 0x000062b3
210 Read insturction: mem[1] = 0x200002b7
290 Read insturction: mem[2] = 0x12345337
370 Read insturction: mem[3] = 0x67836313
450 Read insturction: mem[4] = 0x0062a023
490 GPIO_OUT_DATA = 0x12345678
530 Read insturction: mem[5] = 0x00006e33
610 Read insturction: mem[6] = 0x008e6e13
690 Read insturction: mem[7] = 0x00006333
770 Read insturction: mem[8] = 0x0fe36313
850 Read insturction: mem[9] = 0x01c31333
930 Read insturction: mem[10] = 0x0dc36313
1010 Read insturction: mem[11] = 0x0062a023
1050 GPIO_OUT_DATA = 0x0000fedc
1090 Read insturction: mem[12] = 0x0002a383
1130 GPIO_READ_DATA = 0xabcd5678
1170 Read insturction: mem[13] = 0x000062b3
1250 Read insturction: mem[14] = 0x00006333
1330 Read insturction: mem[15] = 0x000063b3
1410 Read insturction: mem[16] = 0x00006e33
1490 Read insturction: mem[17] = 0x000062b3
1570 Read insturction: mem[18] = 0x0082e293
1650 Read insturction: mem[19] = 0x12345337
1730 Read insturction: mem[20] = 0x67836313
1810 Read insturction: mem[21] = 0x0062a023
1850 Write memory: mem[2][7:0] = 0x78
1850 Write memory: mem[2][15:8] = 0x56
1850 Write memory: mem[2][23:16] = 0x34
1850 Write memory: mem[2][31:24] = 0x12
1890 Read insturction: mem[22] = 0x0002a383
1930 Read memory: mem[2] = 0x12345678 (word)
1970 Read insturction: mem[23] = 0xfa5ff06f
2050 Read insturction: mem[0] = 0x000062b3
2130 Read insturction: mem[1] = 0x200002b7
2210 Read insturction: mem[2] = 0x12345678
2220 Illegal Instruction. CPU is locked.
$finish called from file "../bench/tb_riscv_soc.sv", line 121.
$finish at simulation time 5000
V C S S i m u l a t i o n R e p o r t
Time: 5000 ns
...
[IC@IC sim]$
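可以看到,当程序从main: 运行第二次时,由于第一次运行中的sw指令向地址0x8(即mem[2])写入了数据0x12345678,覆盖了原本存放在该处的指令0x12345337(lui t1, 0x12345),CPU无法把这条新的“指令”译码为合法指令,因此进入了LOCK状态。这就是指令和数据共用同一存储空间的冯·诺依曼体系结构可能存在的bug(或者也可以说是一种动态修改指令的功能)。如果要避免这种情况,可以把数据存放到程序代码以外的地址,或者为指令地址段添加写保护等访问权限控制。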
目前设计的这个SoC还比较简单,整个SoC还有进一步改进的空间:(1)CPU可以实现pipeline结构(这就要引入旁路(forwarding)技术来解决流水线的数据冒险,并引入分支预测等技术来降低控制冒险带来的开销,从而提高pipeline处理速度);(2)CPU可以实现更多的RISC-V指令;(3)CPU可以添加JTAG接口用来观测内部运行情况、载入机器指令程序;(4)CPU增加中断响应操作;(5)总线可采用AXI等标准总线实现高性能传输;(6)每个IP在读写数据接口可以增加异步FIFO作为缓存。