基于verilog的单周期处理器设计

单周期处理器是指每条指令的取指、译码、执行、访存、写回等操作都在一个时钟周期内完成。本单周期处理器支持R型指令add、addu、sub、subu、slt、sltu,移位指令sll、srl、sra,以及寄存器跳转指令jr(移位指令和jr同样采用R型编码);I型指令ori、lw、sw、beq、bne;J型指令j。单周期处理器的设计包括两大部分:一个是控制部件的设计,另一个是数据通路的设计。
设计整体框图如下:
基于verilog的单周期处理器设计_第1张图片

顶层模块如下

// Top level of the single-cycle CPU.
// Contains no logic of its own: it instantiates the control unit and the
// datapath and connects them. The datapath drives `instruction`; the control
// unit decodes it and drives every control signal back into the datapath.
module SingleCycleCpu(
    input clk,
    input rst_n
);

    // Instruction word: produced by the datapath, consumed by the controller.
    wire [31:0] instruction;

    // Control signals: produced by the controller, consumed by the datapath.
    wire [3:0]  ALUctr;
    wire        RegDst, RegWr;
    wire        ALUSrc, ExtOp;
    wire        MemWr,  MemtoReg;
    wire        beq,    bne;
    wire        Jump,   jr;

    Controler Controler(
        .instruction (instruction),
        .ALUctr      (ALUctr),
        .RegDst      (RegDst),
        .RegWr       (RegWr),
        .ALUSrc      (ALUSrc),
        .ExtOp       (ExtOp),
        .MemWr       (MemWr),
        .MemtoReg    (MemtoReg),
        .beq         (beq),
        .bne         (bne),
        .Jump        (Jump),
        .jr          (jr)
    );

    SingleDataPath SingleDataPath(
        .clk         (clk),
        .rst_n       (rst_n),
        .instruction (instruction),
        .ALUctr      (ALUctr),
        .RegDst      (RegDst),
        .RegWr       (RegWr),
        .ALUSrc      (ALUSrc),
        .ExtOp       (ExtOp),
        .MemWr       (MemWr),
        .MemtoReg    (MemtoReg),
        .beq         (beq),
        .bne         (bne),
        .Jump        (Jump),
        .jr          (jr)
    );

endmodule

指令存储器设计如下

// Instruction ROM: 256 words x 32 bits with a combinational (zero-latency) read.
//
// Parameters:
//   INIT_FILE - hex image loaded with $readmemh at time 0. The default is the
//               path the design originally hard-coded, so existing
//               instantiations keep working; override it to run elsewhere.
// Ports:
//   clk    - unused by this module (the read path is purely combinational)
//   RdAddr - word index into the ROM
//   RdData - the 32-bit word stored at RdAddr
module RomNoDelay #(
   parameter INIT_FILE = "F:/work/digital_system_practise/data_path/rtl/inst_rom.data"
)(
   input clk,
   input [7:0] RdAddr,
   output [31:0] RdData
);
  // Declared ascending so that "first word of the file -> address 0" does not
  // depend on how a tool picks the implicit $readmemh start address
  // (NOTE(review): older 1364-1995 tools start at the left-hand bound --
  // confirm against the simulator in use).
  reg [31:0] rom [0:255];

  initial begin
     $readmemh(INIT_FILE, rom);
  end

  assign RdData = rom[RdAddr];

endmodule

数据存储器设计如下

// Data RAM: 256 words x 32 bits.
// Writes are synchronous (rising edge of clk, gated by wen); reads are
// combinational, so RdData always reflects the word currently stored at RdAddr.
module RamNoDelay(
  input         clk,
  input         wen,      // write enable, sampled on the rising clock edge
  input  [7:0]  WrAddr,   // word index to write
  input  [31:0] WrData,   // data to write
  input  [7:0]  RdAddr,   // word index to read
  output [31:0] RdData    // word stored at RdAddr
);

  reg [31:0] ram [255:0];

  // Asynchronous read port.
  assign RdData = ram[RdAddr];

  // Synchronous write port.
  always @(posedge clk) begin
    if (wen) begin
      ram[WrAddr] <= WrData;
    end
  end

endmodule

寄存器堆设计如下

// Register file: 32 registers x 32 bits, two combinational read ports and one
// synchronous write port. Register 0 always reads as zero and is never written.
module dflipflop(
    input          clk,
    input  [4:0]   Ra,     // read address for port A
    input  [4:0]   Rb,     // read address for port B
    input  [4:0]   Rw,     // write address
    input          Wen,    // write enable
    output [31:0]  BusA,   // data read from register Ra
    output [31:0]  BusB,   // data read from register Rb
    input  [31:0]  BusW    // data to write into register Rw
);

    reg [31:0] DataReg [31:0];

    // A write is performed only when enabled and not aimed at register 0.
    wire do_write = Wen & (Rw != 5'd0);

    always @(posedge clk) begin
        if (do_write) begin
            DataReg[Rw] <= BusW;
        end
    end

    // Read ports: any non-zero address returns the stored word, address 0
    // returns the constant zero.
    assign BusA = (|Ra) ? DataReg[Ra] : 32'd0;
    assign BusB = (|Rb) ? DataReg[Rb] : 32'd0;

endmodule

alu的框图如下:
基于verilog的单周期处理器设计_第2张图片
alu模块代码如下:

// 32-bit ALU.
//
// alu_CTL encoding (as produced by the control unit in this design):
//   0000 addu   0001 add    0010 subu   0011 sub
//   0100 and    0101 or     0110 xor    0111 nor
//   1000 sltu   1001 slt
//   1100 sll    1101 srl    1110 sra
//
// Ports:
//   alu_DA, alu_DB - operands; subtraction / comparison computes DA - DB,
//                    shifts operate on DB
//   alu_CTL        - operation select, see table above
//   alu_SHIFT      - shift amount for the shift operations
//   alu_ZERO       - 1 when the adder/subtractor result is zero
//   alu_Overflow   - signed overflow, reported only for add and sub
//   alu_DC         - result
//
// Fixes relative to the previous implementation:
//   * Subtraction now uses the two's complement of DB for every DB
//     (DA + ~DB + 1). The old code cleared the sign bit of a negative DB,
//     which is not its negation, so e.g. 0x80000010 - 0x80000001 was wrong.
//   * The "+1" is applied as a carry-in of the 33-bit addition, so the
//     carry-out is a valid "no borrow" flag (needed by sltu, e.g. x < 0).
//   * Overflow is no longer reported for slt: the comparison must always be
//     able to write its result.
//   * slt/sltu return 1 (not 0xffffffff) when the comparison holds.
module alu(
  input [31:0] alu_DA,
  input [31:0] alu_DB,
  input [3:0] alu_CTL,
  input [4:0] alu_SHIFT,
  output alu_ZERO,
  output alu_Overflow,
  output reg [31:0] alu_DC 
);
    
  /******************general ctr ****************************/
  wire SUBctr;            // 1: adder computes DA - DB (sub, subu, slt, sltu)
  wire SIGctr;            // 1: signed comparison (slt), 0: unsigned (sltu)
  wire Ovctr;             // 1: overflow is reported (add, sub only)
  wire [1:0] Opctr;       // selects adder / logic / compare / shift result
  wire [1:0] Logicctr;    // selects and / or / xor / nor
  wire [1:0] Shiftctr;    // selects sll / srl / sra
  
  assign SUBctr = ((~alu_CTL[3]) &  ~alu_CTL[2] & alu_CTL[1])|(alu_CTL[3] & ~alu_CTL[2]);
  assign Opctr = alu_CTL[3:2];
  assign Ovctr = ~alu_CTL[3] & ~alu_CTL[2] & alu_CTL[0];
  assign SIGctr = alu_CTL[0];
  assign Logicctr = alu_CTL[1:0];
  assign Shiftctr = alu_CTL[1:0];

  /*****************logic op****************************/
  reg [31:0] logic_result;
  
  always@(*)begin
    case(Logicctr)
	   2'b00:logic_result = alu_DA & alu_DB;
	   2'b01:logic_result = alu_DA | alu_DB;
	   2'b10:logic_result = alu_DA ^ alu_DB;
	   2'b11:logic_result = ~(alu_DA | alu_DB);
	endcase
  end
  
  /********************shift op****************************/
  wire [31:0] shift_result;
  
  Shifter Shifter(
      .alu_DB(alu_DB),
      .ALUSHIFT(alu_SHIFT),
      .Shiftctr(Shiftctr),
	  .shift_result(shift_result)
  ); 
  
  /************************add sub op*********************************/
  wire [31:0] BIT_M,XOR_M;
  wire ADD_carry,ADD_OverFlow;
  wire [31:0] ADD_result;

  assign BIT_M = {32{SUBctr}};
  // DB for addition, ~DB for subtraction; the "+1" of the two's complement
  // enters below as the carry-in.
  assign XOR_M = BIT_M ^ alu_DB;
  // 33-bit sum so that the carry-out is kept.
  assign {ADD_carry,ADD_result} = {1'b0,alu_DA} + {1'b0,XOR_M} + {32'd0,SUBctr};
  // Signed overflow: both adder inputs have the same sign and the result
  // has the opposite one.
  assign ADD_OverFlow = (alu_DA[31] == XOR_M[31]) & (ADD_result[31] != alu_DA[31]);
  assign alu_Overflow = ADD_OverFlow & Ovctr;
  assign alu_ZERO = (|ADD_result)?1'b0:1'b1;

  /****************************slt op****************************/
  wire [31:0] SLT_result;
  wire LESS_M1,LESS_M2,LESS_S;
  
  // Unsigned: DA < DB exactly when DA + ~DB + 1 produces no carry-out.
  assign LESS_M1 = ADD_carry ^ SUBctr;
  // Signed: sign of the true difference = result sign corrected by overflow.
  assign LESS_M2 = ADD_OverFlow ^ ADD_result[31];
  assign LESS_S = (SIGctr == 1'b0)?LESS_M1:LESS_M2;
  assign SLT_result = {31'd0,LESS_S};
  
  /****************************ALU_result***************************/
  always@(*)
  begin
    case(Opctr)
	  2'b00:alu_DC = ADD_result;
	  2'b01:alu_DC = logic_result;
	  2'b10:alu_DC = SLT_result;
	  2'b11:alu_DC = shift_result;	  
	endcase
  end
 
endmodule 

移位器设计如下:

// Combinational 32-bit shifter.
//   Shiftctr = 00 : logical left shift of alu_DB by ALUSHIFT
//   Shiftctr = 01 : logical right shift
//   Shiftctr = 10 : arithmetic right shift (sign bit replicated)
//   Shiftctr = 11 : alu_DB passed through unchanged
module Shifter(
  input [31:0] alu_DB,
  input [4:0] ALUSHIFT,
  input [1:0] Shiftctr,
  output reg  [31:0] shift_result
);

  // The three candidate results, computed in parallel.
  wire [31:0] left_logical  = alu_DB << ALUSHIFT;
  wire [31:0] right_logical = alu_DB >> ALUSHIFT;
  wire [31:0] right_arith   = $signed(alu_DB) >>> ALUSHIFT;

  always @(*) begin
    if (Shiftctr == 2'b00)
      shift_result = left_logical;
    else if (Shiftctr == 2'b01)
      shift_result = right_logical;
    else if (Shiftctr == 2'b10)
      shift_result = right_arith;
    else
      shift_result = alu_DB;
  end

endmodule

取指令部件的框图
基于verilog的单周期处理器设计_第3张图片
取指令部件模块的代码

// Instruction fetch unit.
// Holds the program counter as a 30-bit word address, reads the instruction
// ROM combinationally and selects the next PC.
//
// Next-PC priority (highest first):
//   jump          -> {PC[29:26], ins[25:0]}
//   jr            -> BusA[31:2]
//   taken branch  -> PC + 1 + sign-extended ins[15:0]
//   otherwise     -> PC + 1
// A branch is taken when (beq and zero) or (bne and not zero).
module if_fetch(
   input rst_n,          // asynchronous reset, active low: PC <= 0
   input clk,
   input jump,
   input beq,
   input bne,
   input zero,
   input jr,
   input [31:0] BusA,    // jr target (byte address; bits [31:2] are used)
   output [31:0] ins     // instruction word at the current PC
);

    reg [29:0] PC;

    // Only the low 8 bits of the word address reach the 256-entry ROM.
    RomNoDelay RomNoDelay(
        .clk    (clk),
        .RdAddr (PC[7:0]),
        .RdData (ins)
    );

    // Candidate next-PC values.
    wire [29:0] seq_pc      = PC + 1'b1;
    wire [29:0] branch_disp = {{14{ins[15]}}, ins[15:0]};
    wire [29:0] branch_pc   = seq_pc + branch_disp;
    wire [29:0] jump_pc     = {PC[29:26], ins[25:0]};
    wire [29:0] jr_pc       = BusA[31:2];

    wire branch_taken = (beq && zero) || (bne && !zero);

    wire [29:0] PC_next = jump         ? jump_pc
                        : jr           ? jr_pc
                        : branch_taken ? branch_pc
                        :                seq_pc;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            PC <= 30'd0;
        else
            PC <= PC_next;
    end

endmodule



数据通路模块的代码如下

// Datapath of the single-cycle CPU: instruction fetch, register file, ALU and
// data memory, steered by the control signals from the control unit.
//
// Inputs (besides clk / rst_n) are control signals:
//   RegDst   - 1: write register is Rd (instruction[15:11]), 0: Rt ([20:16])
//   RegWr    - register file write enable (suppressed on ALU overflow)
//   Jump/jr  - next-PC selection, forwarded to the fetch unit
//   beq/bne  - branch type, combined with the ALU zero flag in the fetch unit
//   ExtOp    - 1: sign-extend the 16-bit immediate, 0: zero-extend
//   ALUSrc   - 1: ALU operand B is the extended immediate, 0: register Rt
//   ALUctr   - ALU operation select
//   MemWr    - data memory write enable
//   MemtoReg - 1: register write data comes from data memory, 0: from the ALU
// Output:
//   instruction - the instruction word fetched at the current PC
//
// Fix relative to the previous implementation: the data memory is
// word-organised, but it was indexed with the full byte address (implicitly
// truncated to its low 8 bits). It is now indexed with ALU_DC[9:2], the word
// index of the byte address, mirroring how the fetch unit indexes the
// instruction ROM with a word address.
module SingleDataPath(
		clk,
		rst_n,
		RegDst,
		RegWr,
		Jump,
		ExtOp,
		ALUSrc,
		ALUctr,
		MemWr,
		MemtoReg,
		instruction,
		bne,
		beq,
		jr
);

input clk;
input rst_n;
input RegDst;
input RegWr;
input Jump;
input ExtOp;
input ALUSrc;
input [3:0]ALUctr;
input MemWr;
input MemtoReg;
input bne;
input beq;
input jr;
output [31:0]instruction;


wire zero;                 // ALU zero flag, used for branch decisions
wire [31:0]instruction;
wire [4:0] Rw;             // destination register number
wire Rwen;                 // final register write enable
wire [31:0] BusA;          // register file read port A (Rs)
wire [31:0] BusB;          // register file read port B (Rt)
wire [31:0] BusBm;         // ALU operand B after the ALUSrc mux
wire [31:0] BusW;          // register file write data
wire [31:0] ALU_DC;        // ALU result; also the data memory byte address
wire [31:0]RdData;         // data memory read data
wire overflow;
wire [31:0] Extimme;       // 16-bit immediate extended to 32 bits


// Instruction fields.
wire [4:0] Rs;
wire [4:0] Rt;
wire [4:0] Rd;
wire [4:0] shift;
wire [15:0] imme;

assign Rs = instruction[25:21];
assign Rt = instruction[20:16];
assign Rd = instruction[15:11];
assign shift = instruction[10:6];
assign imme = instruction[15:0];
assign Rw = (RegDst)?Rd:Rt;
// An overflowing add/sub must not update the register file.
assign Rwen = (~overflow) & RegWr;
assign BusW = (MemtoReg)?RdData:ALU_DC;
assign Extimme = (ExtOp)?{{16{imme[15]}},imme}:{16'b0,imme};
assign BusBm = (ALUSrc)?Extimme:BusB;

alu alu(
	  .alu_DA(BusA),
	  .alu_DB(BusBm),
	  .alu_CTL(ALUctr),
	  .alu_SHIFT(shift),
	  .alu_ZERO(zero),
	  .alu_Overflow(overflow),
	  .alu_DC(ALU_DC)
);

dflipflop regfile( 
         .clk(clk),
		 .Ra(Rs),
		 .Rb(Rt),
		 .Rw(Rw),
		 .Wen(Rwen),
		 .BusA(BusA),
		 .BusB(BusB),
		 .BusW(BusW)
);


if_fetch if_fetch(
	   .rst_n(rst_n),
	   .clk(clk),
	   .jump(Jump),
	   .beq(beq),
	   .bne(bne),
	   .zero(zero),
	   .jr(jr),
	   .BusA(BusA),
	   .ins(instruction)  
);

// ALU_DC is a byte address; bits [9:2] select one of the 256 memory words.
RamNoDelay RamNoDelay(
     .clk(clk),
	  .wen(MemWr),
	  .WrAddr(ALU_DC[9:2]),
	  .WrData(BusB),
	  .RdAddr(ALU_DC[9:2]),
	  .RdData(RdData)
);

endmodule 

控制部件的模块代码如下

// Control unit: decodes a 32-bit instruction into the datapath control signals.
//
// The opcode (instruction[31:26]) is decoded by `maincontrol`, the function
// field (instruction[5:0]) by `aluControl`. For R-type instructions the ALU
// operation comes from the function-field decoder, otherwise from the opcode
// decoder. `jr` is recognised here directly (opcode 000000, funct 001000).
module Controler(
    input  [31:0] instruction,
    output        RegDst,
    output        bne,
    output        beq,
    output        Jump,
    output        ExtOp,
    output        ALUSrc,
    output [3:0]  ALUctr,
    output        MemWr,
    output        RegWr,
    output        MemtoReg,
    output        jr
);

    // Instruction fields used for decoding.
    wire [5:0] op   = instruction[31:26];
    wire [5:0] func = instruction[5:0];

    wire       R_type;         // from the opcode decoder
    wire [3:0] op_alu_sel;     // ALU operation chosen from the opcode
    wire [3:0] func_alu_sel;   // ALU operation chosen from the function field

    maincontrol maincontrol(
        .op       (op),
        .R_type   (R_type),
        .aluOp    (op_alu_sel),
        .RegDst   (RegDst),
        .RegWr    (RegWr),
        .ALUSrc   (ALUSrc),
        .ExtOp    (ExtOp),
        .MemWr    (MemWr),
        .MemtoReg (MemtoReg),
        .beq      (beq),
        .bne      (bne),
        .Jump     (Jump)
    );

    aluControl aluControl(
        .func  (func),
        .aluOp (func_alu_sel)
    );

    assign ALUctr = R_type ? func_alu_sel : op_alu_sel;
    assign jr     = (op == 6'b000000) && (func == 6'b001000);

endmodule
// Opcode decoder: derives the datapath control signals from instruction[31:26].
//
// Recognised opcodes: R-type (000000), lw (100011), sw (101011),
// beq (000100), bne (000101), j (000010), ori (001101).
//
// aluOp is the ALU operation for non-R-type instructions, using the same
// encoding as the function-field decoder (0000 addu, 0011 sub, 0101 or):
//   lw / sw   -> 0000 (addu): the address is base + offset with no overflow
//                check. The previous code selected 0001 (add); the datapath
//                suppresses the register write when the ALU reports
//                overflow, so a wrapping address calculation silently
//                dropped the lw result.
//   beq / bne -> 0011 (sub): the zero flag of rs - rt decides the branch
//   ori       -> 0101 (or)
module maincontrol(
    op,
	RegDst,
	beq,
	bne,
	Jump,
	ExtOp,
	ALUSrc,
	aluOp,
	MemWr,
	MemtoReg,
	RegWr,
	R_type
);

input [5:0] op;
output RegDst;
output RegWr;
output Jump;
output ExtOp;
output ALUSrc;
output [3:0] aluOp;
output MemWr;
output MemtoReg;
output R_type;
output beq;
output bne;

// One-hot opcode matches.
wire lw = (op == 6'b100011);
wire sw = (op == 6'b101011);
wire beq1 = (op == 6'b000100);
wire bne1 = (op == 6'b000101);
wire j = (op == 6'b000010);
wire ori = (op == 6'b001101);

assign R_type = (op == 6'b000000);
assign RegDst = R_type;               // R-type writes Rd, the others write Rt
assign RegWr = R_type | lw | ori;
assign beq = beq1; 
assign bne = bne1;
assign Jump = j;
assign ExtOp = lw | sw;               // sign-extend the offset; ori zero-extends
assign ALUSrc = lw | sw | ori;        // operand B is the immediate
assign MemWr  = sw;
assign MemtoReg = lw;

// lw / sw contribute nothing here: they use the default 0000 (addu).
assign aluOp = ({4{beq1 | bne1}} & 4'b0011) | ({4{ori}} & 4'b0101);
				
endmodule 
// Function-field decoder for R-type instructions: maps instruction[5:0] to
// the 4-bit ALU operation code. Any function code not listed yields 0000.
module aluControl(
   input [5:0] func,
   output [3:0] aluOp
);

    reg [3:0] decoded;

    always @(*) begin
        case (func)
            6'b100000: decoded = 4'b0001;  // add
            6'b100001: decoded = 4'b0000;  // addu
            6'b100010: decoded = 4'b0011;  // sub
            6'b100011: decoded = 4'b0010;  // subu
            6'b100100: decoded = 4'b0100;  // and
            6'b100101: decoded = 4'b0101;  // or
            6'b100110: decoded = 4'b0110;  // xor
            6'b100111: decoded = 4'b0111;  // nor
            6'b101011: decoded = 4'b1000;  // sltu
            6'b101010: decoded = 4'b1001;  // slt
            6'b000000: decoded = 4'b1100;  // sll
            6'b000010: decoded = 4'b1101;  // srl
            6'b000011: decoded = 4'b1110;  // sra
            default:   decoded = 4'b0000;
        endcase
    end

    assign aluOp = decoded;

endmodule

1、 总体仿真验证
1.1 验证方案
把之前编写的makefile、BinMem.exe、ram.ld放到汇编源代码所在的目录,输入命令make all,即可得到与汇编代码对应的16进制格式指令;仿真时,将汇编代码注释中给出的预期结果与仿真波形逐条比较。
编写的inst_rom.S如下:

# Test program for the single-cycle CPU. It exercises ori, sll, addu, sub,
# subu, slt, sltu, sw, lw, srl, sra, bne, beq, jr, j and add in that order.
# The comment on each line gives the register value expected after the
# instruction has executed.
.org 0x0
   .set noat
   .set noreorder
   .set nomacro
   .global _start
_start:


   ori   $1,$0,0x8000           # $1 = 0x00008000
   sll   $1,$1,16               # $1 = 0x80000000
   ori   $1,$1,0x0010           # $1 = 0x80000010

   ori   $2,$0,0x8000           # $2 = 0x00008000
   sll   $2,$2,16               # $2 = 0x80000000
   ori   $2,$2,0x0001           # $2 = 0x80000001

   ori   $3,$0,0x0000           # $3 = 0x00000000
   addu  $3,$2,$1               # $3 = 0x00000011 (the carry out of bit 31 is discarded)
   ori   $3,$0,0x0000           # $3 = 0x00000000
   

   sub   $3,$1,$3              # $3 = 0x80000010
   subu  $3,$3,$2              # $3 = 0x0000000f (0x80000010 - 0x80000001)
   

   #########     slt\sltu    ##########

   ori   $1,$0,0xffff           # $1 = 0x0000ffff
   sll  $1,$1,16               # $1 = 0xffff0000
   slt  $2,$1,$0               # $2 = 1 (signed: 0xffff0000 is negative, so $1 < 0)
   sltu $2,$2,$0               # $2 = 0 (unsigned: nothing is less than 0)
  
   #########     lw\sw    ##########
   ori  $3,$0,0x4455     # $3 = 0x00004455
   sll  $3,$3,0x10       # $3 = 0x44550000
   ori  $3,$3,0x6677     # $3 = 0x44556677
   sw   $3,0x8($0)       # [0x8] = 0x44, [0x9]= 0x55, [0xa]= 0x66, [0xb] = 0x77
   lw   $1,0x8($0)       # $1 = 0x44556677
   
   #########     sll\srl\sra    ##########
   sll  $1,$1,4          # $1 = 0x45566770
   srl  $1,$1,4          # $1 = 0x04556677
   ori  $1,$0,0x8000    # $1 = 0x00008000
   sll   $1,$1,16        # $1 = 0x80000000
   sra   $1,$1,4         # $1 = 0xf8000000
   
   #########     bne\beq    ##########
   bne   $1,$0,N1        # $1 != 0, so the branch to N1 is taken and both nops are skipped
   nop
   nop
   
   N1:   beq $1,$1,s3    # always taken; note that s3 is also the next sequential instruction
   
   s3:   ori   $1,$0,0x80           #  $1 = 0x00000080
         jr    $1                     #  $1 = 0x00000080, jump to address 0x00000080
   
   .org 0x80
   j  0x90               # jump to address 0x90
   
   .org 0x90
   ori   $2,$0,56                 # $2 = 56 
   ori   $1,$0,4                 # $1 = 4
   ori   $3,$0,0                 # $3 = 0
   add   $3,$1,$2                # $3 = 60

生成的初始化ROM中的16进制指令如下
34018000
00010c00
34210010
34028000
00021400
34420001
34030000
00411821
34030000
00231822
00621823
3401ffff
00010c00
0020102a
0040102b
34034455
00031c00
34636677
ac030008
8c010008
00010900
00010902
34018000
00010c00
00010903
14200002
00000000
00000000
10210000
34010080
00200008
00000000
08000024
00000000
00000000
00000000
34020038
34010004
34030000
00221820

1.2 仿真结果(所有指令格式与ROM中一致,为16进制)
34210010 ori $1,$1,0x0010 # $1 = 0x80000010
观察BusW为0x80000010
基于verilog的单周期处理器设计_第4张图片
00411821 addu $3,$2,$1 # $3 = 0x00000011
观察BusW为0x00000011
基于verilog的单周期处理器设计_第5张图片
00231822 sub $3,$1,$3 # $3 = 0x80000010
观察BusW为0x80000010
基于verilog的单周期处理器设计_第6张图片
0020102a slt $2,$1,$0 # $2 = 0xffffffff
观察BusW为0xffffffff(注:按MIPS规范,slt在条件成立时结果应为1,与前面汇编代码注释中的"$2 = 1"一致;截图中BusW为0xffffffff,是因为仿真时ALU的SLT_result在条件成立时输出全1)
基于verilog的单周期处理器设计_第7张图片
ac030008为sw指令的16进制编码,8c010008为lw指令的16进制编码
sw $3,0x8($0) # [0x8] = 0x44, [0x9]= 0x55, [0xa]= 0x66, [0xb] = 0x77
lw $1,0x8($0) # $1 = 0x44556677
观察BusW的值为0x44556677
基于verilog的单周期处理器设计_第8张图片
00010900 sll $1,$1,4 # $1 = 0x45566770
观察BusW的值为0x45566770
基于verilog的单周期处理器设计_第9张图片
00010902 srl $1,$1,4 # $1 = 0x04556677
观察BusW的值为0x04556677
基于verilog的单周期处理器设计_第10张图片
00010903 sra $1,$1,4 # $1 = 0xf8000000
观察BusW的值为0xf8000000
基于verilog的单周期处理器设计_第11张图片
bne $1,$0,N1
nop
nop

N1: beq $1,$1,s3

s3: ori $1,$0,0x80 # $1 = 0x00000080
如果能够执行到最后一步(s3处的ori指令),说明beq和bne指令执行正确。

观察BusW的值为0x00000080,且取出的指令为34010080,就是ori指令的16进制编码
基于verilog的单周期处理器设计_第12张图片

jr $1 # $1 =0x00000080 跳转到 0x00000080处
.org 0x80
j 0x90

.org 0x90
ori $2,$0,56 # $2 = 56
ori $1,$0,4 # $1 = 4
ori $3,$0,0 # $3 = 0
add $3,$1,$2 # $3 = 60
如果最后四条指令都被执行,则说明j和jr指令仿真正确。

Add指令16进制编码为00221820,为最后一条指令,最后结果得60
观察BusW的值依次为56,4,0,60
基于verilog的单周期处理器设计_第13张图片
到此完整的单周期处理器设计加仿真就完成了。
附上控制器的仿真代码

`timescale 1ns/1ns

// Stimulus-only testbench for the Controler module.
// Applies one instruction word per supported instruction, holds each for
// 5 ns, then waits a further 10 ns and stops the simulation. Nothing is
// checked automatically: the control outputs are meant to be inspected in the
// waveform viewer.
module Controler_tb;

    reg  [31:0] instruction;

    wire [3:0]  ALUctr;
    wire        RegDst, RegWr;
    wire        ALUSrc, ExtOp;
    wire        MemWr,  MemtoReg;
    wire        beq,    bne;
    wire        Jump,   jr;

    Controler Controler(
        .instruction (instruction),
        .ALUctr      (ALUctr),
        .RegDst      (RegDst),
        .RegWr       (RegWr),
        .ALUSrc      (ALUSrc),
        .ExtOp       (ExtOp),
        .MemWr       (MemWr),
        .MemtoReg    (MemtoReg),
        .beq         (beq),
        .bne         (bne),
        .Jump        (Jump),
        .jr          (jr)
    );

    // Drive an instruction built from opcode and function field (all register
    // and shift-amount fields are zero) and hold it for 5 ns.
    task drive;
        input [5:0] opcode;
        input [5:0] funct;
        begin
            instruction = {opcode, 20'd0, funct};
            #5;
        end
    endtask

    initial begin
        // R-type arithmetic and comparison
        drive(6'b000000, 6'b100000);   // add
        drive(6'b000000, 6'b100001);   // addu
        drive(6'b000000, 6'b100010);   // sub
        drive(6'b000000, 6'b100011);   // subu
        drive(6'b000000, 6'b101010);   // slt
        drive(6'b000000, 6'b101011);   // sltu

        // R-type logic
        drive(6'b000000, 6'b100100);   // and
        drive(6'b000000, 6'b100101);   // or
        drive(6'b000000, 6'b100110);   // xor
        drive(6'b000000, 6'b100111);   // nor

        // R-type shifts
        drive(6'b000000, 6'b000000);   // sll
        drive(6'b000000, 6'b000010);   // srl
        drive(6'b000000, 6'b000011);   // sra

        // Branches and jumps
        drive(6'b000100, 6'b000000);   // beq
        drive(6'b000101, 6'b000000);   // bne
        drive(6'b000000, 6'b001000);   // jr
        drive(6'b000010, 6'b000000);   // j

        #10;
        $stop;
    end

endmodule

你可能感兴趣的:(基于verilog的单周期处理器设计)