CPU设计之三——VerilogHDL 开发流水线处理器(支持50条指令)

CPU设计之一——VerilogHDL 开发单周期处理器(支持10条指令)
CPU设计之二——VerilogHDL 开发流水线处理器(支持42条指令)

所有代码和参考文件已经上传至github:https://github.com/lcy19981225/Multi-Cycle-CPU-50

VerilogHDL 开发流水线处理器(支持50条指令)

写在前面

  • 本实验是对前面已经交过的42条的实验进行的补充,补充的内容为乘除法的实验,大家在做之前应该参照MIPS手册(单周期已经发过)搞懂乘除法的具体操作再开始

  • 对于乘除法,我们需要在EX阶段增加一个计算单元md,专门用来计算乘除法,同时lo和hi寄存器也放在md中。新增的操作包括三类:四种乘除运算(mult,multu,div,divu),结果存入lo和hi;把rs的数据写入lo或hi(mtlo,mthi);把lo或hi中的数据读出到寄存器堆(mflo,mfhi)。参考图如下

  • 乘除法部件接口设计

信号名 方向 描述
D1[31:0] Input 1.执行乘除法指令时的第1个操作数 2.执行 mthi/mtlo 指令时的写入数据
D2[31:0] Input 执行乘除法指令时的第2个操作数
HiLo Input 待写入的寄存器 0:LO寄存器 1:HI寄存器
Op[1:0] Input 运算类型 00:无符号乘法 01:有符号乘法 10:无符号除法 11:有符号除法
Start Input 运算启动。该信号只有效1个cycle 1:启动
We Input HI或LO寄存器的写使能
Busy Output 乘除法模块的忙标志 0:乘除单元未执行运算 1:乘除单元正在执行运算
HI[31:0] Output HI寄存器的输出值
LO[31:0] Output LO寄存器的输出值
Clk,Rst Input 时钟,复位
  • 自Start信号为1后的第一个时钟上升沿开始,乘除部件开始执行运算,同时Busy置为1。 在运算结果保存到HI和LO后,Busy位清除为0。
  • 当Busy为1时,mfhi,mflo,mthi,mtlo,mult,multu,div,divu 均被阻塞,即被阻塞在IF/ID。
  • 数据写入HI或LO,均只需1个cycle。

模块设计

md

利用Verilog自带的乘法(*),除法(/)和取余(%)运算符完成计算,模块内同时包含lo和hi寄存器

// md: multiply/divide unit used in the EX stage; it also holds the HI and LO
// registers.
//
// Ports
//   Clk                    clock; not used by the current logic
//   rs, rt                 32-bit operands; rs is also the value written by
//                          mthi / mtlo
//   md_control             operation select
//                            000 mult   001 multu   010 div   011 divu
//                            100 mthi   101 mtlo
//                            110 / 111  never write HI or LO in this module
//   updatemd               write enable for HI / LO
//   start_mult, start_div  not used by the current logic
//   busy                   constant 0: no multi-cycle busy countdown is
//                          implemented, every operation completes combinationally
//   hi, lo                 current contents of HI / LO
//
// HI and LO are level-sensitive storage (latches), not clocked registers: a new
// value is visible on hi/lo in the same cycle in which updatemd is asserted.
// The storage has no reset, so hi/lo are X in simulation until first written.
module md(Clk,rs,rt,md_control,updatemd,start_mult,start_div,busy,hi,lo);
    input Clk,start_mult,start_div,updatemd;
    input [31:0] rs,rt;
    input [2:0]md_control;
    output reg[3:0] busy;
    output reg [31:0]hi,lo;

    wire [63:0] res_mult,res_multu;
    wire [31:0] res_mult_hi,res_mult_lo;
    wire [31:0] res_multu_hi,res_multu_lo;
    wire [31:0] res_div_q,res_div_r,res_divu_q,res_divu_r;
    wire [31:0] res_hi,res_lo;

    // Signed and unsigned 32x32 -> 64 bit products. The 64-bit target makes
    // the operands extend (sign / zero) to 64 bits before multiplying.
    assign res_mult     = $signed(rs) * $signed(rt);
    assign res_mult_hi  = res_mult[63:32];
    assign res_mult_lo  = res_mult[31:0];
    assign res_multu    = rs * rt;
    assign res_multu_hi = res_multu[63:32];
    assign res_multu_lo = res_multu[31:0];

    // Quotient and remainder. When rt == 0 the Verilog / and % operators
    // yield X; this module does not guard against a zero divisor.
    assign res_div_q  = $signed(rs) / $signed(rt);
    assign res_div_r  = $signed(rs) % $signed(rt);
    assign res_divu_q = rs / rt;
    assign res_divu_r = rs % rt;

    // Candidate values for HI / LO. Every md_control value other than
    // 000..011 passes rs straight through (used by mthi / mtlo).
    assign res_hi = (md_control == 3'b000) ? res_mult_hi  :
                    (md_control == 3'b001) ? res_multu_hi :
                    (md_control == 3'b010) ? res_div_r    :
                    (md_control == 3'b011) ? res_divu_r   :
                                             rs;
    assign res_lo = (md_control == 3'b000) ? res_mult_lo  :
                    (md_control == 3'b001) ? res_multu_lo :
                    (md_control == 3'b010) ? res_div_q    :
                    (md_control == 3'b011) ? res_divu_q   :
                                             rs;

    initial begin
        busy = 0;
    end

    // HI / LO update.
    //
    // The sensitivity list is @* on purpose. Listing only res_hi / res_lo
    // loses a write whenever updatemd or md_control changes while the selected
    // result keeps its previous numeric value (for example a mult whose
    // operands were already present on rs / rt during the preceding
    // instruction).
    //
    // updatemd and md_control are tested directly here rather than through
    // intermediate wires, so a change of res_hi / res_lo is never evaluated
    // against a stale copy of the enable.
    //
    // Not assigning in the disabled case is intentional: it is what makes
    // hi / lo hold their value.
    always @* begin
        if (updatemd == 1'b1) begin
            // mult, multu, div, divu write both registers; mthi writes HI only
            if (!md_control[2] || md_control == 3'b100) begin
                hi = res_hi;
            end
            // mult, multu, div, divu write both registers; mtlo writes LO only
            if (!md_control[2] || md_control == 3'b101) begin
                lo = res_lo;
            end
        end
    end

endmodule

mips

将所有的模块合起来

// mips: top level of the pipelined CPU. It contains no logic of its own and
// only instantiates the stage modules and wires them together.
//
// The section markers below (IF / ID / EX / MEM / WB) follow the pipeline
// stages, and the REG_* instances sit between adjacent stages. Signal prefixes
// IF_, ID_, E_, M_ and W_ follow the same stage naming: for example REG_ID_EX
// takes ID_* signals and produces the matching E_* signals.
//
// All submodules are connected by position, so the order of every argument
// list matters. The submodule definitions are outside this listing (except md).
//
// Ports
//   Clk    clock, passed to the PC, the pipeline registers, the register
//          file, the data memory and md
//   Reset  reset, passed to the PC, the pipeline registers and the register file
module mips(Clk,Reset);

input Clk,Reset;

// 32-bit data paths: addresses, instruction words, operands, results
wire [31:0] IF_NextAddr,IF_Addr,WriteData,WriteData_final,Alu_Y,Alu_X,E_NUM_X,E_NUM_Y,ID_ext32_L2,ID_B,ID_J,IF_PCAdd4,ID_PCAdd4,IF_Inst,ID_Qa,ID_Qb,ID_rs,ID_rt,ID_ext32,E_Alu_Out,ID_Inst,M_Dout,W_ext_Dout,E_res_hi,E_res_lo;
wire [31:0] E_Qa,E_Qb,M_Alu_Out,M_NUM_Y,W_Alu_Out,W_Dout,E_ext32,E_sa,ID_sa,M_res_hi,M_res_lo,W_res_hi,W_res_lo;
// 5-bit destination register number, one copy per stage
wire [4:0] W_WriteReg,M_WriteReg;
wire [4:0] E_WriteReg;
wire [4:0] ID_WriteReg;
wire [1:0] E_save_option;
wire [1:0] ID_save_option,M_save_option;
// 3-bit selects: forwarding, next-PC source, load variant, multiply/divide operation
wire [2:0] E_FwdA,E_FwdB,ID_FwdB,ID_FwdA,ID_PCSrc,ID_load_option,E_load_option,M_load_option,W_load_option,ID_md_control,E_md_control,M_md_control,W_md_control;
wire [3:0] ID_ALUControl,E_ALUControl,BE;
// 1-bit control signals
// NOTE(review): ID_B_code and M_RegDst are declared but ID_B_code is never
// referenced below (ID_Inst[16] is passed to REG_ID_EX directly and comes out
// as E_B_code) -- confirm whether it can be removed.
wire Se,Z,E_RegDst,c_adventure,ID_RegDst,M_RegWrite,E_RegWrite,W_RegWrite,ID_RegWrite,E_ALUXSrc,E_ALUYSrc,M_RegDst,W_MemtoReg,ID_ALUXSrc,ID_ALUYSrc;
wire ID_MemtoReg,ID_MemWrite,ID_usigned;
wire E_MemtoReg,E_MemWrite,E_usigned,M_MemtoReg,M_MemWrite,ID_B_code,E_B_code,over,ID_md_signal,E_md_signal,M_md_signal,W_md_signal,ID_start_mult,ID_start_div,E_start_mult,E_start_div;
// busy is driven by md (EX stage) and fed to PC, REG_IF_ID, REG_ID_EX and REG_EX_MEM
wire [3:0] busy;
wire stall,stallstall,Cout,ID_mfhi,ID_mflo,ID_updatemd,E_updatemd;


//IF

// Next-PC selection among IF_PCAdd4, ID_B, ID_J and ID_rs, controlled by ID_PCSrc
MUX4X32_addr mux4x32(IF_PCAdd4,ID_B,ID_J,ID_rs,ID_PCSrc,IF_NextAddr);
PC PC(IF_NextAddr,Clk,Reset,IF_Addr,stall,stallstall,busy);
PCAdd4 PCAdd4(IF_Addr,IF_PCAdd4);
im_4k im_4k(IF_Addr,IF_Inst);

// IF/ID pipeline register; also receives stall, stallstall and busy
REG_IF_ID REG_IF_ID(IF_PCAdd4,IF_Inst,Clk,Reset,ID_PCAdd4,ID_Inst,stall,stallstall,busy);

//ID
// Control unit: receives the instruction word, its funct field ID_Inst[5:0],
// bit 16 and c_adventure, and is connected to all ID_* control signals
CU CU(ID_start_mult,ID_start_div,ID_mfhi,ID_mflo,ID_Inst,ID_Inst[5:0],ID_Inst[16],ID_RegDst,Se,ID_RegWrite,ID_ALUXSrc,ID_ALUYSrc,ID_ALUControl,ID_md_control,ID_MemWrite,ID_PCSrc,ID_MemtoReg,ID_load_option,ID_save_option,ID_usigned,c_adventure,ID_md_signal,ID_updatemd);

MUX2X5 mux2x5(ID_Inst[15:11],ID_Inst[20:16],ID_RegDst,ID_WriteReg);// select whether the write destination is rt or rd
// Register file: written from the WB stage (WriteData_final, W_WriteReg, W_RegWrite)
RegisterFile RegisterFile(ID_Inst[25:21],ID_Inst[20:16],ID_Inst[15:11],WriteData_final,W_WriteReg,W_RegWrite,Clk,Reset,ID_Qa,ID_Qb,ID_PCSrc,ID_PCAdd4);

// ID-stage operand selection for ID_rs / ID_rt: register file output,
// M_Alu_Out, or the md outputs E_res_hi / E_res_lo, chosen by ID_FwdA / ID_FwdB.
// The instance names say "mux2x32" but the module is the 4-input MUX4X32_forward.
MUX4X32_forward mux2x32_ID_X(ID_Qa,M_Alu_Out,E_res_hi,E_res_lo,ID_FwdA,ID_rs);
MUX4X32_forward mux2x32_ID_Y(ID_Qb,M_Alu_Out,E_res_hi,E_res_lo,ID_FwdB,ID_rt);

// c_adventure is computed from the selected ID_rs / ID_rt and fed to both CU and FU
if_c_adventure if_c_adventure(ID_rs,ID_rt,ID_ALUControl,ID_usigned,c_adventure);
// FU is connected to the forwarding selects (ID_FwdA, ID_FwdB) and to stall / stallstall
FU FU(ID_mfhi,ID_mflo,E_md_signal,E_RegWrite,E_WriteReg,E_MemtoReg,M_RegWrite,M_WriteReg,M_MemtoReg,ID_Inst[25:21],ID_Inst[20:16],ID_FwdA,ID_FwdB,ID_Inst[31:26],ID_Inst[5:0],c_adventure,stall,stallstall);

EXT16T32 ext16t32(ID_Inst[15:0],Se,ID_ext32);
EXT5T32 ext5t32(ID_Inst[10:6],ID_sa);
SHIFTER32_L2 shifter(ID_ext32,ID_ext32_L2);
// ID_B is formed from ID_PCAdd4 and the shifted immediate.
// NOTE(review): the third argument is the unsized literal 0, presumably a
// carry-in -- confirm the port width in CLA_32.
CLA_32 get_b_address(ID_PCAdd4,ID_ext32_L2,0,ID_B,Cout);

SHIFTER_COMBINATION get_j_address(ID_Inst[25:0],ID_PCAdd4,ID_J);// jump target address for the J instruction

// ID/EX pipeline register: first line lists the ID-side signals, second line
// the E_* signals followed by stall, stallstall and busy
REG_ID_EX REG_ID_EX(ID_start_mult,ID_start_div,ID_updatemd,ID_md_signal,ID_Inst[16],ID_sa,ID_RegDst,ID_RegWrite,ID_ALUXSrc,ID_ALUYSrc,ID_ALUControl,ID_md_control,ID_MemWrite,ID_MemtoReg,ID_WriteReg,ID_usigned,ID_Qa,ID_Qb,ID_ext32,ID_FwdA,ID_FwdB,ID_load_option,ID_save_option,Clk,Reset,
E_start_mult,E_start_div,E_updatemd,E_md_signal,E_B_code,E_sa,E_RegDst,E_RegWrite,E_ALUXSrc,E_ALUYSrc,E_ALUControl,E_md_control,E_MemWrite,E_MemtoReg,E_WriteReg,E_usigned,E_Qa,E_Qb,E_ext32,E_FwdA,E_FwdB,E_load_option,E_save_option,stall,stallstall,busy);

//EX
// EX-stage operand selection: register value, M_Alu_Out, WriteData_final,
// M_res_hi or M_res_lo, chosen by E_FwdA / E_FwdB.
// The instance names say "mux3x32" but the module is the 5-input MUX5X32.
MUX5X32 mux3x32_ex_X(E_Qa,M_Alu_Out,WriteData_final,M_res_hi,M_res_lo,E_FwdA,E_NUM_X);
MUX2X32 choose_alu_x(E_NUM_X,E_sa,E_ALUXSrc,Alu_X);

MUX5X32 mux3x32_ex_Y(E_Qb,M_Alu_Out,WriteData_final,M_res_hi,M_res_lo,E_FwdB,E_NUM_Y);
MUX2X32 choose_alu_y(E_ext32,E_NUM_Y,E_ALUYSrc,Alu_Y);
ALU ALU(Alu_X,Alu_Y,E_ALUControl,E_usigned,E_Alu_Out,Z,over);
// Multiply/divide unit: takes the selected operands E_NUM_X / E_NUM_Y and
// outputs busy plus the HI / LO values as E_res_hi / E_res_lo
md md(Clk,E_NUM_X,E_NUM_Y,E_md_control,E_updatemd,E_start_mult,E_start_div,busy,E_res_hi,E_res_lo);

// EX/MEM pipeline register; carries the HI / LO values along with the ALU result
REG_EX_MEM REG_EX_MEM(E_md_control,E_md_signal,E_res_hi,E_res_lo,E_RegWrite,E_RegDst,E_MemWrite,E_MemtoReg,E_WriteReg,E_NUM_Y,E_Alu_Out,E_load_option,E_save_option,Clk,Reset,
M_md_control,M_md_signal,M_res_hi,M_res_lo,M_RegWrite,M_RegDst,M_MemWrite,M_MemtoReg,M_WriteReg,M_NUM_Y,M_Alu_Out,M_load_option,M_save_option,busy);

//MEM
// BE is derived from M_save_option and passed to the data memory
save_to_BE save_to_BE(M_save_option,BE);
dm_4k dm_4k(M_Alu_Out,BE,M_NUM_Y,M_Dout,M_MemWrite,Clk);

// MEM/WB pipeline register
REG_MEM_WB REG_MEM_WB(M_md_control,M_md_signal,M_res_hi,M_res_lo,M_RegWrite,M_MemtoReg,M_Alu_Out,M_Dout,M_WriteReg,M_load_option,Clk,Reset,
W_md_control,W_md_signal,W_res_hi,W_res_lo,W_RegWrite,W_MemtoReg,W_Alu_Out,W_Dout,W_WriteReg,W_load_option);

//WB
data_ext_load data_ext_load(W_Dout,W_Alu_Out,W_load_option,W_ext_Dout);
// WriteData: choice between W_Alu_Out and the extended load data, by W_MemtoReg
MUX2X32 mux2x322(W_Alu_Out,W_ext_Dout,W_MemtoReg,WriteData);
// WriteData_final: choice between WriteData and W_res_hi / W_res_lo, controlled
// by W_md_signal and W_md_control; this is the value written to the register
// file and offered to the EX-stage operand muxes
MUX2X32_md choose_md(WriteData,W_res_hi,W_res_lo,W_md_signal,W_md_control,WriteData_final);
endmodule

信号控制

因为新加了8条指令,所以我们在CU中也要相应的增加几个信号,分别是:

  • md_control[2:0]:用来记录是新增加的8条指令中的哪一条
  • updatemd:用来判断是否要更新hi或者lo
  • md_signal:用来判断传回寄存器堆的值是原来的方式还是从hi或者lo中传过来的
  • busy:用来判断是否正在执行乘除法
  • start_mult:判断是否开始一个乘法,用来决定是否要暂停5个周期
  • start_div:判断是否开始一个除法,用来决定是否要暂停10个周期

所有新加的信号的具体的值已经在CU.xlsx中修改,可以查看,CU.xlsx在42条的github中已经上传

结果验证

我的验证方法

  • 查看I_addu等信号来判断执行的是哪一条指令
  • 查看Regrt等信号是否正确
  • 查看寄存器堆的读写情况可以很好地判断进行的操作是否正确
  • 查看各级输出以及传递情况

最终结果

mars结果如图

在这里插入图片描述

我的结果,可以在示波器中查看dm_4k中Ram的值来判断是否正确,一共只有13个寄存器有值

CPU设计之三——VerilogHDL 开发流水线处理器(支持50条指令)_第1张图片

可以发现结果相同。至此,全部实验结束。

总的来说整个实验还是收获非常大,完成之后也是非常happy~如果你觉得对你有帮助的话,就点个赞吧嘻嘻

你可能感兴趣的:(Verilog,现代处理器设计,Modern,Processor)