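单周期处理器是指每条指令的取指、译码、执行等全部操作都在一个时钟周期内完成。本单周期处理器支持 R 型指令 add、addu、sub、subu、slt、sltu,移位指令 sll、srl、sra,以及寄存器跳转指令 jr(jr 的编码格式属于 R 型,由 op=000000、func=001000 识别);I 型指令 ori、lw、sw、beq、bne;J 型指令 j。此外,ALU 控制模块还对 and、or、xor、nor 的功能码进行了译码。单周期处理器的设计包括两大部分:一是控制部件的设计,二是数据通路的设计。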
设计整体框图如下:
顶层模块如下
// Top level of the single-cycle CPU.
// Instantiates the instruction decoder (Controler) and the data path
// (SingleDataPath) and routes the fetched instruction word and the decoded
// control signals between the two.
//   clk   - clock, forwarded to the data path
//   rst_n - reset, forwarded to the data path
module SingleCycleCpu(
    input clk,
    input rst_n
);
    // Instruction word: output of the data path, input of the decoder.
    wire [31:0] instruction;

    // Control signals: outputs of the decoder, inputs of the data path.
    wire [3:0] ALUctr;
    wire       RegDst, RegWr;
    wire       ExtOp, ALUSrc;
    wire       MemWr, MemtoReg;
    wire       Jump, jr;
    wire       beq, bne;

    SingleDataPath SingleDataPath(
        .clk        (clk),
        .rst_n      (rst_n),
        .instruction(instruction),
        .RegDst     (RegDst),
        .RegWr      (RegWr),
        .ExtOp      (ExtOp),
        .ALUSrc     (ALUSrc),
        .ALUctr     (ALUctr),
        .MemWr      (MemWr),
        .MemtoReg   (MemtoReg),
        .Jump       (Jump),
        .jr         (jr),
        .beq        (beq),
        .bne        (bne)
    );

    Controler Controler(
        .instruction(instruction),
        .RegDst     (RegDst),
        .RegWr      (RegWr),
        .ExtOp      (ExtOp),
        .ALUSrc     (ALUSrc),
        .ALUctr     (ALUctr),
        .MemWr      (MemWr),
        .MemtoReg   (MemtoReg),
        .Jump       (Jump),
        .jr         (jr),
        .beq        (beq),
        .bne        (bne)
    );
endmodule
指令存储器设计如下
// Instruction ROM: 256 words x 32 bits with a combinational (zero-delay) read.
// The contents are loaded once at simulation start with $readmemh.
//
// Parameters:
//   INIT_FILE - path of the hex image to load. The default is the path the
//               original design hard-coded, so existing instantiations behave
//               exactly as before; override it per instance (or with defparam)
//               to run the design from another directory or machine.
// Ports:
//   clk    - not used by the read path; kept so the port list is unchanged
//   RdAddr - word index into the ROM
//   RdData - word stored at RdAddr
module RomNoDelay #(
    parameter INIT_FILE = "F:/work/digital_system_practise/data_path/rtl/inst_rom.data"
)(
    input         clk,
    input  [7:0]  RdAddr,
    output [31:0] RdData
);
    reg [31:0] rom [255:0];

    initial begin
        $readmemh(INIT_FILE, rom);
    end

    assign RdData = rom[RdAddr];
endmodule
数据存储器设计如下
// Data RAM: 256 words x 32 bits.
// Writes are synchronous (rising clk edge, only while wen is high);
// reads are combinational and use their own address port.
module RamNoDelay(
    input         clk,
    input         wen,      // write enable
    input  [7:0]  WrAddr,   // word index written when wen is high
    input  [31:0] WrData,   // word to store
    input  [7:0]  RdAddr,   // word index to read
    output [31:0] RdData    // word currently stored at RdAddr
);
    reg [31:0] ram [255:0];

    // Combinational read port.
    assign RdData = ram[RdAddr];

    // Clocked write port.
    always @(posedge clk) begin
        if (wen) begin
            ram[WrAddr] <= WrData;
        end
    end
endmodule
寄存器堆设计如下
// Register file: 32 registers x 32 bits, two combinational read ports and one
// synchronous write port. Register 0 always reads as zero and is never written.
module dflipflop(
    input         clk,
    input  [4:0]  Ra,     // index for read port A
    input  [4:0]  Rb,     // index for read port B
    input  [4:0]  Rw,     // index for the write port
    input         Wen,    // write enable
    output [31:0] BusA,   // value of register Ra (0 when Ra is 0)
    output [31:0] BusB,   // value of register Rb (0 when Rb is 0)
    input  [31:0] BusW    // value to write into register Rw
);
    reg [31:0] DataReg [31:0];

    // Read ports: index 0 is forced to zero instead of reading the array.
    assign BusA = (Ra == 5'd0) ? 32'd0 : DataReg[Ra];
    assign BusB = (Rb == 5'd0) ? 32'd0 : DataReg[Rb];

    // Write port: a write aimed at register 0 is dropped.
    always @(posedge clk) begin
        if (Wen & (Rw != 5'd0)) begin
            DataReg[Rw] <= BusW;
        end
    end
endmodule
// 32-bit ALU.
//
// alu_CTL encoding (alu_CTL[3:2] selects the result group):
//   0000 addu   0001 add    0010 subu   0011 sub      -> adder result
//   0100 and    0101 or     0110 xor    0111 nor      -> logic result
//   1000 sltu   1001 slt                              -> compare result
//   1100 sll    1101 srl    1110 sra                  -> shifter result
//
// Ports:
//   alu_DA, alu_DB - operands (the shifter shifts alu_DB)
//   alu_SHIFT      - shift amount handed to the shifter
//   alu_ZERO       - 1 when the adder result is zero (A == B after a subtract)
//   alu_Overflow   - signed overflow of the adder, reported for add/sub only
//   alu_DC         - selected result
//
// Fixes relative to the previous version:
//   * Subtraction now always uses two's complement (A + ~B + 1). The old code
//     cleared bit 31 of a negative B instead of negating it, which gave wrong
//     results for sub/subu/slt/sltu and a wrong alu_ZERO for beq/bne whenever
//     alu_DB[31] was set.
//   * The "+1" is added inside the 33-bit sum, so the carry out is correct even
//     when B is 0 (previously "sltu x, 0" evaluated to true).
//   * alu_Overflow is no longer raised for slt: an overflow inside the
//     compare's internal subtraction is not an overflow of the instruction.
//
// NOTE(review): slt/sltu still return 32'hffff_ffff for "true" (MIPS defines
// the value 1); this is kept unchanged because the recorded simulation results
// rely on it.
module alu(
    input      [31:0] alu_DA,
    input      [31:0] alu_DB,
    input      [3:0]  alu_CTL,
    input      [4:0]  alu_SHIFT,
    output            alu_ZERO,
    output            alu_Overflow,
    output reg [31:0] alu_DC
);
    /****************** general control decode ******************/
    wire       SUBctr;    // 1: adder computes A - B (sub, subu, slt, sltu)
    wire       SIGctr;    // compare kind: 0 = unsigned (sltu), 1 = signed (slt)
    wire       Ovctr;     // 1: report adder overflow (add, sub)
    wire [1:0] Opctr;     // result group select
    wire [1:0] Logicctr;  // logic operation select
    wire [1:0] Shiftctr;  // shift operation select

    assign SUBctr   = (~alu_CTL[3] & ~alu_CTL[2] & alu_CTL[1]) | (alu_CTL[3] & ~alu_CTL[2]);
    assign Opctr    = alu_CTL[3:2];
    assign Ovctr    = ~alu_CTL[3] & ~alu_CTL[2] & alu_CTL[0];
    assign SIGctr   = alu_CTL[0];
    assign Logicctr = alu_CTL[1:0];
    assign Shiftctr = alu_CTL[1:0];

    /****************** logic operations ******************/
    reg [31:0] logic_result;
    always @(*) begin
        case (Logicctr)
            2'b00: logic_result = alu_DA & alu_DB;
            2'b01: logic_result = alu_DA | alu_DB;
            2'b10: logic_result = alu_DA ^ alu_DB;
            2'b11: logic_result = ~(alu_DA | alu_DB);
        endcase
    end

    /****************** shift operations ******************/
    wire [31:0] shift_result;
    Shifter Shifter(
        .alu_DB      (alu_DB),
        .ALUSHIFT    (alu_SHIFT),
        .Shiftctr    (Shiftctr),
        .shift_result(shift_result)
    );

    /****************** add / subtract ******************/
    wire [31:0] BIT_M, XOR_M;
    wire        ADD_carry, ADD_OverFlow;
    wire [31:0] ADD_result;

    assign BIT_M = {32{SUBctr}};
    // One's complement of B when subtracting, B itself when adding.
    assign XOR_M = alu_DB ^ BIT_M;
    // 33-bit sum; SUBctr is the carry-in that completes the two's complement.
    assign {ADD_carry, ADD_result} = {1'b0, alu_DA} + {1'b0, XOR_M} + {32'b0, SUBctr};
    // Signed overflow: both adder inputs share a sign that the result lacks.
    assign ADD_OverFlow = (alu_DA[31] == XOR_M[31]) && (ADD_result[31] != alu_DA[31]);
    assign alu_Overflow = ADD_OverFlow & Ovctr;
    assign alu_ZERO     = ~(|ADD_result);

    /****************** set-on-less-than ******************/
    wire [31:0] SLT_result;
    wire        LESS_M1, LESS_M2, LESS_S;

    // Unsigned: A - B produces no carry out exactly when A < B.
    assign LESS_M1 = ADD_carry ^ SUBctr;
    // Signed: the sign of A - B, corrected when the subtraction overflowed.
    assign LESS_M2 = ADD_OverFlow ^ ADD_result[31];
    assign LESS_S  = (SIGctr == 1'b0) ? LESS_M1 : LESS_M2;
    assign SLT_result = LESS_S ? 32'hffff_ffff : 32'h0000_0000;

    /****************** result select ******************/
    always @(*) begin
        case (Opctr)
            2'b00: alu_DC = ADD_result;
            2'b01: alu_DC = logic_result;
            2'b10: alu_DC = SLT_result;
            2'b11: alu_DC = shift_result;
        endcase
    end
endmodule
移位器设计如下:
// Shifter for the ALU; shifts alu_DB by ALUSHIFT bit positions.
//   Shiftctr 00 - logical left shift
//   Shiftctr 01 - logical right shift
//   Shiftctr 10 - arithmetic right shift (vacated bits copy alu_DB[31])
//   Shiftctr 11 - alu_DB is passed through unchanged
module Shifter(
    input      [31:0] alu_DB,
    input      [4:0]  ALUSHIFT,
    input      [1:0]  Shiftctr,
    output reg [31:0] shift_result
);
    // Logical right shift, shared by the srl and sra cases.
    wire [31:0] srl_value = alu_DB >> ALUSHIFT;

    // Mask with ones in the ALUSHIFT most significant positions, i.e. exactly
    // the bits a right shift vacates (all zero when ALUSHIFT is 0).
    wire [31:0] vacated   = ~(32'hffff_ffff >> ALUSHIFT);

    // Copy of the sign bit placed in every vacated position.
    wire [31:0] sign_fill = {32{alu_DB[31]}} & vacated;

    always @(*) begin
        case (Shiftctr)
            2'b00:   shift_result = alu_DB << ALUSHIFT;
            2'b01:   shift_result = srl_value;
            2'b10:   shift_result = sign_fill | srl_value;
            default: shift_result = alu_DB;
        endcase
    end
endmodule
// Instruction fetch unit: holds the program counter, reads the instruction
// ROM and computes the next PC.
//
// PC is a 30-bit word index (it steps by 1 per instruction and takes
// BusA[31:2] on jr); its low 8 bits address the ROM.
//
// Next-PC priority: jump, then jr, then a taken beq/bne, then PC + 1.
//   rst_n - asynchronous, active-low reset; clears PC to 0
//   zero  - ALU zero flag, used to decide beq/bne
//   BusA  - register value used as the jr target
//   ins   - instruction word read from the ROM at the current PC
module if_fetch(
    input         rst_n,
    input         clk,
    input         jump,
    input         beq,
    input         bne,
    input         zero,
    input         jr,
    input  [31:0] BusA,
    output [31:0] ins
);
    reg [29:0] PC;

    // Instruction memory, read combinationally with the low PC bits.
    RomNoDelay RomNoDelay(
        .clk   (clk),
        .RdAddr(PC[7:0]),
        .RdData(ins)
    );

    // Sequential successor and branch target (PC + 1 + sign-extended offset).
    wire [15:0] imme      = ins[15:0];
    wire [29:0] PC_INC    = PC + 1'b1;
    wire [29:0] PC_Branch = PC_INC + {{14{imme[15]}}, imme};

    // beq is taken when the operands are equal, bne when they differ.
    wire        take_branch = (beq && zero) || (bne && !zero);
    wire [29:0] PC_sel      = take_branch ? PC_Branch : PC_INC;

    // Jump target: upper 4 bits of the current PC joined with the 26-bit field.
    wire [29:0] jump_target = {PC[29:26], ins[25:0]};
    wire [29:0] PC_next     = jump ? jump_target
                            : jr   ? BusA[31:2]
                            :        PC_sel;

    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            PC <= 30'd0;
        else
            PC <= PC_next;
    end
endmodule
数据通路模块的代码如下
// Data path of the single-cycle CPU: instruction fetch, register file, ALU
// and data RAM, plus the multiplexers steered by the control signals.
//
// Control inputs:
//   RegDst   - 1: write register is instruction[15:11] (rd), 0: [20:16] (rt)
//   RegWr    - register write request (dropped when the ALU reports overflow)
//   ExtOp    - 1: sign-extend the 16-bit immediate, 0: zero-extend it
//   ALUSrc   - 1: ALU operand B is the extended immediate, 0: register port B
//   ALUctr   - ALU operation code
//   MemWr    - data RAM write enable
//   MemtoReg - 1: write back the RAM read data, 0: write back the ALU result
//   Jump, jr, beq, bne - next-PC selection, forwarded to the fetch unit
// Output:
//   instruction - instruction word delivered by the fetch unit
module SingleDataPath(
    input         clk,
    input         rst_n,
    input         RegDst,
    input         RegWr,
    input         Jump,
    input         ExtOp,
    input         ALUSrc,
    input  [3:0]  ALUctr,
    input         MemWr,
    input         MemtoReg,
    output [31:0] instruction,
    input         bne,
    input         beq,
    input         jr
);
    // ---- instruction fields ----
    wire [4:0]  Rs    = instruction[25:21];
    wire [4:0]  Rt    = instruction[20:16];
    wire [4:0]  Rd    = instruction[15:11];
    wire [4:0]  shift = instruction[10:6];
    wire [15:0] imme  = instruction[15:0];

    // ---- internal buses ----
    wire [31:0] BusA, BusB;   // register file read ports
    wire [31:0] ALU_DC;       // ALU result
    wire [31:0] RdData;       // data RAM read data
    wire        zero;         // ALU zero flag
    wire        overflow;     // ALU overflow flag

    // ---- multiplexers ----
    wire [4:0]  Rw      = RegDst   ? Rd : Rt;
    wire [31:0] Extimme = ExtOp    ? {{16{imme[15]}}, imme} : {16'b0, imme};
    wire [31:0] BusBm   = ALUSrc   ? Extimme : BusB;
    wire [31:0] BusW    = MemtoReg ? RdData : ALU_DC;

    // A result that overflowed is not written back.
    wire        Rwen    = RegWr & (~overflow);

    if_fetch if_fetch(
        .rst_n(rst_n),
        .clk  (clk),
        .jump (Jump),
        .beq  (beq),
        .bne  (bne),
        .zero (zero),
        .jr   (jr),
        .BusA (BusA),
        .ins  (instruction)
    );

    dflipflop regfile(
        .clk (clk),
        .Ra  (Rs),
        .Rb  (Rt),
        .Rw  (Rw),
        .Wen (Rwen),
        .BusA(BusA),
        .BusB(BusB),
        .BusW(BusW)
    );

    alu alu(
        .alu_DA      (BusA),
        .alu_DB      (BusBm),
        .alu_CTL     (ALUctr),
        .alu_SHIFT   (shift),
        .alu_ZERO    (zero),
        .alu_Overflow(overflow),
        .alu_DC      (ALU_DC)
    );

    // The RAM address ports are 8 bits wide, so only the low 8 bits of the ALU
    // result select the word (the slice spells out what the port connection
    // previously truncated implicitly).
    RamNoDelay RamNoDelay(
        .clk   (clk),
        .wen   (MemWr),
        .WrAddr(ALU_DC[7:0]),
        .WrData(BusB),
        .RdAddr(ALU_DC[7:0]),
        .RdData(RdData)
    );
endmodule
控制部件的模块代码如下
// Control unit: decodes the instruction word into data-path control signals.
// The opcode goes to maincontrol and the function field to aluControl; for
// R-type instructions the ALU code comes from aluControl, otherwise from
// maincontrol. jr is recognised here directly (opcode 000000, func 001000).
module Controler(
    input  [31:0] instruction,
    output        RegDst,
    output        bne,
    output        beq,
    output        Jump,
    output        ExtOp,
    output        ALUSrc,
    output [3:0]  ALUctr,
    output        MemWr,
    output        RegWr,
    output        MemtoReg,
    output        jr
);
    wire [5:0] op   = instruction[31:26];
    wire [5:0] func = instruction[5:0];

    wire       R_type;   // set by maincontrol when the opcode is 000000
    wire [3:0] aluOp1;   // ALU code chosen from the opcode
    wire [3:0] aluOp2;   // ALU code chosen from the function field

    aluControl aluControl(
        .func (func),
        .aluOp(aluOp2)
    );

    maincontrol maincontrol(
        .op      (op),
        .RegDst  (RegDst),
        .beq     (beq),
        .bne     (bne),
        .Jump    (Jump),
        .ExtOp   (ExtOp),
        .ALUSrc  (ALUSrc),
        .aluOp   (aluOp1),
        .MemWr   (MemWr),
        .RegWr   (RegWr),
        .MemtoReg(MemtoReg),
        .R_type  (R_type)
    );

    assign jr     = (op == 6'b000000) && (func == 6'b001000);
    assign ALUctr = R_type ? aluOp2 : aluOp1;
endmodule
// Main (opcode) decoder.
// Recognises R-type (000000), lw, sw, beq, bne, j and ori and produces the
// data-path control signals together with the ALU code used by non-R-type
// instructions:
//   lw / sw   -> 0000 (addu)  address = base + sign-extended offset
//   beq / bne -> 0011 (sub)   the ALU zero flag decides the branch
//   ori       -> 0101 (or)
//
// Fix: lw/sw used to select 0001 (add), for which the ALU reports signed
// overflow. The data path drops the register write on overflow, so an lw whose
// address arithmetic wrapped in the signed sense lost its write-back. Address
// arithmetic must not be overflow-checked, so addu is selected instead; the
// computed address is the same.
module maincontrol(
op,
RegDst,
beq,
bne,
Jump,
ExtOp,
ALUSrc,
aluOp,
MemWr,
MemtoReg,
RegWr,
R_type
);
    input  [5:0] op;
    output       RegDst;
    output       RegWr;
    output       Jump;
    output       ExtOp;
    output       ALUSrc;
    output [3:0] aluOp;
    output       MemWr;
    output       MemtoReg;
    output       R_type;
    output       beq;
    output       bne;

    // ALU codes issued by this decoder.
    localparam ALU_ADDU = 4'b0000;
    localparam ALU_SUB  = 4'b0011;
    localparam ALU_OR   = 4'b0101;

    // One-hot opcode matches.
    wire lw   = (op == 6'b100011);
    wire sw   = (op == 6'b101011);
    wire beq1 = (op == 6'b000100);
    wire bne1 = (op == 6'b000101);
    wire j    = (op == 6'b000010);
    wire ori  = (op == 6'b001101);
    wire ls   = lw | sw;           // any load/store

    assign R_type   = (op == 6'b000000);
    assign RegDst   = R_type;               // R-type writes rd, the others rt
    assign RegWr    = R_type | lw | ori;
    assign beq      = beq1;
    assign bne      = bne1;
    assign Jump     = j;
    assign ExtOp    = ls;                   // sign-extend the load/store offset
    assign ALUSrc   = ls | ori;             // operand B is the immediate
    assign MemWr    = sw;
    assign MemtoReg = lw;

    assign aluOp = ({4{ls}}          & ALU_ADDU) |
                   ({4{beq1 | bne1}} & ALU_SUB)  |
                   ({4{ori}}         & ALU_OR);
endmodule
// Function-field decoder for R-type instructions.
// Maps func to the 4-bit ALU code:
//   add  0001   addu 0000   sub  0011   subu 0010
//   and  0100   or   0101   xor  0110   nor  0111
//   sltu 1000   slt  1001
//   sll  1100   srl  1101   sra  1110
// Any other func value (and addu) yields 0000.
// Each output bit is the OR of the instructions whose code has that bit set.
module aluControl(
    input  [5:0] func,
    output [3:0] aluOp
);
    // One-hot function matches (addu needs none: its code is all zero).
    wire add  = (func == 6'b100000);
    wire sub  = (func == 6'b100010);
    wire subu = (func == 6'b100011);
    wire and1 = (func == 6'b100100);
    wire or1  = (func == 6'b100101);
    wire xor1 = (func == 6'b100110);
    wire nor1 = (func == 6'b100111);
    wire slt  = (func == 6'b101010);
    wire sltu = (func == 6'b101011);
    wire sll  = (func == 6'b000000);
    wire srl  = (func == 6'b000010);
    wire sra  = (func == 6'b000011);

    wire code_b3 = sltu | slt | sll | srl | sra;
    wire code_b2 = and1 | or1 | xor1 | nor1 | sll | srl | sra;
    wire code_b1 = sub | subu | xor1 | nor1 | sra;
    wire code_b0 = add | sub | or1 | nor1 | slt | srl;

    assign aluOp = {code_b3, code_b2, code_b1, code_b0};
endmodule
1、 总体仿真验证
1.1 验证方案
先编写汇编测试程序 inst_rom.S,然后把之前编写的 makefile、BinMem.exe、ram.ld 放到该源代码所在的目录,输入命令 make all,得到与汇编代码相匹配的 16 进制格式的指令(即指令存储器通过 $readmemh 装入的内容)。仿真时,将汇编代码注释中给出的预期结果与仿真结果逐条比较。
编写的inst_rom.S如下:
# Test program for the single-cycle CPU (MIPS assembly).
# Each instruction's comment gives the value expected in its destination
# register; the simulation compares the observed write-back value against it.
.org 0x0
.set noat
.set noreorder
.set nomacro
.global _start
_start:
ori $1,$0,0x8000 # $1 = 0x00008000
sll $1,$1,16 # $1 = 0x80000000
ori $1,$1,0x0010 # $1 = 0x80000010
ori $2,$0,0x8000 # $2 = 0x00008000
sll $2,$2,16 # $2 = 0x80000000
ori $2,$2,0x0001 # $2 = 0x80000001
ori $3,$0,0x0000 # $3 = 0x00000000
addu $3,$2,$1 # $3 = 0x00000011 (0x80000001 + 0x80000010 wraps around)
ori $3,$0,0x0000 # $3 = 0x00000000
sub $3,$1,$3 # $3 = 0x80000010
subu $3,$3,$2 # $3 = 0x80000010 - 0x80000001 = 0x0000000f
######### slt\sltu ##########
ori $1,$0,0xffff # $1 = 0xffff
sll $1,$1,16 # $1 = 0xffff0000
slt $2,$1,$0 # $1 is negative, so the result is "true": MIPS defines 1; this design's ALU writes 0xffffffff
sltu $2,$2,$0 # $2 = 0 (no unsigned value is below 0)
######### lw\sw ##########
ori $3,$0,0x4455 # $3 = 0x00004455
sll $3,$3,0x10 # $3 = 0x44550000
ori $3,$3,0x6677 # $3 = 0x44556677
sw $3,0x8($0) # [0x8] = 0x44, [0x9]= 0x55, [0xa]= 0x66, [0xb] = 0x77
lw $1,0x8($0) # $1 = 0x44556677
######### sll\srl ##########
sll $1,$1,4 # $1 = 0x45566770
srl $1,$1,4 # $1 = 0x04556677
ori $1,$0,0x8000 # $1 = 0x00008000
sll $1,$1,16 # $1 = 0x80000000
sra $1,$1,4 # $1 = 0xf8000000
bne $1,$0,N1 # $1 != 0, so the branch to N1 is taken and the two nops are skipped
nop
nop
N1: beq $1,$1,s3 # operands are equal; the target s3 is also the next instruction
s3: ori $1,$0,0x80 # $1 = 0x00000080
jr $1 # $1 = 0x00000080: jump to address 0x00000080
.org 0x80
j 0x90 # jump to address 0x90
.org 0x90
ori $2,$0,56 # $2 = 56
ori $1,$0,4 # $1 = 4
ori $3,$0,0 # $3 = 0
add $3,$1,$2 # $3 = 60
生成的初始化ROM中的16进制指令如下
34018000
00010c00
34210010
34028000
00021400
34420001
34030000
00411821
34030000
00231822
00621823
3401ffff
00010c00
0020102a
0040102b
34034455
00031c00
34636677
ac030008
8c010008
00010900
00010902
34018000
00010c00
00010903
14200002
00000000
00000000
10210000
34010080
00200008
00000000
08000024
00000000
00000000
00000000
34020038
34010004
34030000
00221820
1.2 仿真结果(所有指令格式与ROM中一致,为16进制)
34210010 ori $1,$1,0x0010 # $1 = 0x80000010
观察BusW为0x80000010
00411821 addu $3,$2,$1 # $3 = 0x00000011
观察BusW为0x00000011
00231822 sub $3,$1,$3 # $3 = 0x80000010
观察BusW为0x80000010
0020102a slt $2,$1,$0 # $2 = 0xffffffff
观察BusW为0xffffffff
ac030008为sw指令的16进制编码,8c010008为lw指令的16进制编码
sw $3,0x8($0) # [0x8] = 0x44, [0x9]= 0x55, [0xa]= 0x66, [0xb] = 0x77
lw $1,0x8($0) # $1 = 0x44556677
观察BusW的值为0x44556677
00010900 sll $1,$1,4 # $1 = 0x45566770
观察BusW的值为0x45566770
00010902 srl $1,$1,4 # $1 = 0x04556677
观察BusW的值为0x04556677
00010903 sra $1,$1,4 # $1 = 0xf8000000
观察BusW的值为0xf8000000
bne $1,$0,N1
nop
nop
N1: beq $1,$1,s3
s3: ori $1,$0,0x80 # $1 = 0x00000080
如果执行到最后一步,说明beq和bne指令执行正确
观察BusW的值为0x00000080,且取出的指令为34010080,就是ori指令的16进制编码
jr $1 # $1 =0x00000080 跳转到 0x00000080处
.org 0x80
j 0x90
.org 0x90
ori $2,$0,56 # $2 = 56
ori $1,$0,4 # $1 = 4
ori $3,$0,0 # $3 = 0
add $3,$1,$2 # $3 = 60
如果最后四条指令都被执行,说明j和jr指令仿真正确
Add指令16进制编码为00221820,为最后一条指令,最后结果得60
观察BusW的值依次为56,4,0,60
到此完整的单周期处理器设计加仿真就完成了。
附上控制器的仿真代码
`timescale 1ns/1ns
// Stimulus-only testbench for Controler: applies one instruction word per
// 5 ns so the decoded control signals can be inspected in the waveform.
// The original vectors keep their order and timing; vectors for ori, lw and
// sw were appended because the decoder recognises these opcodes but the
// testbench never exercised them.
module Controler_tb;
    reg  [31:0] instruction;
    wire        RegDst;
    wire        beq;
    wire        bne;
    wire        Jump;
    wire        ExtOp;
    wire        ALUSrc;
    wire [3:0]  ALUctr;
    wire        MemWr;
    wire        MemtoReg;
    wire        jr;
    wire        RegWr;

    Controler Controler(
        .instruction(instruction),
        .RegDst     (RegDst),
        .bne        (bne),
        .beq        (beq),
        .Jump       (Jump),
        .ExtOp      (ExtOp),
        .ALUSrc     (ALUSrc),
        .ALUctr     (ALUctr),
        .MemWr      (MemWr),
        .MemtoReg   (MemtoReg),
        .RegWr      (RegWr),
        .jr         (jr)
    );

    // Drive one instruction word and hold it for 5 ns.
    task apply;
        input [31:0] word;
        begin
            instruction = word;
            #5;
        end
    endtask

    initial begin
        // R-type arithmetic and compare (opcode 000000, selected by func)
        apply(32'b000000_00000_00000_00000_00000_100000); // add
        apply(32'b000000_00000_00000_00000_00000_100001); // addu
        apply(32'b000000_00000_00000_00000_00000_100010); // sub
        apply(32'b000000_00000_00000_00000_00000_100011); // subu
        apply(32'b000000_00000_00000_00000_00000_101010); // slt
        apply(32'b000000_00000_00000_00000_00000_101011); // sltu
        // R-type logic
        apply(32'b000000_00000_00000_00000_00000_100100); // and
        apply(32'b000000_00000_00000_00000_00000_100101); // or
        apply(32'b000000_00000_00000_00000_00000_100110); // xor
        apply(32'b000000_00000_00000_00000_00000_100111); // nor
        // Shifts
        apply(32'b000000_00000_00000_00000_00000_000000); // sll
        apply(32'b000000_00000_00000_00000_00000_000010); // srl
        apply(32'b000000_00000_00000_00000_00000_000011); // sra
        // Branches and jumps
        apply(32'b000100_00000_00000_00000_00000_000000); // beq
        apply(32'b000101_00000_00000_00000_00000_000000); // bne
        apply(32'b000000_00000_00000_00000_00000_001000); // jr
        apply(32'b000010_00000_00000_00000_00000_000000); // j
        // I-type instructions (added)
        apply(32'b001101_00000_00000_00000_00000_000000); // ori
        apply(32'b100011_00000_00000_00000_00000_000000); // lw
        apply(32'b101011_00000_00000_00000_00000_000000); // sw
        #10;
        $stop;
    end
endmodule