俗话说设计验证不分家,一个好的verifier一定也是一个好的designer,因此至少需要掌握一些典型电路的Verilog设计。
// seq_gen: two-state FSM that generates a 1-bit pattern on `seq`.
//   S0 - lasts exactly one clock: cnt0 is incremented, cnt1 is loaded with the
//        pre-increment value of cnt0, and the registered output is cleared.
//   S1 - cnt1 counts down and the registered output is set; the FSM returns
//        to S0 on the clock edge at which cnt1 reads zero.
// Because cnt0 grows by one on every pass through S0, each stay in S1 is one
// clock longer than the previous one (until cnt0 wraps around).
//
// Parameters:
//   cnt_size - width in bits of both counters.
// Ports:
//   clk  - clock; all registers update on the rising edge.
//   rstn - asynchronous reset, active low.
//   seq  - registered pattern output.
module seq_gen
#(parameter cnt_size = 8)
(input clk,
 input rstn,
 output seq);

    localparam S0 = 1'd0;
    localparam S1 = 1'd1;

    reg [cnt_size-1:0] cnt0;   // grows by one on every visit to S0
    reg [cnt_size-1:0] cnt1;   // down-counter that times the stay in S1
    reg current_s;
    reg next_s;
    reg seq_r;

    // State register.
    always @(posedge clk or negedge rstn) begin
        if (!rstn)
            current_s <= S0;
        else
            current_s <= next_s;
    end

    // Next-state logic. This block is purely combinational, so it uses
    // blocking assignments: non-blocking assignments here defer the update
    // of next_s to the NBA region and are a known source of
    // simulation/synthesis mismatches and races.
    always @(*) begin
        case (current_s)
            S0: next_s = S1;
            S1: begin
                if (cnt1 == 0)
                    next_s = S0;
                else
                    next_s = S1;
            end
            default: next_s = S0;
        endcase
    end

    // Counters and registered output. The case selects on the state held
    // before the clock edge, so `seq_r` trails the FSM state by one clock.
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            cnt0  <= 0;
            cnt1  <= 0;
            seq_r <= 0;
        end
        else begin
            case (current_s)
                S0: begin
                    cnt0  <= cnt0 + 1;
                    cnt1  <= cnt0;      // old cnt0, i.e. value before the increment
                    seq_r <= 0;
                end
                S1: begin
                    cnt1  <= cnt1 - 1;
                    seq_r <= 1;
                end
                default: begin
                    cnt0  <= 0;
                    cnt1  <= 0;
                    seq_r <= 0;
                end
            endcase
        end
    end

    assign seq = seq_r;
endmodule
// Test bench for seq_gen: free-running 10 ns clock plus one reset pulse.
// rstn is left undriven (x) for the first 10 ns, is then pulled low, and is
// released on the tenth rising clock edge after that.
`timescale 1ns/1ps
module tb();
    reg  clk;
    reg  rstn;
    wire seq;

    // Device under test, 8-bit counters.
    seq_gen #(.cnt_size(8)) dut (
        .clk (clk),
        .rstn(rstn),
        .seq (seq)
    );

    // Clock: starts low, toggles every 5 ns.
    initial clk <= 1'b0;
    always #5 clk <= !clk;

    // Reset sequence.
    initial begin
        #10;
        rstn <= 1'b0;
        repeat (10) @(posedge clk);
        rstn <= 1'b1;
    end
endmodule
// 3-to-8 decoder: drives an active-high one-hot code on `out`; the bit that
// is set is the one whose position equals `in`.
module decode3_8(input [2:0] in, output reg [7:0] out);
    always @(*) begin
        // Start with every line inactive ...
        out = 8'h00;
        // ... then raise the selected line. A write through an index that
        // contains x/z has no effect, so an unknown `in` leaves `out` at
        // zero, which is what the case-statement default produced.
        out[in] = 1'b1;
    end
endmodule
// BCD (8421) to 1-of-10 decoder with active-low outputs: for in = 0..9 the
// bit of `out` at position `in` is driven low and all others stay high; for
// any other input every output stays high.
module decode_BCD(input [3:0] in, output reg [9:0] out);
    always @(*) begin
        // Idle pattern: all ten lines high (inactive).
        out = {10{1'b1}};
        // Only the ten valid BCD digits select a line. The comparison is
        // unknown (treated as false) when `in` contains x/z, which keeps the
        // idle pattern exactly like the case-statement default did.
        if (in <= 4'd9)
            out[in] = 1'b0;
    end
endmodule
//其他的也一样,用assign语句据说会好一些,不容易产生不定态。assign的思路就是根据真值表,单独对输出的每一位赋值就行了。
仿真略
编码器
与译码器相反,也可以用always结合case或if…else…实现组合逻辑或者根据真值表由assign连续赋值完成。
加法器
最基础的加法器是半加器和全加器,半加器不考虑来自低位的进位(一般不咋用),全加器考虑来自低位的进位。设计思路一般是通过较少位数的全加器构成多位数的加法器,比如1bit的全加器构成4bit加法器,4bit加法器再构成16bit加法器等等。在构成的过程中有两种方式:串联起来,构成行波进位加法器;通过一个额外的组合逻辑提前计算出最终的进位输出,称为超前进位加法器。
半加器、全加器、行波进位加法器的原理很简单。行波进位加法器由于是一级级串联,低位的进位输出作为高位的进位输入,进位必须逐级传播,组合逻辑延时随位数线性增长(它仍然是纯组合逻辑,并不需要多个时钟周期,只是关键路径很长),因此位宽较大的加法器一般不采用这种方法;超前进位加法器通过逻辑展开,直接由输入提前计算出各级进位和输出的表达式,进位不必逐级等待,延时明显减小,具体原理如下:
对于1bit的全加器而言,sum = a ^ b ^ cin;cout = (a&&b) || (cin && (a^b));
令P=a ^ b,G=a&&b,则有:sum = P ^ cin,cout =G || (cin && P) ;
对于2bit的全加器而言,有sum[0] = P[0] ^ cin,cout[0] = G[0] || (cin && P[0]),sum[1] = P[1] ^ cout[0],cout = G[1] || (cout[0] && P[1]);
对于N bit的全加器而言,有
sum[0] = P[0] ^ cin,cout[0] = G[0] || (cin && P[0])
sum[1] = P[1] ^ cout[0],cout[1] = G[1] || (cout[0] && P[1])
……
sum[N-1] = P[N-1] ^ cout[N-2],cout[N-1] = G[N-1] || (cout[N-2] && P[N-1]),其中cout[N-1]即为最终的进位输出cout;
综上,个人理解的超前进位加法器就是通过将cout[N-2],cout[N-3]等数全部用cin和p,g的组合逻辑表示出来,从而能够对输入变化产生立即响应
// 1-bit full adder: {cout, sum} is the 2-bit sum of a, b and cin.
module adder_1(input a, input b, input cin, output sum, output cout);
    wire propagate = a ^ b;   // exactly one operand bit is set
    wire generate_ = a & b;   // both operand bits are set

    // XOR of the three inputs is 1 when an odd number of them is 1.
    assign sum  = propagate ^ cin;
    // A carry leaves this bit when it is generated here, or when an
    // incoming carry is propagated through.
    assign cout = generate_ | (propagate & cin);
endmodule
// 4-bit ripple-carry adder built from four adder_1 instances chained through
// their carries: {cout, sum} = a + b + cin.
// Ports:
//   a, b - 4-bit operands.
//   cin  - carry into bit 0.
//   sum  - 4-bit result.
//   cout - carry out of bit 3.
module adder_4(input [3:0] a, input [3:0] b, input cin, output [3:0] sum, output cout);
    wire c1, c2, c3;   // carries between adjacent bit positions

    // Each stage must add the matching bit of b. Wiring b[0] into every
    // stage gives a wrong sum whenever b[3:1] differs from {3{b[0]}}.
    adder_1 dut1(.a(a[0]), .b(b[0]), .cin(cin), .sum(sum[0]), .cout(c1));
    adder_1 dut2(.a(a[1]), .b(b[1]), .cin(c1),  .sum(sum[1]), .cout(c2));
    adder_1 dut3(.a(a[2]), .b(b[2]), .cin(c2),  .sum(sum[2]), .cout(c3));
    adder_1 dut4(.a(a[3]), .b(b[3]), .cin(c3),  .sum(sum[3]), .cout(cout));
endmodule
// 4-bit carry-lookahead adder: {co, sum} = a + b + cin.
// Every carry is written directly in terms of the per-bit propagate (p) and
// generate (g) signals and cin, so no carry expression depends on another
// carry signal.
// Ports:
//   a, b - 4-bit operands.
//   cin  - carry into bit 0.
//   sum  - 4-bit result.
//   co   - carry out of bit 3.
module adder_4(input [3:0] a, input [3:0] b, input cin, output [3:0] sum, output co);
    wire [3:0] p = a ^ b;   // propagate: exactly one operand bit set
    wire [3:0] g = a & b;   // generate: both operand bits set
    wire [2:0] cout;        // carries out of bits 0..2

    // Lookahead carries in flattened sum-of-products form.
    assign cout[0] = g[0]
                   | (p[0] & cin);
    assign cout[1] = g[1]
                   | (p[1] & g[0])
                   | (p[1] & p[0] & cin);
    assign cout[2] = g[2]
                   | (p[2] & g[1])
                   | (p[2] & p[1] & g[0])
                   | (p[2] & p[1] & p[0] & cin);
    assign co      = g[3]
                   | (p[3] & g[2])
                   | (p[3] & p[2] & g[1])
                   | (p[3] & p[2] & p[1] & g[0])
                   | (p[3] & p[2] & p[1] & p[0] & cin);

    // Each sum bit is its propagate signal XORed with the carry entering it.
    assign sum = p ^ {cout, cin};
endmodule
// Test bench for the 4-bit carry-lookahead adder: starts from a = b = 1,
// cin = 0 and then applies a fresh random operand set every 20 ns.
`timescale 1ns/1ps
module tb ();
    reg  [3:0] a, b;
    reg        cin;
    wire [3:0] sum;
    wire       cout;

    // Device under test; its carry-out port is named `co`.
    adder_4 dut (
        .a  (a),
        .b  (b),
        .cin(cin),
        .sum(sum),
        .co (cout)
    );

    // Initial operands at time zero.
    initial begin
        a   <= 1;
        b   <= 1;
        cin <= 0;
    end

    // Random stimulus. {$random} forces an unsigned value so that the
    // modulo result is never negative.
    always #20 begin
        a   <= {$random} % 16;
        b   <= {$random} % 16;
        cin <= {$random} % 2;
    end
endmodule
下图为4bit超前进位加法器综合电路(quartus综合):
// 4x4 unsigned combinational multiplier: y = a * b, formed by adding four
// partial products (a gated by one bit of b, shifted to that bit's weight).
module mul_4x4(input [3:0] a, input [3:0] b, output [7:0] y);
    // Partial product i is `a` when b[i] is set, otherwise zero, moved left
    // by i places. The operand is widened to 8 bits before the shift so no
    // bits are lost.
    wire [7:0] row0 = {4'b0000, a & {4{b[0]}}};
    wire [7:0] row1 = {4'b0000, a & {4{b[1]}}} << 1;
    wire [7:0] row2 = {4'b0000, a & {4{b[2]}}} << 2;
    wire [7:0] row3 = {4'b0000, a & {4{b[3]}}} << 3;

    assign y = row3 + row2 + row1 + row0;
endmodule
// Test bench for the combinational 4x4 multiplier. The operands are left
// undriven for the first 10 ns, then set to 1 and 2; random operands follow
// every 20 ns, the first of them at 50 ns.
`timescale 1ns / 1ps
module tb ();
    reg  [3:0] a, b;
    wire [7:0] y;

    mul_4x4 dut (
        .a(a),
        .b(b),
        .y(y)
    );

    initial begin
        #10;
        a <= 4'd1;
        b <= 4'd2;
        #20;
        forever begin
            #20;
            // {$random} forces an unsigned value before the modulo.
            a <= {$random} % 16;
            b <= {$random} % 16;
        end
    end
endmodule
上述为最基本的加法器树形乘法器,对于高位宽的计算,其组合延时很大。流水线设计可以解决这个问题:通过在组合逻辑中插入寄存器,将其分解为延时较小的若干级,从而提高电路的工作频率。个人理解:流水线就是在输入到输出的组合路径中插入寄存器来保存中间结果,每一级只需要在一个时钟周期内完成自己那一部分计算;流水线填满之后,每个时钟周期都能输出一个结果,从而实现加速,是一种典型的面积换速度的设计方式。插入了K级寄存器就是K级流水线。流水线设计的缺点是输入到输出存在若干个时钟周期的延时,并且流水线级数较多时会增大电路面积。对于CPU来说,一条指令完整的执行过程包括:取指、译码、执行、访存、写回五个部分,通过5级流水线,在理想状态下平均每个时钟周期可以完成一条指令的执行;实际上目前的CPU设计甚至达到了十几级的流水线设计,将工作的各个步骤进一步拆解。
改进:插入两级流水线,代码如下:
// Two-stage pipelined 4x4 unsigned multiplier.
//   Stage 1 registers the sums of the two low and the two high partial
//   products; stage 2 registers their total. `y` therefore shows a * b two
//   rising clock edges after the operands are applied.
// Ports:
//   clk  - clock, rising-edge active.
//   rstn - asynchronous reset, active low; clears both stages.
//   a, b - 4-bit operands (not registered on entry).
//   y    - registered 8-bit product.
module mul_4x4(input clk, input rstn, input [3:0] a, input [3:0] b, output reg [7:0] y);
    // Partial products: `a` gated by one bit of b, shifted to that bit's
    // weight. The operand is widened to 8 bits before shifting.
    wire [7:0] row0 = {4'b0000, a & {4{b[0]}}};
    wire [7:0] row1 = {4'b0000, a & {4{b[1]}}} << 1;
    wire [7:0] row2 = {4'b0000, a & {4{b[2]}}} << 2;
    wire [7:0] row3 = {4'b0000, a & {4{b[3]}}} << 3;

    reg [7:0] sum_lo;   // stage 1: row0 + row1
    reg [7:0] sum_hi;   // stage 1: row2 + row3

    // Pipeline stage 1: pairwise sums of the partial products.
    always @(posedge clk or negedge rstn) begin
        if (!rstn) begin
            sum_lo <= 8'd0;
            sum_hi <= 8'd0;
        end
        else begin
            sum_lo <= row0 + row1;
            sum_hi <= row2 + row3;
        end
    end

    // Pipeline stage 2: final sum, using the stage-1 values from the
    // previous clock.
    always @(posedge clk or negedge rstn) begin
        if (!rstn)
            y <= 8'd0;
        else
            y <= sum_lo + sum_hi;
    end
endmodule
// Test bench for the pipelined 4x4 multiplier: 20 ns clock, one reset pulse
// and random operands.
//   - rstn is undriven for the first 10 ns, then held low until the tenth
//     rising clock edge after that.
//   - a and b are undriven for the first 10 ns, then set to 1 and 2; random
//     values follow every 20 ns, the first of them at 50 ns.
`timescale 1ns / 1ps
module tb ();
    reg  [3:0] a, b;
    wire [7:0] y;
    reg        clk, rstn;

    mul_4x4 dut (
        .clk (clk),
        .rstn(rstn),
        .a   (a),
        .b   (b),
        .y   (y)
    );

    // Clock: starts low, toggles every 10 ns.
    initial clk <= 0;
    always #10 clk <= !clk;

    // Reset sequence.
    initial begin
        #10;
        rstn <= 0;
        repeat (10) @(posedge clk);
        rstn <= 1;
    end

    // Operand stimulus.
    initial begin
        #10;
        a <= 4'd1;
        b <= 4'd2;
        #20;
        forever begin
            #20;
            // {$random} forces an unsigned value before the modulo.
            a <= {$random} % 16;
            b <= {$random} % 16;
        end
    end
endmodule
// Parity detector for an 8-bit word (data bits and parity bit together).
//   odd  - 1 when `data` contains an odd number of 1 bits.
//   even - 1 when `data` contains an even number of 1 bits.
// The caller compares the flag with the parity scheme it expects in order to
// decide whether the word is valid.
module checker(input [7:0] data, output odd, output even);
    assign odd  = ^data;    // reduction XOR
    assign even = ~^data;   // reduction XNOR, the complement of `odd`
endmodule
// Synchronous FIFO, depth 8, word width 32 bits.
// Read and write share one clock, so no pointer synchronisation is needed.
// Ports:
//   clk      - common read/write clock, rising-edge active.
//   rst_n    - asynchronous reset, active low; clears pointers, storage and
//              the output register.
//   wr_en    - write request; ignored while `full` is high.
//   rd_en    - read request; ignored while `empty` is high.
//   data_in  - word to store.
//   data_out - registered read data; shows the word one clock after an
//              accepted read and is zero on every other clock.
//   full     - high when all 8 entries are occupied.
//   empty    - high when no entry is occupied.
//   margin   - number of free entries (8 minus the occupancy).
module sys_fifo (
    input         clk,
    input         rst_n,
    input         wr_en,
    input         rd_en,
    input  [31:0] data_in,
    output [31:0] data_out,
    output        full,
    output        empty,
    output [3:0]  margin
);
    // Pointers are one bit wider than the 3-bit address: the extra MSB
    // records how often the pointer has wrapped, which is what tells full
    // from empty.
    reg [3:0]  wr_addr;
    reg [3:0]  rd_addr;
    reg [31:0] mem [0:7];      // 8 words of 32 bits
    reg [31:0] data_out_reg;
    integer    i;

    // Full: same entry, opposite wrap bit. Empty: pointers identical.
    assign full     = (rd_addr == {~wr_addr[3], wr_addr[2:0]});
    assign empty    = (rd_addr == wr_addr);
    assign margin   = (4'd8 - (wr_addr - rd_addr));
    assign data_out = data_out_reg;

    // Write pointer.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            wr_addr <= 4'd0;
        else if (wr_en && (!full))
            wr_addr <= wr_addr + 1'b1;
    end

    // Read pointer.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            rd_addr <= 4'd0;
        else if (rd_en && (!empty))
            rd_addr <= rd_addr + 1'b1;
    end

    // Storage write. The reset loop must cover all 8 entries (indices
    // 0..7); a bound of 7 would leave mem[7] uninitialised. `i = i + 1` is
    // used instead of `i++` so the module stays plain Verilog-2001.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            for (i = 0; i < 8; i = i + 1) begin
                mem[i] <= 32'd0;
            end
        end
        else if (wr_en && (!full)) begin
            mem[wr_addr[2:0]] <= data_in;
        end
    end

    // Storage read into the output register; the register is cleared on any
    // clock without an accepted read.
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            data_out_reg <= 32'd0;
        end
        else if (rd_en && (!empty)) begin
            data_out_reg <= mem[rd_addr[2:0]];
        end
        else
            data_out_reg <= 32'd0;
    end
endmodule
// Asynchronous FIFO, depth 8, word width 32 bits.
// The write side runs on clk1 and the read side on clk2. Each side's pointer
// is passed to the other side as a Gray code through a two-flop
// synchroniser, so at most one bit changes per increment while it is being
// sampled by the foreign clock.
// Ports:
//   clk1     - write clock, rising-edge active.
//   clk2     - read clock, rising-edge active.
//   rst_n    - asynchronous reset, active low, shared by both sides.
//   wr_en    - write request (clk1); ignored while `full` is high.
//   rd_en    - read request (clk2); ignored while `empty` is high.
//   data_in  - word to store.
//   data_out - registered read data; updated on an accepted read and held
//              otherwise.
//   full     - write-side flag, derived from the synchronised read pointer.
//   empty    - read-side flag, derived from the synchronised write pointer.
//   margin   - 8 minus (write pointer - read pointer); see the note at its
//              assignment.
// NOTE(review): rst_n is used as an asynchronous reset in both clock domains
// without per-domain release synchronisation - confirm that the reset source
// guarantees a safe release for both clocks.
module asys_fifo(
    input         clk1,
    input         clk2,
    input         rst_n,
    input         wr_en,
    input         rd_en,
    input  [31:0] data_in,
    output [31:0] data_out,
    output        full,
    output        empty,
    output [3:0]  margin
);
    reg [31:0] mem [7:0];

    // Pointers are 4 bits: 3 address bits plus one wrap bit.
    reg  [3:0] wr_addr;
    reg  [3:0] wr_addr_gray;     // registered Gray image of wr_addr
    reg  [3:0] wr_addr_gray_1;   // write pointer, 1st synchroniser flop (clk2)
    reg  [3:0] wr_addr_gray_2;   // write pointer, 2nd synchroniser flop (clk2)
    reg  [3:0] rd_addr;
    reg  [3:0] rd_addr_gray;     // registered Gray image of rd_addr
    reg  [3:0] rd_addr_gray_1;   // read pointer, 1st synchroniser flop (clk1)
    reg  [3:0] rd_addr_gray_2;   // read pointer, 2nd synchroniser flop (clk1)
    reg [31:0] data_out_reg;
    integer    i;

    wire [3:0] wr_addr_inc = wr_addr + 4'd1;
    wire [3:0] rd_addr_inc = rd_addr + 4'd1;

    // Write pointer and its Gray image. The Gray code is computed from the
    // next binary value and registered together with it, so it always equals
    // gray(wr_addr) but comes straight from a flop: the signal that crosses
    // into the clk2 domain has no combinational logic that could glitch.
    always @(posedge clk1 or negedge rst_n) begin
        if (!rst_n) begin
            wr_addr      <= 4'd0;
            wr_addr_gray <= 4'd0;
        end
        else if (wr_en && (!full)) begin
            wr_addr      <= wr_addr_inc;
            wr_addr_gray <= (wr_addr_inc >> 1) ^ wr_addr_inc;
        end
    end

    // Storage write. The reset loop covers all 8 entries (indices 0..7).
    always @(posedge clk1 or negedge rst_n) begin
        if (!rst_n) begin
            for (i = 0; i < 8; i = i + 1)
                mem[i] <= 32'd0;
        end
        else if (wr_en && (!full)) begin
            mem[wr_addr[2:0]] <= data_in;
        end
    end

    // Read pointer and its registered Gray image (same scheme as above).
    always @(posedge clk2 or negedge rst_n) begin
        if (!rst_n) begin
            rd_addr      <= 4'd0;
            rd_addr_gray <= 4'd0;
        end
        else if (rd_en && (!empty)) begin
            rd_addr      <= rd_addr_inc;
            rd_addr_gray <= (rd_addr_inc >> 1) ^ rd_addr_inc;
        end
    end

    // Storage read into the output register.
    always @(posedge clk2 or negedge rst_n) begin
        if (!rst_n)
            data_out_reg <= 32'd0;
        else if (rd_en && (!empty))
            data_out_reg <= mem[rd_addr[2:0]];
    end

    assign data_out = data_out_reg;

    // Two-flop synchroniser: read pointer into the write clock domain.
    always @(posedge clk1 or negedge rst_n) begin
        if (!rst_n) begin
            rd_addr_gray_1 <= 4'd0;
            rd_addr_gray_2 <= 4'd0;
        end
        else begin
            rd_addr_gray_1 <= rd_addr_gray;
            rd_addr_gray_2 <= rd_addr_gray_1;
        end
    end

    // Two-flop synchroniser: write pointer into the read clock domain.
    always @(posedge clk2 or negedge rst_n) begin
        if (!rst_n) begin
            wr_addr_gray_1 <= 4'd0;
            wr_addr_gray_2 <= 4'd0;
        end
        else begin
            wr_addr_gray_1 <= wr_addr_gray;
            wr_addr_gray_2 <= wr_addr_gray_1;
        end
    end

    // Full (write side): in Gray code the pointers are exactly one wrap
    // apart when the two MSBs differ and the remaining bits match. The local
    // write pointer is compared with the synchronised read pointer.
    assign full  = (rd_addr_gray_2 == {~wr_addr_gray[3:2], wr_addr_gray[1:0]});
    // Empty (read side): local read pointer equals the synchronised write
    // pointer.
    assign empty = (rd_addr_gray == wr_addr_gray_2);

    // NOTE(review): this expression combines the clk1-domain write pointer
    // with the clk2-domain read pointer without synchronisation, so in
    // hardware it can glitch or be sampled mid-transition. It is left
    // unchanged so existing users see the same values; confirm which clock
    // domain consumes `margin` before relying on it.
    assign margin = (4'd8 - (wr_addr - rd_addr));
endmodule
// Single-port RAM, 32 words of 16 bits, one shared address for read and
// write. On each rising clock edge:
//   cs low             - data_out is driven to high impedance;
//   cs high, wr_en high - data_in is stored at addr, data_out keeps its value;
//   cs high, wr_en low  - the word at addr is loaded into data_out.
module ram_single(input clk, input [4:0]addr, input [15:0] data_in, input wr_en, input cs, output reg [15:0]data_out);
    localparam [15:0] HIGH_Z = 16'bz;   // released output bus

    reg [15:0] mem [31:0];

    always @(posedge clk) begin
        if (!cs)                 // chip not selected
            data_out <= HIGH_Z;
        else if (wr_en)          // write cycle
            mem[addr] <= data_in;
        else                     // read cycle
            data_out <= mem[addr];
    end
endmodule
// Dual-port RAM, 32 words of 16 bits, with independent ports:
//   write port - on a rising edge of clk1 with wr_en high, data_in is stored
//                at wr_addr;
//   read port  - on a rising edge of clk2 with rd_en high, the word at
//                rd_addr is loaded into data_out; otherwise data_out holds.
// data_out is assigned inside an always block, so it has to be declared as
// `output reg`; as a plain net output the module does not compile.
module ram_dual(input clk1, input clk2, input wr_en, input rd_en, input [4:0] wr_addr, input [4:0] rd_addr, input [15:0] data_in, output reg [15:0] data_out);
    reg [15:0] mem [31:0];

    // Write port (clk1).
    always @(posedge clk1) begin
        if (wr_en)
            mem[wr_addr] <= data_in;
    end

    // Read port (clk2).
    always @(posedge clk2) begin
        if (rd_en)
            data_out <= mem[rd_addr];
    end
endmodule
此外,还有ROM,即只读存储器,断电后数据不丢失,代码略。