高层次综合(High Level Synthesis, HLS)是Xilinx公司推出的最新一代的FPGA设计工具,它能让用户通过编写C/C++等高级语言代码实现RTL级的硬件功能。随着这款工具的出现,软硬之间的区别越来越模糊,即使你对于硬件完全不懂,你也能编写出符合工程功能要求的RTL代码。看到HLS工具具有如此神奇的功能,你是否想立即尝试一下呢?
接下来我们将谈谈HLS相关的简单操作以及C/C++到VHDL的一个转换关系:
首先确保你已经安装好了Vivado相关套件,并且已经获取HLS的license(官网有试用版,但只能用一个月)。然后双击桌面上的Vivado HLS图标打开软件,像在一般的软件里一样新建工程即可。这里我们写了一个很简单的函数int andfunction(int array[2],int array1[2]);这里我们注意到函数返回值是整型,同时两个形参都是数组,这个函数代码如下:
/*
 * Fill two 2-element arrays and return the sum of the second one.
 *
 * For each index idx in {0, 1}:
 *   array[idx]  = idx * 2
 *   array1[idx] = array[idx] + idx
 *
 * Parameters:
 *   array  - output buffer of 2 ints; previous contents are overwritten.
 *   array1 - output buffer of 2 ints; previous contents are overwritten.
 *
 * Returns:
 *   array1[0] + array1[1] as written by this call.
 */
int andfunction(int array[2], int array1[2])
{
    int total = 0;
    int idx = 0;

    while (idx < 2) {
        array[idx] = idx * 2;
        /* Read array[idx] back, exactly as the element was just stored. */
        array1[idx] = array[idx] + idx;
        total += array1[idx];
        ++idx;
    }

    return total;
}
写好了这个函数后,那么我们不妨看看综合后的VHDL是怎样的。点击绿色三角按钮后生成硬件描述语言,solution1->syn->vhdl里可以看到生成的VHDL,代码如下:
-- ==============================================================
-- RTL generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC
-- Version: 2012.4
-- Copyright (C) 2012 Xilinx Inc. All rights reserved.
--
-- ===========================================================
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.numeric_std.all;
-- Port interface of the block generated from the C function
-- int andfunction(int array[2], int array1[2]).
entity andfunction is
port (
-- Clock and reset inputs.
ap_clk : IN STD_LOGIC;
ap_rst : IN STD_LOGIC;
-- Block-level handshake: start request in; done / idle / ready status out.
ap_start : IN STD_LOGIC;
ap_done : OUT STD_LOGIC;
ap_idle : OUT STD_LOGIC;
ap_ready : OUT STD_LOGIC;
-- Memory port for the C argument `array` (named array_r here; presumably
-- renamed because `array` is a VHDL reserved word -- confirm against the
-- tool documentation). 1-bit address, chip enable, write enable, 32-bit
-- write data. There is no read-data input: the port list is write-only.
array_r_address0 : OUT STD_LOGIC_VECTOR (0 downto 0);
array_r_ce0 : OUT STD_LOGIC;
array_r_we0 : OUT STD_LOGIC;
array_r_d0 : OUT STD_LOGIC_VECTOR (31 downto 0);
-- Memory port for the C argument `array1`, same layout as above.
array1_address0 : OUT STD_LOGIC_VECTOR (0 downto 0);
array1_ce0 : OUT STD_LOGIC;
array1_we0 : OUT STD_LOGIC;
array1_d0 : OUT STD_LOGIC_VECTOR (31 downto 0);
-- 32-bit function return value.
ap_return : OUT STD_LOGIC_VECTOR (31 downto 0) );
end;
-- Architecture of andfunction: a two-state FSM drives a 2-bit loop counter.
-- While the FSM is in state st2 and the counter has not reached 2, one word
-- is written to each of the two memory ports per clock cycle (counter values
-- 0 and 1). ap_return is driven with a constant.
architecture behav of andfunction is
attribute CORE_GENERATION_INFO : STRING;
attribute CORE_GENERATION_INFO of behav : architecture is
"andfunction,hls_ip_2012_4,{HLS_INPUT_TYPE=c,HLS_INPUT_FLOAT=0,HLS_INPUT_FIXED=0,HLS_INPUT_PART=xc5vlx110tff1136-1,HLS_INPUT_CLOCK=10.000000,HLS_INPUT_ARCH=others,HLS_SYN_CLOCK=3.090000,HLS_SYN_LAT=3,HLS_SYN_TPT=none,HLS_SYN_MEM=0,HLS_SYN_DSP=0,HLS_SYN_FF=3,HLS_SYN_LUT=8}";
-- Single-bit logic literals.
constant ap_const_logic_1 : STD_LOGIC := '1';
constant ap_const_logic_0 : STD_LOGIC := '0';
-- FSM state encodings: st1 = "0", st2 = "1".
constant ap_ST_st1_fsm_0 : STD_LOGIC_VECTOR (0 downto 0) := "0";
constant ap_ST_st2_fsm_1 : STD_LOGIC_VECTOR (0 downto 0) := "1";
-- Vector literals; the names encode width and value (e.g. lv2_2 is the
-- 2-bit value 2, lv32_3 is the 32-bit value 3).
constant ap_const_lv2_0 : STD_LOGIC_VECTOR (1 downto 0) := "00";
constant ap_const_lv1_0 : STD_LOGIC_VECTOR (0 downto 0) := "0";
constant ap_const_lv2_2 : STD_LOGIC_VECTOR (1 downto 0) := "10";
constant ap_const_lv2_1 : STD_LOGIC_VECTOR (1 downto 0) := "01";
constant ap_const_lv32_3 : STD_LOGIC_VECTOR (31 downto 0) := "00000000000000000000000000000011";
-- Current FSM state; initial value is st1.
signal ap_CS_fsm : STD_LOGIC_VECTOR (0 downto 0) := "0";
-- Loop counter plus one.
signal i_1_fu_66_p2 : STD_LOGIC_VECTOR (1 downto 0);
-- 2-bit loop counter register.
signal i_reg_42 : STD_LOGIC_VECTOR (1 downto 0);
-- "1" when the loop counter equals 2.
signal exitcond1_fu_60_p2 : STD_LOGIC_VECTOR (0 downto 0);
-- Loop counter zero-extended to 32 bits; its bit 0 is used as the address.
signal i_cast_fu_54_p1 : STD_LOGIC_VECTOR (31 downto 0);
-- Loop counter shifted left by one bit (data for array_r).
signal tmp_fu_72_p2 : STD_LOGIC_VECTOR (1 downto 0);
-- tmp_fu_72_p2 plus the loop counter (data for array1).
signal tmp_1_fu_83_p2 : STD_LOGIC_VECTOR (1 downto 0);
-- Combinational next-state value.
signal ap_NS_fsm : STD_LOGIC_VECTOR (0 downto 0);
begin
-- the current state (ap_CS_fsm) of the state machine. --
-- Registered on the rising clock edge; ap_rst = '1' forces st1 synchronously.
ap_CS_fsm_assign_proc : process(ap_clk)
begin
if (ap_clk'event and ap_clk = '1') then
if (ap_rst = '1') then
ap_CS_fsm <= ap_ST_st1_fsm_0;
else
ap_CS_fsm <= ap_NS_fsm;
end if;
end if;
end process;
-- ap_reg assign process. --
-- Loop counter register. In st2 with the exit condition false it takes
-- counter + 1; otherwise, in st1 with ap_start not '0', it is loaded with 0.
-- ap_rst is not used in this process.
ap_reg_proc : process(ap_clk)
begin
if (ap_clk'event and ap_clk = '1') then
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0))) then
i_reg_42 <= i_1_fu_66_p2;
elsif (((ap_ST_st1_fsm_0 = ap_CS_fsm) and not((ap_start = ap_const_logic_0)))) then
i_reg_42 <= ap_const_lv2_0;
end if;
end if;
end process;
-- the next state (ap_NS_fsm) of the state machine. --
-- st2 -> st1 when the exit condition is true;
-- st1 -> st2 when ap_start is not '0'; st2 -> st2 while the exit condition
-- is false; in every other case the current state is held.
ap_NS_fsm_assign_proc : process(ap_start, ap_CS_fsm, exitcond1_fu_60_p2)
begin
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and not((exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
ap_NS_fsm <= ap_ST_st1_fsm_0;
elsif ((((ap_ST_st1_fsm_0 = ap_CS_fsm) and not((ap_start = ap_const_logic_0))) or ((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
ap_NS_fsm <= ap_ST_st2_fsm_1;
else
ap_NS_fsm <= ap_CS_fsm;
end if;
end process;
-- ap_done assign process. --
-- High only in st2 when the exit condition is true (counter = 2).
ap_done_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and not((exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
ap_done <= ap_const_logic_1;
else
ap_done <= ap_const_logic_0;
end if;
end process;
-- ap_idle assign process. --
-- High only in st1 while ap_start is not '1'.
ap_idle_assign_proc : process(ap_start, ap_CS_fsm)
begin
if ((not((ap_const_logic_1 = ap_start)) and (ap_ST_st1_fsm_0 = ap_CS_fsm))) then
ap_idle <= ap_const_logic_1;
else
ap_idle <= ap_const_logic_0;
end if;
end process;
-- ap_ready assign process. --
-- Same condition as ap_done: st2 with the exit condition true.
ap_ready_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and not((exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
ap_ready <= ap_const_logic_1;
else
ap_ready <= ap_const_logic_0;
end if;
end process;
-- The return value is the constant 3; the C loop it was generated from sums
-- array1[0] + array1[1] = 0 + 3.
ap_return <= ap_const_lv32_3;
-- (1 - 1 downto 0) selects bit 0 of the zero-extended loop counter.
array1_address0 <= i_cast_fu_54_p1(1 - 1 downto 0);
-- array1_ce0 assign process. --
-- High in st2 while the exit condition is false.
array1_ce0_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0))) then
array1_ce0 <= ap_const_logic_1;
else
array1_ce0 <= ap_const_logic_0;
end if;
end process;
-- Write data for array1: tmp_1_fu_83_p2 zero-extended to 32 bits.
array1_d0 <= std_logic_vector(resize(unsigned(tmp_1_fu_83_p2),32));
-- array1_we0 assign process. --
-- Same condition as array1_ce0.
array1_we0_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if ((((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
array1_we0 <= ap_const_logic_1;
else
array1_we0 <= ap_const_logic_0;
end if;
end process;
-- Same address source as array1_address0: bit 0 of the loop counter.
array_r_address0 <= i_cast_fu_54_p1(1 - 1 downto 0);
-- array_r_ce0 assign process. --
-- High in st2 while the exit condition is false.
array_r_ce0_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if (((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0))) then
array_r_ce0 <= ap_const_logic_1;
else
array_r_ce0 <= ap_const_logic_0;
end if;
end process;
-- Write data for array_r: tmp_fu_72_p2 zero-extended to 32 bits.
array_r_d0 <= std_logic_vector(resize(unsigned(tmp_fu_72_p2),32));
-- array_r_we0 assign process. --
-- Same condition as array_r_ce0.
array_r_we0_assign_proc : process(ap_CS_fsm, exitcond1_fu_60_p2)
begin
if ((((ap_ST_st2_fsm_1 = ap_CS_fsm) and (exitcond1_fu_60_p2 = ap_const_lv1_0)))) then
array_r_we0 <= ap_const_logic_1;
else
array_r_we0 <= ap_const_logic_0;
end if;
end process;
-- Exit condition: loop counter has reached 2.
exitcond1_fu_60_p2 <= "1" when (i_reg_42 = ap_const_lv2_2) else "0";
-- Loop counter + 1 (2-bit addition).
i_1_fu_66_p2 <= std_logic_vector(unsigned(i_reg_42) + unsigned(ap_const_lv2_1));
-- Loop counter zero-extended to 32 bits.
i_cast_fu_54_p1 <= std_logic_vector(resize(unsigned(i_reg_42),32));
-- tmp + loop counter, kept in 2 bits (0 for counter 0, 3 for counter 1).
tmp_1_fu_83_p2 <= std_logic_vector(unsigned(tmp_fu_72_p2) + unsigned(i_reg_42));
-- Loop counter shifted left by 1 (shift amount is '0' & "01" = 1), kept in
-- 2 bits (0 for counter 0, 2 for counter 1).
tmp_fu_72_p2 <= std_logic_vector(shift_left(unsigned(i_reg_42),to_integer(unsigned('0' & ap_const_lv2_1(2-1 downto 0)))));
end behav;
看到这么长一大串代码后,你或许会说看起来太复杂了,还不如自己写;一两个简单的可以自己写,成千上万个呢?这里我们不去关注architecture里面具体的实现过程(这里面包含很多优化),仅仅讨论从C/C++函数到VHDL的entity的对应关系。
看到VHDL后,你也许第一眼就看到了VHDL中的entity了,但是你不一定理解其管脚到底指代什么,下面给出一张int andfunction(int array[2],int array1[2])这个函数的硬件结构图
硬件引脚的具体含义是(这部分是参考的http://xilinx.eetop.cn/viewnews-1483):
ap_clk:设计的时钟信号
ap_rst:设计的复位信号
ap_start:开始计算的开始信号
ap_done:计算结束和输出就绪的完成信号
ap_idle:表示实体(设计)空闲的空闲信号
ap_ready:表示设计为新输入数据做好准备,与ap_idle 配合使用
ap_return:设计的返回值
name_address0:存储器的地址(本例中只对数组进行写操作,所以是写地址;name指代array(在端口名中为array_r)或array1,因为它们都是数组,在硬件中综合成了存储器接口)
name_ce0:存储器的芯片使能
name_we0:存储器的写使能
name_d0:存储器的写数据
ap_return: 函数返回值端口
分析上面的硬件管脚,其中ap_clk、ap_rst、ap_start、ap_done、ap_idle、ap_ready为大多数综合后硬件默认必有的,因为这些引脚便于处理器对其进行控制;name_address0、name_ce0、name_we0、name_d0这些引脚是访问数组所对应存储器的接口(地址、片选使能、写使能和写数据)。在这里我们应当注意:数组开辟的空间尽量不要过大,过大容易造成硬件资源不足,无法开辟满足要求的存储空间。
今天就对HLS从C/C++到VHDL的转换大致说到这里,由于刚接触这一块,难免有所纰漏,欢迎大家指出!