FPGA实践教程(八)PS与PL共享DDR

背景:很多时候需要PS与PL共享DDR作为global memory。例如在卷积中,PS将weight与输入feature写入DDR,然后PL从DDR读取数据进行运算,再将结果写回DDR,供下一次迭代使用。

目的:1.  PS与PL共享DDR,读和写。并且像卷积一样需要三个指针。2.  IPcore设置变量,使PS能查看到IPcore的运行位置。3. 运用BRAM实现一定的数据搬运。

目录

一、IPcore编写

1.1 一种错误的接口

1.2 IPcore代码

1.3 位置信息

1.4 接口

  s_axilite

  m_axi

二、testBench

2.1 程序编写

2.2 PS与PL的交互

三、系统搭建与hdf生成

四、SDK

4.1 用malloc的方式开辟内存

4.2 指定指针位置

五、SoC

5.1 交叉编译

5.2 驱动

5.3 运行


一、IPcore编写

1.1 一种错误的接口

int share_dram_core(int write_nums,int read_nums,
					volatile float * write_ptr,volatile float *read_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum){
#pragma HLS INTERFACE m_axi depth=4096 port=write_ptr offset=slave
#pragma HLS INTERFACE m_axi depth=4096 port=read_ptr offset=slave
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=write_nums
#pragma HLS INTERFACE s_axilite port=read_nums
#pragma HLS INTERFACE s_axilite port=location_idx
#pragma HLS INTERFACE s_axilite port=write_loop_idx
#pragma HLS INTERFACE s_axilite port=read_loop_idx
#pragma HLS INTERFACE s_axilite port=read_sum

DRAM上不能有两个m_axi类型的指针,否则可能会遇到重叠等问题。

1.2 IPcore代码

int share_dram_core(int write_nums,int read_nums,
					volatile float * data_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum){
#pragma HLS INTERFACE m_axi depth=4096 port=data_ptr offset=slave
#pragma HLS INTERFACE s_axilite port=return register
#pragma HLS INTERFACE s_axilite port=write_nums register
#pragma HLS INTERFACE s_axilite port=read_nums register
#pragma HLS INTERFACE s_axilite port=location_idx register
#pragma HLS INTERFACE s_axilite port=write_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_sum register

	location_idx=0;
	write_loop_idx=0;
	read_loop_idx=0;
	read_sum=0;
	
	for(int read_loc=0;read_loc

只要一个指针指向DRAM。

1.3 位置信息

location_idx表示IPcore当前位置,0表示刚开始,1表示完成写操作,2表示完成读操作

read_loop_idx表示当前IPcore读出DRAM的次数

write_loop_idx表示当前IPcore写入DRAM的次数

return 1表示程序运行完成且成功。

1.4 接口

  s_axilite

运用带return的s_axilite接口来设置IPcore的参数、启动IPcore并判断其是否完成,同时通过它传出IPcore的位置信息。

  m_axi

运用主AXI(m_axi)协议,使IPcore对DDR进行读写。只能有一个。

Depth的设置问题:depth指定的是C/RTL协同仿真时该m_axi端口最多访问的元素个数(不是字节数),应不小于testbench实际访问的元素数。我们设为4096(即4096个float元素)。

二、testBench

2.1 程序编写

#include
#include
int share_dram_core(int write_nums,int read_nums,
					volatile float * data_ptr,
					int location_idx,int write_loop_idx,int read_loop_idx,
					int read_sum);
int main(){
	int PL_write_nums=50;
	int PL_read_nums=50;
	volatile float * PL_write_ptr;
	volatile float * PL_read_ptr;
	
	PL_read_ptr=(volatile float *)malloc(sizeof(float)*(PL_read_nums+PL_write_nums));
	//PL_write_ptr=(volatile float *)malloc(sizeof(float)*PL_write_nums);
	
	//PL_read_ptr=(volatile float *)0x00ac1680;
	PL_write_ptr=&PL_read_ptr[PL_read_nums];

	printf("Initilize SUCCESS!PL_write_num is %d,PL_read_num is %d\n",PL_write_nums,PL_read_nums);
	printf("PL_read_ptr is %8x, PL_write_ptr is %8x \n",PL_write_ptr,PL_read_ptr);
	
	for(int cur_PL_read_loc=0;cur_PL_read_loc

2.2 PS与PL的交互

PS传出数据很简单,但是PL传出数据不易。所以尽量以PS多输出信息来验证PL的正确性。

更多信息通过一些参数传出来,例如location_idx、write_loop_idx、read_loop_idx、read_sum。

INFO: [SIM 4] CSIM will launch GCC as the compiler.
   Compiling ../../../../src/share_dram_HLS_test.cpp in debug mode
   Generating csim.exe
Initilize SUCCESS!PL_write_num is 50,PL_read_num is 50
PL_read_ptr is   a21748, PL_write_ptr is   a21680 
PS write on PL read loc SUCCESS!
Check PL write done!
IPcore result SUCCESS!

synthesis,然后export RTL

三、系统搭建与hdf生成

运用已有的样板文件,hello world。加入HLS的IP。搭建系统。

FPGA实践教程(八)PS与PL共享DDR_第1张图片

使能GP与HP0,自动连接,create HDL wrapper,生成比特流,export到 local include bitstream

四、SDK

//created by Xing Xiangrui on 2018.12.25
//This is the SDK code to test share DRAM
//Write through PS to DDR
//Run PL : read from DDR to PL and write from PL to DDR
//Then read from DDR to PS

#include 
#include 
//#include 
//#include "platform.h"
//#include 
#include "xshare_dram_core.h"

XShare_dram_core XShare_dram_core_instance;

int main()
{
	printf("\n --------------program start------------- \n");

	//read and write param
	int ps_wirte_size=5; int ps_read_size=5;
	int core_location_idx=100;int core_write_loop_idx=100;int core_read_loop_idx=100;int core_read_sum=100;
	int core_return_value=100;
	volatile float * ps_write_ptr;
	volatile float * ps_read_ptr;

	//pointer intialize
	ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
	//ps_write_ptr= 0x10000000;
	ps_read_ptr=&ps_write_ptr[ps_wirte_size];
	if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
	if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
	memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
	memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));

	printf("Initialize ps_read_ptr and ps_write_ptr SUCCESS!\n");
	printf("ps_read_ptr is %8x \n",ps_read_ptr);
	printf("ps_write_ptr is %8x \n",ps_write_ptr);

	for(int cur_print_loc=0;cur_print_loc

用SDK打开vivado生成的文件夹下的 .sdk文件夹然后加载相应的hdf,生成bsp,创建c程序,hello world。build它。

启动FPGA,program FPGA将比特流烧录进去,然后运行程序。

4.1 用malloc的方式开辟内存

	//pointer intialize
	ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
	//ps_write_ptr= 0x10000000;
	ps_read_ptr=&ps_write_ptr[ps_wirte_size];
	if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
	if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
	memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
	memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));

FPGA始终输出0,即IPcore并未有正确的动作。

 --------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is   114764
ps_write_ptr is   114750
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
Calculating...
Calculating...
。。。

IPcore会一直不结束。

4.2 指定指针位置

	ps_write_ptr= 0x10000000;
	ps_read_ptr=&ps_write_ptr[ps_wirte_size];
	if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
	if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
	memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
	memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));

依然无法用IPcore写入值。

 --------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is 10000014
ps_write_ptr is 10000000
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
IsDone done SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
location   0, value 0.000000
location   1, value 0.000000
location   2, value 0.000000
location   3, value 0.000000
location   4, value 0.000000
-----------Program end SUCCESS!-

五、SoC

SDK实现过程中会出现地址冲突的问题,难以实现共享DDR,我们用SoC的方法共享DDR。

5.1 交叉编译

MIZ7035交叉编译单片机程序运行  https://blog.csdn.net/weixin_36474809/article/details/86487043

5.2 驱动

驱动由HLS和vivado生成,相应的地址在vivado中可查。在zynqNet基础上更改:

#ifndef SHARED_DRAM_H_9B5B43B5
#define SHARED_DRAM_H_9B5B43B5

// Interface of the shared-DRAM driver: maps a fixed physical DRAM window via
// /dev/mem so that the PS-side program and the PL IP core can exchange data.

#include <cstdio>       // printf
#include <cstdlib>      // NULL
#include <unistd.h>     // close, getpagesize
#include <stdint.h>     // uint8_t, uint16_t, uint32_t

#include <sys/types.h>  // off_t, size_t
#include <sys/mman.h>   // mmap, munmap, MAP_FAILED
#include <fcntl.h>      // open, O_RDWR
#include <err.h>        // err
#include <errno.h>      // errno

#include "xfpga_hw.hpp"   // Register addresses

typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;

// Location + Size of SHARED DRAM segment:
// - from Vivado Block Designer (Address Editor):
//     AXI M memory bus starts at 0x00000000 - 0xFFFFFFFF, SIZE: 4GB
// - from information by Simon Wright:
//     top 128MB of 1GB system memory are not OS-managed
// - from "free -m" on Zynq:
//     total mem 882MB -> 118MB not OS-managed
//     -> place SHARED_DRAM at 896MB (-> max. activations ~100MB)
//     -> 896MB = 896*1024*1024 = 0x38000000 bytes
//     -> 96MB = 96*1024*1024 = 0x06000000 bytes
// NOTE(review): the base address below is 0x20000000 (512MB), not the
// 0x38000000 derived above -- presumably changed for this board; confirm that
// this range is excluded from OS-managed memory on the target.

// Physical base address of the shared window (passed to mmap as the offset).
const off_t SHARED_DRAM_BASE_ADDR = 0x20000000;
// Length in bytes of the mapped window.
const size_t SHARED_DRAM_MEM_SIZE = 0x06000000;
// File descriptor of /dev/mem; -1 while the shared DRAM is not open.
extern int SHARED_DRAM_FD;
// Process-virtual address of the mapped window.
extern volatile u32* SHARED_DRAM_PTR;

// External Interface
bool SHARED_DRAM_open();                 // open /dev/mem and map the window
bool SHARED_DRAM_close();                // unmap the window and close /dev/mem
volatile u32* SHARED_DRAM_virtual();     // virtual address of the window
volatile u32* SHARED_DRAM_physical();    // physical base address as a pointer

// Internal Functions
volatile u32* map_SHARED_DRAM(off_t base_addr);
void release_SHARED_DRAM(volatile u32* axilite);

// unused:
// 32-bit word read + write (other sizes not supported!)
/* void shared_DRAM_write(u32 byte_addr, u32 value);
u32 shared_DRAM_read(u32 byte_addr); */

#endif /* end of include guard: SHARED_DRAM_H_9B5B43B5 */
#include "shared_dram.hpp"

// File descriptor of /dev/mem; -1 means the shared DRAM is not open.
int SHARED_DRAM_FD = -1;
// Virtual address of the mapped shared DRAM window; assigned by SHARED_DRAM_open().
volatile u32* SHARED_DRAM_PTR = NULL;

/**
 * Open /dev/mem and map the shared DRAM window into this process.
 *
 * The mapping itself is done by map_SHARED_DRAM(), which reports open/mmap
 * failures through err() instead of returning.
 *
 * @return true when the /dev/mem descriptor is valid after mapping;
 *         false when the shared DRAM was already open (nothing is remapped).
 */
bool SHARED_DRAM_open() {
  printf("XFPGA Driver: open /dev/mem handle\n");
  // Check that it's not yet open
  if (SHARED_DRAM_FD > -1) {
    printf("SHARED_DRAM already open!\n");
    return false;
  }

  // Memory Map SHARED_DRAM
  SHARED_DRAM_PTR = map_SHARED_DRAM(SHARED_DRAM_BASE_ADDR);
  // %lX matches the unsigned long argument (plain %X expects unsigned int).
  printf("SHARED_DRAM_PTR=%lX\n", (unsigned long)SHARED_DRAM_PTR);

  // Make sure the file handle is really set
  return (SHARED_DRAM_FD > -1);
}

/**
 * Unmap the shared DRAM window and close the /dev/mem descriptor.
 *
 * @return true when the descriptor has been reset to -1 after release;
 *         false when the shared DRAM was not open.
 */
bool SHARED_DRAM_close() {
  printf("XFPGA Driver: close /dev/mem handle\n");
  // Check that memory file is really open
  if (SHARED_DRAM_FD == -1) {
    printf("SHARED_DRAM bus not open!\n");
    return false;
  }
  // Release Memory Region and File handle
  release_SHARED_DRAM(SHARED_DRAM_PTR);
  // The mapping is gone: drop the stale address so SHARED_DRAM_virtual()
  // cannot hand out a dangling pointer after close.
  SHARED_DRAM_PTR = NULL;
  // Make sure file was correctly released
  return (SHARED_DRAM_FD == -1);
}

// Returns the process-virtual address currently stored in SHARED_DRAM_PTR.
volatile u32* SHARED_DRAM_virtual() {
  volatile u32* const virtual_base = SHARED_DRAM_PTR;
  return virtual_base;
}

// Returns SHARED_DRAM_BASE_ADDR reinterpreted as a pointer value.
volatile u32* SHARED_DRAM_physical() {
  volatile u32* const physical_base = (volatile u32*) SHARED_DRAM_BASE_ADDR;
  return physical_base;
}

////////////////////////////////////////////////////
////////////////// Helper Functions ////////////////

/**
 * Open /dev/mem and map SHARED_DRAM_MEM_SIZE bytes starting at base_addr.
 *
 * Side effect: stores the /dev/mem descriptor in SHARED_DRAM_FD.
 * Failures of open() or mmap() are reported through err(), which terminates
 * the process, so this function only returns on success.
 *
 * @param base_addr physical start address; rounded down to a page boundary.
 * @return pointer to the start of the mapped region.
 */
volatile u32* map_SHARED_DRAM(off_t base_addr) {
  // %lX matches the unsigned long argument (plain %X expects unsigned int).
  printf("XFPGA Driver: map shared DRAM at base address %lX\n", (unsigned long)base_addr);
  // make sure that base addr is aligned to memory pages...
  base_addr &= ~(getpagesize() - 1);

  // Open /dev/mem file (need root privileges or setuid!)
  SHARED_DRAM_FD = open("/dev/mem", O_RDWR);
  if (SHARED_DRAM_FD < 0) err(errno, "could not open /dev/mem. need to be root");

  // Map SHARED_DRAM memory region; check the raw result before casting it.
  void* mapping = mmap(NULL, SHARED_DRAM_MEM_SIZE, PROT_READ | PROT_WRITE,
                       MAP_SHARED, SHARED_DRAM_FD, base_addr);
  if (mapping == MAP_FAILED) err(errno, "could not map memory for SHARED_DRAM bus");
  return (volatile u32*)mapping;
}

/**
 * Unmap the shared DRAM region and close the /dev/mem descriptor.
 *
 * Failures of munmap() or close() are reported through err().
 * On success SHARED_DRAM_FD is reset to -1 to mark the driver as closed.
 *
 * @param pointer start of the region previously returned by mmap().
 */
void release_SHARED_DRAM(volatile u32* pointer) {
  printf("XFPGA Driver: unmap shared DRAM\n");

  // Drop the memory mapping first...
  if (munmap((void*)pointer, SHARED_DRAM_MEM_SIZE) < 0) {
    err(errno, "could not unmap memory region for SHARED_DRAM bus");
  }

  // ...then give back the /dev/mem descriptor.
  if (close(SHARED_DRAM_FD) < 0) {
    err(errno, "could not release /dev/mem file handle");
  }

  // -1 is the "closed" marker checked by SHARED_DRAM_open/close.
  SHARED_DRAM_FD = -1;
}

5.3 运行

交叉编译,挂载,运行

你可能感兴趣的:(FPGA,FPGA实践教程)