背景:很多时候需要PS与PL共享DDR作为global memory,例如卷积之中,PS将weight in与feature写入DDR,然后PL调用DDR进行运算,再将结果写入DDR进行下一次迭代。
目的:1. PS与PL共享DDR,既能读也能写,并且像卷积一样需要三个指针。2. 在IPcore中设置状态变量,使PS能够查看IPcore当前的运行位置。3. 运用BRAM实现一定的数据搬运。
目录
一、IPcore编写
1.1 一种错误的接口
1.2 IPcore代码
1.3 位置信息
1.4 接口
s_axilite
m_axi
二、testBench
2.1 程序编写
2.2 PS与PL的交互
三、系统搭建与hdf生成
四、SDK
4.1 用malloc的方式开辟内存
4.2 指定指针位置
五、SoC
5.1 交叉编译
5.2 驱动
5.3 运行
int share_dram_core(int write_nums,int read_nums,
volatile float * write_ptr,volatile float *read_ptr,
int location_idx,int write_loop_idx,int read_loop_idx,
int read_sum){
#pragma HLS INTERFACE m_axi depth=4096 port=write_ptr offset=slave
#pragma HLS INTERFACE m_axi depth=4096 port=read_ptr offset=slave
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=write_nums
#pragma HLS INTERFACE s_axilite port=read_nums
#pragma HLS INTERFACE s_axilite port=location_idx
#pragma HLS INTERFACE s_axilite port=write_loop_idx
#pragma HLS INTERFACE s_axilite port=read_loop_idx
#pragma HLS INTERFACE s_axilite port=read_sum
DRAM上不能有两个m_axi类型的指针,否则可能会遇到重叠等问题。
int share_dram_core(int write_nums,int read_nums,
volatile float * data_ptr,
int location_idx,int write_loop_idx,int read_loop_idx,
int read_sum){
#pragma HLS INTERFACE m_axi depth=4096 port=data_ptr offset=slave
#pragma HLS INTERFACE s_axilite port=return register
#pragma HLS INTERFACE s_axilite port=write_nums register
#pragma HLS INTERFACE s_axilite port=read_nums register
#pragma HLS INTERFACE s_axilite port=location_idx register
#pragma HLS INTERFACE s_axilite port=write_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_loop_idx register
#pragma HLS INTERFACE s_axilite port=read_sum register
location_idx=0;
write_loop_idx=0;
read_loop_idx=0;
read_sum=0;
for(int read_loc=0;read_loc
只要一个指针指向DRAM。
location_idx表示IPcore当前位置,0表示刚开始,1表示完成写操作,2表示完成读操作
read_loop_idx表示当前IPcore读出DRAM的次数
write_loop_idx表示当前IPcore写入DRAM的次数
return 1表示程序运行完成且成功。
运用带return的s_axilite接口来设置IPcore的参数值并控制IPcore的运行,同时把IPcore的位置信息传出给PS。
运用主AXI(m_axi)协议,使IPcore对DDR进行读写。m_axi类型的指针只能有一个。
Depth的设置问题:可能是IPcore可以读写的DDR范围。注意depth按元素个数计(主要用于C/RTL协同仿真),而不是按字节计。我们设为4096,即4096个4字节的浮点数;若只需要1024个浮点数(4096字节),depth应设为1024。
#include
#include
int share_dram_core(int write_nums,int read_nums,
volatile float * data_ptr,
int location_idx,int write_loop_idx,int read_loop_idx,
int read_sum);
int main(){
int PL_write_nums=50;
int PL_read_nums=50;
volatile float * PL_write_ptr;
volatile float * PL_read_ptr;
PL_read_ptr=(volatile float *)malloc(sizeof(float)*(PL_read_nums+PL_write_nums));
//PL_write_ptr=(volatile float *)malloc(sizeof(float)*PL_write_nums);
//PL_read_ptr=(volatile float *)0x00ac1680;
PL_write_ptr=&PL_read_ptr[PL_read_nums];
printf("Initilize SUCCESS!PL_write_num is %d,PL_read_num is %d\n",PL_write_nums,PL_read_nums);
printf("PL_read_ptr is %8x, PL_write_ptr is %8x \n",PL_write_ptr,PL_read_ptr);
for(int cur_PL_read_loc=0;cur_PL_read_loc
PS传出数据很简单,但是PL传出数据不易。所以尽量以PS多输出信息来验证PL的正确性。
更多信息通过一些参数传出来。例如location_idx, write_loop_idx; read_loop_idx; read_sum;
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../../src/share_dram_HLS_test.cpp in debug mode
Generating csim.exe
Initilize SUCCESS!PL_write_num is 50,PL_read_num is 50
PL_read_ptr is a21748, PL_write_ptr is a21680
PS write on PL read loc SUCCESS!
Check PL write done!
IPcore result SUCCESS!
synthesis,然后export RTL
运用已有的样板文件,hello world。加入HLS的IP。搭建系统。
使能GP与HP0,自动连接,create HDL wrapper,生成比特流,export到 local include bitstream
//created by Xing Xiangrui on 2018.12.25
//This is the SDK code to test share DRAM
//Write through PS to DDR
//Run PL : read from DDR to PL and write from PL to DDR
//Then read from DDR to PS
#include
#include
//#include
//#include "platform.h"
//#include
#include "xshare_dram_core.h"
XShare_dram_core XShare_dram_core_instance;
int main()
{
printf("\n --------------program start------------- \n");
//read and write param
int ps_wirte_size=5; int ps_read_size=5;
int core_location_idx=100;int core_write_loop_idx=100;int core_read_loop_idx=100;int core_read_sum=100;
int core_return_value=100;
volatile float * ps_write_ptr;
volatile float * ps_read_ptr;
//pointer intialize
ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
//ps_write_ptr= 0x10000000;
ps_read_ptr=&ps_write_ptr[ps_wirte_size];
if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
printf("Initialize ps_read_ptr and ps_write_ptr SUCCESS!\n");
printf("ps_read_ptr is %8x \n",ps_read_ptr);
printf("ps_write_ptr is %8x \n",ps_write_ptr);
for(int cur_print_loc=0;cur_print_loc
用SDK打开vivado生成的文件夹下的 .sdk文件夹然后加载相应的hdf,生成bsp,创建c程序,hello world。build它。
启动FPGA,program FPGA将比特流烧录进去,然后运行程序。
// Pointer initialization: a single malloc'd buffer holds both regions --
// ps_wirte_size floats for the write region, followed by ps_read_size floats
// for the read region.
ps_write_ptr=(volatile float *)malloc((ps_wirte_size+ps_read_size)*sizeof(float));
//ps_write_ptr= 0x10000000;
// The read region starts immediately after the write region in the same buffer.
ps_read_ptr=&ps_write_ptr[ps_wirte_size];
// NOTE(review): ps_read_ptr is computed as an offset from ps_write_ptr, so the
// second check is unlikely to ever fire, even when malloc fails. Both checks
// only print a message; execution continues into the memset calls below.
if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
// Zero both regions before they are used.
memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
FPGA始终输出0,即IPcore并未有正确的动作。
--------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is 114764
ps_write_ptr is 114750
location 0, value 0.000000
location 1, value 0.000000
location 2, value 0.000000
location 3, value 0.000000
location 4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=0
Calculating...
Calculating...
。。。
IPcore会一直不结束。
// Pointer initialization with a fixed address instead of malloc.
// An integer constant does not implicitly convert to a pointer, so the
// address is cast explicitly to the pointer type of ps_write_ptr.
ps_write_ptr=(volatile float *)0x10000000;
// The read region starts immediately after the write region.
ps_read_ptr=&ps_write_ptr[ps_wirte_size];
// These checks are carried over from the malloc variant of this snippet;
// with a fixed non-zero address neither condition is true.
if(ps_write_ptr==NULL)printf("Malloc ps_write_ptr failure \n");
if(ps_read_ptr==NULL)printf("Malloc ps_read_ptr failure \n");
// Zero both regions before they are used.
memset((void*)ps_write_ptr,0,ps_wirte_size*sizeof(float));
memset((void*)ps_read_ptr,0,ps_read_size*sizeof(float));
依然无法用IPcore写入值。
--------------program start-------------
Initialize ps_read_ptr and ps_write_ptr SUCCESS!
ps_read_ptr is 10000014
ps_write_ptr is 10000000
location 0, value 0.000000
location 1, value 0.000000
location 2, value 0.000000
location 3, value 0.000000
location 4, value 0.000000
XShare_dram_core_Initialize SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------Core value set SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
-------------IPCore start SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
IsDone done SUCCESS!
core_location_idx=0
core_write_loop_idx=0
core_read_loop_idx=0
core_read_sum=0
core_return_value=1
location 0, value 0.000000
location 1, value 0.000000
location 2, value 0.000000
location 3, value 0.000000
location 4, value 0.000000
-----------Program end SUCCESS!-
SDK实现过程中会出现地址冲突的问题,难以实现共享DDR,我们用SoC的方法共享DDR。
MIZ7035交叉编译单片机程序运行 https://blog.csdn.net/weixin_36474809/article/details/86487043
驱动由HLS和vivado生成,相应的地址在vivado中可查。在zynqNet基础上更改:
#ifndef SHARED_DRAM_H_9B5B43B5
#define SHARED_DRAM_H_9B5B43B5
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "xfpga_hw.hpp" // Register addresses
// Short aliases for the fixed-width unsigned integer types.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
// Location + Size of SHARED DRAM segment:
// - from Vivado Block Designer (Address Editor):
// AXI M memory bus starts at 0x00000000 – 0xFFFFFFFF, SIZE: 4GB
// - from information by Simon Wright:
// top 128MB of 1GB system memory are not OS-managed
// - from "free -m" on Zynq:
// total mem 882MB -> 118MB not OS-managed
// -> place SHARED_DRAM at 896MB (-> max. activations ~100MB)
// -> 896MB = 896*1024*1024 = 0x3800'0000 bytes
// -> 96MB = 96*1024*1024 = 0x600'0000 bytes
// NOTE(review): the base address below is 0x20000000 (512MB), not the
// 0x38000000 (896MB) derived in the comment above -- presumably changed for
// this board; confirm against the target's memory map.
const off_t SHARED_DRAM_BASE_ADDR = 0x20000000;
// Size in bytes of the region that is mapped (0x06000000 = 96MB).
const size_t SHARED_DRAM_MEM_SIZE = 0x06000000;
// File descriptor for /dev/mem; -1 while the shared DRAM is not open.
extern int SHARED_DRAM_FD;
// Pointer to the mapped shared DRAM region (set by SHARED_DRAM_open).
extern volatile u32* SHARED_DRAM_PTR;
// External Interface
// Open /dev/mem and map the region; returns false if it was already open.
bool SHARED_DRAM_open();
// Unmap the region and close /dev/mem; returns false if it was not open.
bool SHARED_DRAM_close();
// Address of the region as seen by this process (the mapped pointer).
volatile u32* SHARED_DRAM_virtual();
// SHARED_DRAM_BASE_ADDR reinterpreted as a pointer.
volatile u32* SHARED_DRAM_physical();
// Internal Functions
// Open /dev/mem and mmap SHARED_DRAM_MEM_SIZE bytes at the page-aligned base_addr.
volatile u32* map_SHARED_DRAM(off_t base_addr);
// munmap the region and close the /dev/mem file descriptor.
void release_SHARED_DRAM(volatile u32* axilite);
// unused:
// 32-bit word read + write (other sizes not supported!)
/* void shared_DRAM_write(u32 byte_addr, u32 value);
u32 shared_DRAM_read(u32 byte_addr); */
#endif /* end of include guard: SHARED_DRAM_H_9B5B43B5 */
#include "shared_dram.hpp"
// File descriptor of /dev/mem; -1 means the shared DRAM is currently not open.
int SHARED_DRAM_FD = -1;
// Pointer to the mapped shared DRAM region; NULL until SHARED_DRAM_open() maps it.
volatile u32* SHARED_DRAM_PTR = NULL;
// Open the shared DRAM: opens /dev/mem and maps the region starting at
// SHARED_DRAM_BASE_ADDR into this process via map_SHARED_DRAM().
//
// Returns true when the /dev/mem descriptor is set after mapping, and false
// when the shared DRAM was already open (nothing is remapped in that case).
bool SHARED_DRAM_open() {
  printf("XFPGA Driver: open /dev/mem handle\n");

  // Refuse to open twice: a valid descriptor means a mapping already exists.
  if (SHARED_DRAM_FD > -1) {
    printf("SHARED_DRAM already open!\n");
    return false;
  }

  // Memory-map the shared DRAM region.
  SHARED_DRAM_PTR = map_SHARED_DRAM(SHARED_DRAM_BASE_ADDR);
  // The argument is an unsigned long, so the conversion needs the 'l' length
  // modifier; plain %X expects an unsigned int.
  printf("SHARED_DRAM_PTR=%lX\n", (unsigned long)SHARED_DRAM_PTR);

  // Make sure the file handle is really set.
  return (SHARED_DRAM_FD > -1);
}
// Close the shared DRAM: unmaps the region and closes /dev/mem through
// release_SHARED_DRAM().
//
// Returns true when the descriptor has been reset to -1 afterwards, and false
// when the shared DRAM was not open in the first place.
bool SHARED_DRAM_close() {
  printf("XFPGA Driver: close /dev/mem handle\n");

  // Nothing to release if the memory file was never opened.
  if (SHARED_DRAM_FD == -1) {
    printf("SHARED_DRAM bus not open!\n");
    return false;
  }

  // Release the memory region and the file handle.
  release_SHARED_DRAM(SHARED_DRAM_PTR);
  // The mapping no longer exists; clear the global so SHARED_DRAM_virtual()
  // does not hand out a dangling pointer after close.
  SHARED_DRAM_PTR = NULL;

  // Make sure the file was correctly released.
  return (SHARED_DRAM_FD == -1);
}
// Returns the current value of SHARED_DRAM_PTR, i.e. the address of the
// shared DRAM mapping as seen by this process.
volatile u32* SHARED_DRAM_virtual() {
  volatile u32* mapped = SHARED_DRAM_PTR;
  return mapped;
}
// Returns SHARED_DRAM_BASE_ADDR reinterpreted as a pointer. The value is the
// base address constant itself, not an address inside this process's mapping.
volatile u32* SHARED_DRAM_physical() {
  volatile u32* base = (volatile u32*) SHARED_DRAM_BASE_ADDR;
  return base;
}
////////////////////////////////////////////////////
////////////////// Helper Functions ////////////////
// Map the shared DRAM region into this process.
//
// Opens /dev/mem (stored in SHARED_DRAM_FD) and maps SHARED_DRAM_MEM_SIZE
// bytes, readable and writable, starting at base_addr rounded down to a page
// boundary.
//
// base_addr: start address of the region, used as the offset into /dev/mem.
// Returns the mapped pointer. On open or mmap failure err() prints a
// diagnostic and terminates the program, so this function does not return.
volatile u32* map_SHARED_DRAM(off_t base_addr) {
  // The argument is an unsigned long, so the conversion needs the 'l' length
  // modifier; plain %X expects an unsigned int.
  printf("XFPGA Driver: map shared DRAM at base address %lX\n", (unsigned long)base_addr);

  // Make sure that the base address is aligned to memory pages.
  base_addr &= ~(getpagesize() - 1);

  // Open /dev/mem (needs root privileges or setuid).
  SHARED_DRAM_FD = open("/dev/mem", O_RDWR);
  if (SHARED_DRAM_FD < 0) err(errno, "could not open /dev/mem. need to be root");

  // Map the region; compare the raw result against MAP_FAILED before casting it.
  void* mapped = mmap(NULL, SHARED_DRAM_MEM_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, SHARED_DRAM_FD, base_addr);
  if (mapped == MAP_FAILED) err(errno, "could not map memory for SHARED_DRAM bus");

  return (volatile u32*)mapped;
}
// Undo map_SHARED_DRAM(): unmap SHARED_DRAM_MEM_SIZE bytes at `pointer`, close
// the /dev/mem descriptor, and mark it closed by resetting SHARED_DRAM_FD to -1.
// If munmap or close fails, err() prints a diagnostic and terminates the program.
void release_SHARED_DRAM(volatile u32* pointer) {
  printf("XFPGA Driver: unmap shared DRAM\n");

  // Drop the memory mapping first.
  if (munmap((void*)pointer, SHARED_DRAM_MEM_SIZE) < 0) {
    err(errno, "could not unmap memory region for SHARED_DRAM bus");
  }

  // Then give back the file handle.
  if (close(SHARED_DRAM_FD) < 0) {
    err(errno, "could not release /dev/mem file handle");
  }

  // -1 is the sentinel the open/close functions test for "not open".
  SHARED_DRAM_FD = -1;
}
交叉编译,挂载,运行