The Zynq MPSoC is a highly flexible, high-performance processor family from Xilinx aimed at AI applications such as cloud and edge computing. We chose it as the processor platform for our product to leave room for future evolution. This article briefly describes how to implement AXI DMA transfers from the Linux user plane in product development based on the Zynq MPSoC.
The Linux kernel provides a DMA driver framework (dmaengine) that defines a standard API for adding DMA drivers in kernel space. The framework supports several forms of DMA transfer, e.g. memory-to-memory through axi-cdma, and memory-to-device or device-to-memory through axi-dma.
For its own SoCs, Xilinx provides several DMA engine drivers built on top of this framework, which let users write device-specific DMA driver code in kernel space. However, neither the Linux dmaengine framework nor the Xilinx DMA engine drivers expose a user-plane interface; in other words, using DMA from user space requires writing additional code.
The Linux kernel driver for the Xilinx FPGA DMA IP lives in drivers/dma/xilinx/xilinx-dma.c. The user-plane support added here, a set of functions plus some new data structures, is implemented in that same file. The key points:
1. The DMA buffer is a ping-pong buffer allocated by the kernel driver. The driver exposes an mmap interface so the user-plane program can access the buffer directly, with no copying of data from kernel space to user space;
2. The user-plane program starts the DMA and fetches DMA data through ioctl();
3. In our application the DMA transfer is unidirectional, from the FPGA to the A53, and each DMA frame is large, about 600K+ bytes;
4. The FPGA design uses two axi-dma IP blocks connected to the HPC port of the MPSoC, and two axi-dma channels are configured in the Linux device tree (see the sketch below);
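As point 4 mentions, the two channels appear in the device tree. A simplified sketch for the first one follows; the unit address matches the /dev node names seen later, while the interrupt and xlnx,* values are purely illustrative, and the second instance at a0001000 is analogous:
/* Illustrative sketch only; properties vary with the FPGA design */
axi_dma_0: dma@a0000000 {
    compatible = "xlnx,axi-dma-1.00.a";
    reg = <0x0 0xa0000000 0x0 0x1000>;
    #dma-cells = <1>;
    dma-channel@a0000030 {
        compatible = "xlnx,axi-dma-s2mm-channel";  /* FPGA-to-memory only */
        interrupts = <0 90 4>;
        xlnx,datawidth = <0x80>;
    };
};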
The relevant code fragments are as follows:
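One note before the listing: parameters are handed to user space in a small structure, AxiDmaUserPrm, whose definition is not part of this excerpt. It is presumably shared with the user-plane code and looks roughly like this (field names taken from the listing; types assumed wide enough to hold 64-bit addresses on the A53):
/* Assumed definition, shared between driver and user plane */
typedef struct {
    unsigned int  bufIdx;   /* which half of the ping-pong buffer is valid */
    unsigned long PhyAddr;  /* physical address of the current buffer half */
    unsigned long VirAddr;  /* kernel virtual address, informational only */
} AxiDmaUserPrm;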
/**
* Map the dma memory into user space
*/
static int zynqmp_axidma_mmap(struct file *file_p, struct vm_area_struct *vma)
{
    struct xilinx_dma_device *pchannel_p;

    pchannel_p = (struct xilinx_dma_device *)file_p->private_data;
    return dma_mmap_coherent(pchannel_p->dev, vma,
                             pchannel_p->interface_p, pchannel_p->interface_phys_addr,
                             vma->vm_end - vma->vm_start);
}
/**
* Open the device file
*/
static int zynqmp_axidma_open(struct inode *inode, struct file *file)
{
    /* get the address of the 'xilinx_dma_device' structure */
    file->private_data = container_of(inode->i_cdev, struct xilinx_dma_device, cdev);
    return 0;
}
/**
* Close the file
*/
static int zynqmp_axidma_release(struct inode *inode, struct file *file)
{
    struct xilinx_dma_device *xdev;
    struct dma_device *dma_device;
    int i;

    xdev = (struct xilinx_dma_device *)file->private_data;
    dma_device = &xdev->common;
    /* Stop all activity when the device is closed, assuming this may
     * help if the application aborted without a normal close.
     */
    for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++)
        dma_device->device_terminate_all(&xdev->chan[i]->common);
    return 0;
}
/**
 * Perform I/O control to start a DMA transfer or fetch the received data.
 */
static long zynqmp_axidma_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int status = 0;
    int channo = 0;
    unsigned int idx;
    struct xilinx_dma_device *zynqdma_device = NULL;
    struct dma_async_tx_descriptor *chan_desc = NULL;
    struct dma_device *pdma = NULL;
    struct dma_chan *chan = NULL;
    struct scatterlist rx_sg;   /* DMA RX channel */
    enum dma_ctrl_flags flags;
    AxiDmaUserPrm UserPrm;

    zynqdma_device = (struct xilinx_dma_device *)file->private_data;
    pdma = &zynqdma_device->common;                 /* pointer to the dma_device */
    chan = &zynqdma_device->chan[channo]->common;   /* note: chan[0] is used to reach
                                                     * the (only) DMA channel */
    if (!DMA_IOCTL_CMD_IS_VALID(cmd))
        return -EINVAL;
    cmd = DMA_IOCTL_CMD_GET(cmd);
    flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
    switch (cmd) {
    case DMA_CMD_SET:
        /* RX: allocate the DMA ping-pong memory for receiving data from
         * the FPGA.
         */
        zynqdma_device->interface_p = (struct dma_proxy_channel_interface *)
            dmam_alloc_coherent(zynqdma_device->dev,
                                sizeof(struct dma_proxy_channel_interface),
                                &zynqdma_device->interface_phys_addr, GFP_KERNEL);
        if (!zynqdma_device->interface_p) {
            dev_err(zynqdma_device->dev, "DMA memory allocation failed\n");
            return -ENOMEM;
        }
        dev_info(zynqdma_device->dev,
                 "Allocated uncached memory at viraddr: %px, phyaddr: %pad\n",
                 zynqdma_device->interface_p,
                 &zynqdma_device->interface_phys_addr);
        zynqdma_device->chan[channo]->phybase = zynqdma_device->interface_phys_addr;
        zynqdma_device->chan[channo]->dma_buf_idx = 0;  /* used for ping-pong buffer switching */
        zynqdma_device->chan[channo]->DmaPacketRdy = DMA_PACKET_NORDY;

        /* Hand these parameters over to user space */
        UserPrm.bufIdx = zynqdma_device->chan[channo]->dma_buf_idx;
        UserPrm.PhyAddr = zynqdma_device->interface_phys_addr;
        UserPrm.VirAddr = (unsigned long)zynqdma_device->interface_p;
        if (copy_to_user((void __user *)arg, &UserPrm, sizeof(UserPrm))) {
            printk(KERN_INFO "DMA_CMD_SET: failed to hand DMA parameters to user plane\n");
            status = -EFAULT;
            break;
        }

        /* For now only a single entry of the scatter-gather list is used,
         * kept as a list for future flexibility.
         */
        sg_init_table(&rx_sg, BD_RX_CNT);
        sg_dma_address(&rx_sg) = zynqdma_device->interface_phys_addr;  /* physical address */
        sg_dma_len(&rx_sg) = sizeof(struct dma_proxy_channel_interface) >> 1;  /* 1/2 * DMABUF_SIZE */
        chan_desc = pdma->device_prep_slave_sg(chan,
                                               &rx_sg,
                                               BD_RX_CNT,
                                               DMA_DEV_TO_MEM,
                                               flags,
                                               NULL);
        if (!chan_desc) {
            printk(KERN_ERR "prep_dma_sg (*) failed\n");
            status = -EIO;
            break;
        }
        chan_desc->callback = sync_callback;
        chan_desc->callback_param = &zynqdma_device->chan[channo]->dma_buf_idx;
        init_completion(&zynqdma_device->cmp);
        zynqdma_device->cookie = chan_desc->tx_submit(chan_desc);
        if (dma_submit_error(zynqdma_device->cookie)) {
            printk(KERN_INFO "dma submit error\n");
            status = -EIO;
            break;
        }
        /* Start the DMA transaction previously queued up in the DMA engine */
        dma_async_issue_pending(chan);
        break;
    case DMA_CMD_COPY:
        if (wait_event_interruptible(dma_data_wait,
                zynqdma_device->chan[channo]->DmaPacketRdy == DMA_PACKET_RDY))
            return -ERESTARTSYS;
        /* Hand these parameters over to user space */
        idx = zynqdma_device->chan[channo]->dma_buf_idx;
        UserPrm.bufIdx = idx;
        UserPrm.PhyAddr = zynqdma_device->interface_phys_addr + idx * (DMABUF_SIZE >> 1);
        UserPrm.VirAddr = (unsigned long)zynqdma_device->interface_p + idx * (DMABUF_SIZE >> 1);
        if (copy_to_user((void __user *)arg, &UserPrm, sizeof(UserPrm))) {
            printk(KERN_INFO "DMA_CMD_COPY: failed to hand DMA parameters to user plane\n");
            status = -EFAULT;
            break;
        }
        zynqdma_device->chan[channo]->DmaPacketRdy = DMA_PACKET_NORDY;
        status = 0;
        break;
    default:
        break;
    }
    return status;
}
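The listing installs sync_callback as the descriptor's completion callback but does not include it. A minimal reconstruction, assuming its job is to flip the ping-pong index passed as callback_param, mark the packet ready, and wake the reader blocked in DMA_CMD_COPY (resubmission of the next half is omitted here), might look like:
/* Hypothetical sketch, not part of the original listing; runs in the DMA
 * engine's completion (tasklet) context.
 */
static void sync_callback(void *param)
{
    unsigned int *buf_idx_p = (unsigned int *)param;    /* &chan->dma_buf_idx */
    struct xilinx_dma_chan *chan =
        container_of(buf_idx_p, struct xilinx_dma_chan, dma_buf_idx);

    *buf_idx_p ^= 1;                         /* switch ping-pong halves */
    chan->DmaPacketRdy = DMA_PACKET_RDY;     /* mark a frame as ready */
    wake_up_interruptible(&dma_data_wait);   /* release the DMA_CMD_COPY waiter */
}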
static struct file_operations dm_fops = {
    .owner = THIS_MODULE,
    .open = zynqmp_axidma_open,
    .release = zynqmp_axidma_release,
    .unlocked_ioctl = zynqmp_axidma_ioctl,
    .mmap = zynqmp_axidma_mmap
};
/**
* Initialize the driver to be a character device
*/
static int cdevice_init(struct xilinx_dma_device *xdev, char *name)
{
    int rc;
    static struct class *local_class_p = NULL;

    /* Allocate a character device number from the kernel for this driver. */
    rc = alloc_chrdev_region(&xdev->devno, 0, 1, DRIVER_NAME);
    if (rc) {
        dev_err(xdev->dev, "unable to get a char device number\n");
        return rc;
    }

    /* Initialize the device data structure before registering the
     * character device with the kernel.
     */
    cdev_init(&xdev->cdev, &dm_fops);
    xdev->cdev.owner = THIS_MODULE;
    rc = cdev_add(&xdev->cdev, xdev->devno, 1);
    if (rc) {
        dev_err(xdev->dev, "Err [%d] while adding device [%s]\n", rc, DRIVER_NAME);
        goto init_error1;
    }

    /* Create the class in sysfs which will allow the device node in /dev
     * to be created; the class is shared by both DMA devices.
     */
    if (!local_class_p) {
        local_class_p = class_create(THIS_MODULE, DRIVER_NAME);
        if (IS_ERR(local_class_p)) {
            dev_err(xdev->dev, "unable to create class\n");
            rc = PTR_ERR(local_class_p);
            local_class_p = NULL;
            goto init_error2;
        }
    }
    xdev->class_p = local_class_p;

    /* Create the device node in /dev so the device is accessible
     * as a character device.
     */
    xdev->proxy_device_p = device_create(xdev->class_p, NULL,
                                         xdev->devno, NULL, name);
    if (IS_ERR(xdev->proxy_device_p)) {
        dev_err(xdev->dev, "unable to create the device\n");
        rc = PTR_ERR(xdev->proxy_device_p);
        goto init_error3;
    }
    return 0;

init_error3:
    class_destroy(xdev->class_p);
init_error2:
    cdev_del(&xdev->cdev);
init_error1:
    unregister_chrdev_region(xdev->devno, 1);
    return rc;
}
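The excerpt ends before the call site of cdevice_init(). Presumably it is invoked once per DMA device from the driver's probe function, with a name derived from the IP's base address; that would produce the /dev/zynqmp_a0000000.dma and /dev/zynqmp_a0001000.dma nodes mentioned below. A sketch under that assumption ('res' being the platform device's MMIO resource):
/* Hypothetical call site, e.g. near the end of xilinx_dma_probe() */
char devname[32];

snprintf(devname, sizeof(devname), "zynqmp_%llx.dma",
         (unsigned long long)res->start);
err = cdevice_init(xdev, devname);
if (err)
    return err;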
/**============================================
* end of added code
* =============================================*/
#endif
Once the driver is running, two DMA device files appear under /dev: zynqmp_a0000000.dma and zynqmp_a0001000.dma. The DMA memory can then be accessed through the ioctl() interface, e.g.:
gDmaFd = open("/dev/zynqmp_a0000000.dma", O_RDWR);
...
DmaRet = ioctl( gDmaFd, DmaCmd, &DmaMemAddr);
...
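Putting the pieces together, a user-plane receive loop might look like the sketch below. The header name axidma_user.h, the exact DMA_CMD_* request encoding, DMABUF_SIZE, and process_frame() are assumptions for illustration; they must match the driver-side definitions:
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include "axidma_user.h"   /* assumed: AxiDmaUserPrm, DMA_CMD_SET/COPY, DMABUF_SIZE */

extern void process_frame(unsigned char *frame, unsigned int len);  /* application-specific */

int main(void)
{
    AxiDmaUserPrm prm;
    unsigned char *buf;
    int fd = open("/dev/zynqmp_a0000000.dma", O_RDWR);

    if (fd < 0)
        return -1;
    /* Allocate the ping-pong buffer in the driver and start the first DMA */
    if (ioctl(fd, DMA_CMD_SET, &prm) < 0)
        goto out;
    /* Map the whole ping-pong buffer into this process, no copies needed */
    buf = mmap(NULL, DMABUF_SIZE, PROT_READ, MAP_SHARED, fd, 0);
    if (buf == MAP_FAILED)
        goto out;
    for (;;) {
        /* Block until one half of the buffer holds a complete frame */
        if (ioctl(fd, DMA_CMD_COPY, &prm) < 0)
            break;
        /* prm.bufIdx selects the half that was just filled */
        process_frame(buf + prm.bufIdx * (DMABUF_SIZE >> 1), DMABUF_SIZE >> 1);
    }
    munmap(buf, DMABUF_SIZE);
out:
    close(fd);
    return 0;
}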
By adding a user interface to the DMA driver, the user plane can start DMA and read DMA data directly. In this implementation, the user-plane program obtains the buffer parameters through the ioctl() interface and accesses the DMA buffer via mmap(), avoiding bulk copies of data between user space and kernel space.