Implementing User-Space DMA Transfers on the Zynq MPSoC

1. Introduction

The Zynq MPSoC is a highly flexible, high-performance processor family from Xilinx aimed at AI applications such as cloud and edge computing. We chose it as the processor platform for our product to leave room for future evolution. This article briefly describes how to implement AXI DMA transfers from Linux user space in a product based on the Zynq MPSoC.
The Linux kernel provides a DMA driver framework (dmaengine) that defines standard APIs for adding DMA drivers in kernel space. The framework supports several kinds of DMA transfer, such as memory-to-memory (axi-cdma) and memory-to-device or device-to-memory (axi-dma).

2. Implementing user-space DMA transfers

On top of the Linux dmaengine framework, Xilinx provides several DMA driver engines for its own SoCs, which allow device-specific DMA driver code to be written in kernel space. However, neither the Linux dmaengine framework nor the Xilinx DMA engines expose a user-space interface; to use DMA from user space, additional code has to be written.
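
For reference, the framework's kernel-space slave API follows a request/prepare/submit/issue pattern. A minimal sketch, assuming a device-tree channel named "rx" and a DMA-coherent buffer already allocated elsewhere (the function name and error codes are illustrative):

#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/err.h>

/* Hedged sketch of the standard dmaengine slave flow */
static int start_slave_rx(struct device *dev, dma_addr_t dma_handle, size_t len)
{
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *desc;
	dma_cookie_t cookie;

	chan = dma_request_chan(dev, "rx");	/* bind the channel named in the DT */
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	desc = dmaengine_prep_slave_single(chan, dma_handle, len,
					   DMA_DEV_TO_MEM,
					   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		return -ENOMEM;

	cookie = dmaengine_submit(desc);	/* queue the descriptor */
	if (dma_submit_error(cookie))
		return -EIO;

	dma_async_issue_pending(chan);		/* start the transfer */
	return 0;
}
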
The Linux driver for Xilinx's FPGA DMA IP lives in drivers/dma/xilinx/xilinx_dma.c. The user-plane support added for this work consists of a set of functions plus some new data structures, all implemented in that same file. Key points:
1. The DMA buffer is a ping-pong buffer allocated in the kernel driver. The driver provides an mmap interface so that the user-plane program can access the buffer directly, with no copy of data from kernel space to user space (a sketch of the assumed buffer layout follows this list);
2. The user-plane program starts DMA and fetches DMA data through ioctl();
3. In our application, DMA transfers run in one direction only, from the FPGA to the A53 cores, and each DMA frame is large, about 600 KB or more;
4. The FPGA design uses two axi-dma IP cores connected to the MPSoC's HPC port, and two axi-dma channels are configured in the Linux device tree.
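
The fragments below also rely on a few definitions that are not part of the stock xilinx_dma.c. A plausible minimal sketch of those additions (sizes and field widths are assumptions inferred from how the code uses them):

/* Hypothetical definitions assumed by the fragments below. Each half of
 * the ping-pong buffer must hold one DMA frame (600 KB+), so the total
 * size here is only an example.
 */
#define DMABUF_SIZE		(2 * 1024 * 1024)	/* both halves together */
#define BD_RX_CNT		1			/* single scatter-gather entry */
#define DMA_PACKET_RDY		1
#define DMA_PACKET_NORDY	0

/* The ping-pong DMA buffer: one half at offset 0, the other at DMABUF_SIZE/2 */
struct dma_proxy_channel_interface {
	unsigned char buf[DMABUF_SIZE];
};

/* Parameters handed back to user space by ioctl(). The 32-bit fields match
 * the casts in the driver code, although 64-bit-safe types would be
 * preferable on the A53.
 */
typedef struct {
	unsigned int bufIdx;	/* which half (0/1) holds the newest frame */
	unsigned int PhyAddr;	/* physical address of that half */
	unsigned int VirAddr;	/* kernel virtual address (informational) */
} AxiDmaUserPrm;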

The relevant code fragments follow:

/**
* Map the dma memory into user space 
*/
static int zynqmp_axidma_mmap(struct file *file_p, struct vm_area_struct *vma)
{
	struct xilinx_dma_device *pchannel_p;

	pchannel_p = (struct xilinx_dma_device *)file_p->private_data;
	return dma_mmap_coherent(pchannel_p->dev, vma,
					   pchannel_p->interface_p, pchannel_p->interface_phys_addr,
					   vma->vm_end - vma->vm_start);
}

/**
* Open the device file 
*/
static int zynqmp_axidma_open(struct inode *inode, struct file *file)
{
	/* get the address of the 'xilinx_dma_device' structure */
	file->private_data = container_of(inode->i_cdev, struct xilinx_dma_device, cdev);
	return 0;
}


/**
* Close the file 
*/
static int zynqmp_axidma_release(struct inode *inode, struct file *file)
{
	struct xilinx_dma_device *xdev;
	struct dma_device *dma_device;
	int	i;

	xdev = (struct xilinx_dma_device *)file->private_data;
	dma_device = &xdev->common;

	/* Stop all activity when the device file is closed, in case the
	 * application aborted without a normal close.
	 */
	for (i = 0; i < XILINX_DMA_MAX_CHANS_PER_DEVICE; i++) {
		if (xdev->chan[i])	/* skip channels that were never probed */
			dma_device->device_terminate_all(&xdev->chan[i]->common);
	}
	return 0;
}


/**
* Perform I/O control to start a DMA transfer.
*/
static long zynqmp_axidma_ioctl(struct file *file, unsigned int cmd , unsigned long arg)
{
	int	status = 0;
	int	channo = 0;
	unsigned int idx;
	struct xilinx_dma_device *zynqdma_device = NULL;
	struct dma_async_tx_descriptor *chan_desc = NULL;
	struct dma_device *pdma = NULL;
	struct dma_chan *chan = NULL;
	struct scatterlist	rx_sg;		/* single-entry RX scatter-gather list */
	enum dma_ctrl_flags	flags;
	AxiDmaUserPrm	UserPrm;


	zynqdma_device = (struct xilinx_dma_device *)file->private_data;
	pdma = &(zynqdma_device->common);		// get pointer of dma_device
	chan = &(zynqdma_device->chan[channo]->common);	/* note: channo is 0, so this
							 * picks up the RX channel */
	if( !DMA_IOCTL_CMD_IS_VALID( cmd ) ) {
		return -EINVAL;
	}
	cmd = DMA_IOCTL_CMD_GET( cmd );
	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
	switch( cmd ) {
	case DMA_CMD_SET:
		/* RX: allocate DMA ping-pong memory for receiving data from the FPGA
		 */
		zynqdma_device->interface_p = (struct dma_proxy_channel_interface *)
			dmam_alloc_coherent(zynqdma_device->dev,
						sizeof(struct dma_proxy_channel_interface),
						&zynqdma_device->interface_phys_addr, GFP_KERNEL);
		if (!zynqdma_device->interface_p) {
			dev_err(zynqdma_device->dev, "DMA memory allocation failed\n");
			return -ENOMEM;
		}
		printk(KERN_INFO "Allocated uncached memory at virt: %p, phys: %pad\n",
				zynqdma_device->interface_p,
				&zynqdma_device->interface_phys_addr);
		zynqdma_device->chan[channo]->phybase = zynqdma_device->interface_phys_addr;
		zynqdma_device->chan[channo]->dma_buf_idx = 0;		/* used for the ping-pong buffer switch */
		zynqdma_device->chan[channo]->DmaPacketRdy = DMA_PACKET_NORDY;

		/* Hand the buffer parameters to user space */
		UserPrm.bufIdx = zynqdma_device->chan[channo]->dma_buf_idx;
		UserPrm.PhyAddr = zynqdma_device->interface_phys_addr;
		UserPrm.VirAddr = (unsigned int)(uintptr_t)zynqdma_device->interface_p;
		if (copy_to_user((void __user *)arg, &UserPrm, sizeof(UserPrm))) {
			printk(KERN_INFO "DMA_CMD_SET: failed to copy DMA parameters to user space\n");
			status = -EFAULT;
			break;
		}
		
		/* Only a single entry is used in the scatter-gather list for
		 * now; the list form is kept for future flexibility.
		 */
		sg_init_table(&rx_sg, BD_RX_CNT);
		sg_dma_address(&rx_sg) = zynqdma_device->interface_phys_addr;	/* physical address */
		sg_dma_len(&rx_sg) = sizeof(struct dma_proxy_channel_interface) >> 1;	/* one ping-pong half */
		
		chan_desc = pdma->device_prep_slave_sg( chan, 
											&rx_sg, 
											BD_RX_CNT,
											DMA_DEV_TO_MEM,
											flags,
											NULL);
		if( !chan_desc ) {
			printk(KERN_ERR "device_prep_slave_sg failed\n");
			status = -ENOMEM;
			break;
		}
		chan_desc->callback = sync_callback;
		chan_desc->callback_param = &(zynqdma_device->chan[channo]->dma_buf_idx );
		init_completion( &zynqdma_device->cmp );
		
		zynqdma_device->cookie = chan_desc->tx_submit( chan_desc );
		if( dma_submit_error( zynqdma_device->cookie )) {
			printk(KERN_INFO "dma submit error\n");
			status = -EIO;
			break;
		}

		/* Start the DMA transaction that was previously queued in the DMA engine */
		dma_async_issue_pending( chan );
		break;
	
	case DMA_CMD_COPY:
		if (wait_event_interruptible(dma_data_wait,
				zynqdma_device->chan[channo]->DmaPacketRdy == DMA_PACKET_RDY))
			return -ERESTARTSYS;	/* interrupted by a signal */

		/* Tell user space which ping-pong half holds the new frame */
		idx = zynqdma_device->chan[channo]->dma_buf_idx;
		UserPrm.bufIdx = idx;
		UserPrm.PhyAddr = (unsigned int)(zynqdma_device->interface_phys_addr +
						 idx * (DMABUF_SIZE >> 1));
		UserPrm.VirAddr = (unsigned int)((uintptr_t)zynqdma_device->interface_p +
						 idx * (DMABUF_SIZE >> 1));
		if (copy_to_user((void __user *)arg, &UserPrm, sizeof(UserPrm))) {
			printk(KERN_INFO "DMA_CMD_COPY: failed to copy DMA parameters to user space\n");
			status = -EFAULT;
			break;
		}
		
		zynqdma_device->chan[channo]->DmaPacketRdy = DMA_PACKET_NORDY;
		status = 0;
		break;

	default:
		break;

	}
	return status;
}
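
The completion callback sync_callback(), registered above but not shown in the original, is where the ping-pong switch happens. A hedged sketch, assuming dma_buf_idx and DmaPacketRdy are members added to struct xilinx_dma_chan and dma_data_wait is a global wait queue declared by the driver:

/* Hedged sketch of sync_callback(): flip the ping-pong index, mark the
 * frame ready, and wake the reader blocked in DMA_CMD_COPY. Re-arming a
 * descriptor for the other buffer half would repeat the prep/submit/issue
 * steps from DMA_CMD_SET and is omitted here.
 */
static void sync_callback(void *param)
{
	unsigned int *idx_p = param;	/* callback_param is &chan->dma_buf_idx */
	struct xilinx_dma_chan *chan =
		container_of(idx_p, struct xilinx_dma_chan, dma_buf_idx);

	*idx_p ^= 1;				/* switch ping-pong halves */
	chan->DmaPacketRdy = DMA_PACKET_RDY;	/* newest frame is complete */
	wake_up_interruptible(&dma_data_wait);	/* release the DMA_CMD_COPY waiter */
}
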


static struct file_operations dm_fops = {
	.owner	  = THIS_MODULE,
	.open	  = zynqmp_axidma_open,
	.release  = zynqmp_axidma_release,
	.unlocked_ioctl = zynqmp_axidma_ioctl,
	.mmap	  = zynqmp_axidma_mmap,
};

/** 
* Initialize the driver to be a character device 
*/
static int cdevice_init(struct xilinx_dma_device *xdev,  char *name)
{
	int rc;
	static struct class *local_class_p = NULL;

	/* Allocate a character device from the kernel for this driver.
	 */
	rc = alloc_chrdev_region(&xdev->devno, 0, 1, DRIVER_NAME);
	if (rc) {
		dev_err(xdev->dev, "unable to get a char device number\n");
		return rc;
	}

	/* Initialize the device data structure before registering the character
	 * device with the kernel.
	 */
	cdev_init(&xdev->cdev, &dm_fops);
	xdev->cdev.owner = THIS_MODULE;
	rc = cdev_add(&xdev->cdev, xdev->devno, 1);
	if (rc) {
		dev_err(xdev->dev, "Err [%d] while adding device [%s]\n", rc, DRIVER_NAME);
		goto init_error1;
	}

	/* Create the device in sysfs which will allow the device node
	 * in /dev to be created
	 */
	if (!local_class_p) {
		local_class_p = class_create(THIS_MODULE, DRIVER_NAME);
		if (IS_ERR(local_class_p)) {
			dev_err(xdev->dev, "unable to create class\n");
			rc = PTR_ERR(local_class_p);
			local_class_p = NULL;
			goto init_error2;
		}
	}
	xdev->class_p = local_class_p;

	/* Create the device node in /dev so the device is accessible
	 * as a character device
	 */
	xdev->proxy_device_p = device_create(xdev->class_p, NULL,
					  	 xdev->devno, NULL, name);

	if (IS_ERR(xdev->proxy_device_p)) {
		dev_err(xdev->dev, "unable to create the device\n");
		rc = PTR_ERR(xdev->proxy_device_p);
		goto init_error3;
	}

	return 0;

init_error3:
	class_destroy(xdev->class_p);
	local_class_p = NULL;

init_error2:
	cdev_del(&xdev->cdev);

init_error1:
	unregister_chrdev_region(xdev->devno, 1);
	
	return rc;
}
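
cdevice_init() is presumably invoked once per DMA controller from the driver's probe path, with a name derived from the controller's register base; that is what produces the /dev node names shown below. A hedged sketch of that call site, where res, xdev, and the name format are assumptions:

	/* In the platform driver's probe routine, assuming 'res' holds the
	 * controller's register range from the device tree; this yields
	 * nodes such as /dev/zynqmp_a0000000.dma.
	 */
	char devname[32];
	int err;

	snprintf(devname, sizeof(devname), "zynqmp_%08llx.dma",
		 (unsigned long long)res->start);
	err = cdevice_init(xdev, devname);
	if (err)
		return err;
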

/* ==============================================
 *               end of added code
 * ============================================== */

Once the driver is running, two DMA device files appear under /dev: zynqmp_a0000000.dma and zynqmp_a0001000.dma. The DMA memory can then be accessed through the ioctl() interface, for example:

gDmaFd = open("/dev/zynqmp_a0000000.dma", O_RDWR);
...
DmaRet = ioctl( gDmaFd, DmaCmd, &DmaMemAddr);
...
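
A fuller user-space sketch: open the device, map the ping-pong buffer, arm the DMA with DMA_CMD_SET, then loop on DMA_CMD_COPY to consume frames in place. "zynqmp_axidma.h" is a hypothetical shared header carrying the driver's DMA_CMD_* macros, DMABUF_SIZE, and AxiDmaUserPrm, and process_frame() is a hypothetical consumer:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "zynqmp_axidma.h"	/* hypothetical shared header */

static void process_frame(const unsigned char *frame, size_t len)
{
	/* hypothetical consumer of one received DMA frame */
	(void)frame;
	(void)len;
}

int main(void)
{
	AxiDmaUserPrm prm;
	unsigned char *buf;
	int fd;

	fd = open("/dev/zynqmp_a0000000.dma", O_RDWR);
	if (fd < 0)
		return 1;

	/* Map the kernel-allocated ping-pong buffer; frames are read in place */
	buf = mmap(NULL, DMABUF_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED)
		return 1;

	/* Allocate the buffer in the driver and queue the first transfer */
	if (ioctl(fd, DMA_CMD_SET, &prm) < 0)
		return 1;

	for (;;) {
		/* Block until one frame completes, then use its half directly */
		if (ioctl(fd, DMA_CMD_COPY, &prm) < 0)
			break;
		process_frame(buf + prm.bufIdx * (DMABUF_SIZE / 2),
			      DMABUF_SIZE / 2);
	}

	munmap(buf, DMABUF_SIZE);
	close(fd);
	return 0;
}
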

3. Conclusion

By adding a user-space interface to the DMA driver, we can start DMA and read DMA data from user space. In this implementation, the user-plane program reaches the DMA buffer through the mmap() and ioctl() interfaces, which avoids copying large amounts of data between user space and kernel space.
