Linux DMA 内存拷贝与memcpy 速率比较

驱动层代码:

#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/wait.h>


/* Log helper used for the timing results. */
#define DEBUG_PRINT printk

/* ioctl command numbers dispatched by dma_ioctl(). */
#define MEMCPY_NO_DMA 0
#define MEMCPY_DMA    1
/* Size in bytes of each copy buffer (512 KiB). */
#define BUFF_SIZE     (512*1024)

/* Character-device bookkeeping, set up in dma_init() and torn down in dma_exit(). */
static struct cdev my_cdev;
static int major_ret;
static struct class *pdma_class;
static struct device *pdma_device;

/*
 * Copy buffers obtained from dma_alloc_coherent().
 * src/dst are the CPU pointers used with memcpy()/memset()/memcmp(), so they
 * are byte pointers rather than dma_addr_t pointers; src_phys/dst_phys are the
 * matching DMA handles that are passed to the DMA engine.
 */
static unsigned char *src = NULL;
static dma_addr_t src_phys ;
static unsigned char *dst = NULL;
static dma_addr_t dst_phys ;

/* Set to 1 by tx_callback(); do_memcpy_with_dma() sleeps on wq until it is set. */
static volatile int dma_finished = 0;
static DECLARE_WAIT_QUEUE_HEAD(wq);


/**
 * do_memcpy_no_dma() - time 1000 CPU memcpy() passes from src to dst
 *
 * Copies BUFF_SIZE bytes 1000 times and logs the elapsed wall time in
 * milliseconds, measured with jiffies.
 */
static void do_memcpy_no_dma(void)
{
	unsigned long start, elapsed;
	int i;

	start = jiffies;
	for (i = 0; i < 1000; i++)
		memcpy(dst, src, BUFF_SIZE);

	/* Unsigned subtraction stays correct if jiffies wraps in between. */
	elapsed = jiffies - start;

	/* jiffies_to_msecs() returns unsigned int, hence %u. */
	DEBUG_PRINT("used:%u ms\n", jiffies_to_msecs(elapsed));
}

/*
 * Completion callback installed on each DMA descriptor by
 * do_memcpy_with_dma(). The parameter is unused.
 */
static void tx_callback(void *dma_async_param)
{
	/* Publish completion first, then wake the sleeper waiting on wq. */
	dma_finished = 1;
	wake_up_interruptible(&wq);
}

static int do_memcpy_with_dma(void)
{
	struct dma_chan *chan = NULL;
	dma_cap_mask_t mask;
	
	struct dma_async_tx_descriptor *tx = NULL;

	dma_cookie_t dma_cookie;
	
	memset(src,0xAA,BUFF_SIZE);
	memset(dst,0x55,BUFF_SIZE);	
	
	dma_cap_zero(mask);
	
	dma_cap_set(DMA_MEMCPY, mask);
	
	chan = dma_request_channel(mask, NULL, NULL);
	if(NULL == chan )
	{
		printk("err:%s:%d\n",__FILE__,__LINE__);		
		return -1;
	}
	

	
	
	unsigned long t1 , t2,diff,msec;
	int i ;
	t1  = jiffies;
	for(i=0;i<1000;i++)
	{
		dma_finished = 0;
		//tx = dmaengine_prep_dma_cyclic(chan, src_phys, BUFF_SIZE, 1024, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
		tx = dmaengine_prep_dma_memcpy(chan, dst_phys, src_phys, BUFF_SIZE, DMA_PREP_INTERRUPT|DMA_CTRL_ACK);

		if(NULL == tx)
		{
			printk("err:%s:%d\n",__FILE__,__LINE__);	
			dma_release_channel(chan);
			return -1;
		}

		tx->callback = tx_callback;
		
		dma_cookie = dmaengine_submit(tx);
		if (dma_submit_error(dma_cookie))
		{
			printk("Failed to do DMA tx_submit");
		}
		
		dma_async_issue_pending(chan);	

		wait_event_interruptible(wq, dma_finished);
		
	}

	t2  = jiffies;
	diff = (long)t2 - (long)t1;
	msec = diff *1000/HZ;

	DEBUG_PRINT("used:%ld ms\n",msec);

	printk("ok !\n");
	if(memcmp(src, dst, BUFF_SIZE) == 0)
	{
		printk("memcpy succ !\n");
	}
	else
	{
		printk("memcpy failed !\n");
		int i = 0;
		for(i=0;i<8;i++)
		{
			printk("%x | %x\n",src[i],dst[i]);
		}
	}	

	
	dma_release_channel(chan);
	
}


static long dma_ioctl(struct file *file, unsigned int cmd, unsigned long data)
{
	switch (cmd)
	{
		case MEMCPY_NO_DMA:
			do_memcpy_no_dma();
			break;
		case MEMCPY_DMA:
			do_memcpy_with_dma();
			break;
	}
	return 0;
}


/* File operations of the character device: only ioctl() is implemented. */
static const struct file_operations fops = {
	.unlocked_ioctl	= dma_ioctl,
	.owner		= THIS_MODULE,
};
 
/**
 * dma_init() - module entry point
 *
 * Registers the "my-dma" character device (region, cdev, class and device
 * node) and allocates the two BUFF_SIZE coherent buffers used by the
 * benchmarks. Every step is checked and undone in reverse order on failure.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int __init dma_init(void)
{
	dev_t devno = 0;
	int ret;

	ret = alloc_chrdev_region(&devno, 0, 1, "my-dma");
	if (ret < 0) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		return ret;
	}
	major_ret = MAJOR(devno);

	cdev_init(&my_cdev, &fops);
	ret = cdev_add(&my_cdev, devno, 1);
	if (ret < 0) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		goto err_unregister;
	}

	pdma_class = class_create(THIS_MODULE, "my-dma-class");
	if (IS_ERR(pdma_class)) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		ret = PTR_ERR(pdma_class);
		goto err_cdev;
	}

	pdma_device = device_create(pdma_class, NULL, MKDEV(major_ret, 0), NULL, "my-dma");
	if (IS_ERR(pdma_device)) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		ret = PTR_ERR(pdma_device);
		goto err_class;
	}

	/*
	 * NOTE(review): a NULL device is passed to the DMA API here and in the
	 * matching frees; newer kernels are believed to reject this - confirm
	 * against the target kernel version.
	 */
	src = dma_alloc_coherent(NULL, BUFF_SIZE, &src_phys, GFP_KERNEL);
	if (!src) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		ret = -ENOMEM;
		goto err_device;
	}

	dst = dma_alloc_coherent(NULL, BUFF_SIZE, &dst_phys, GFP_KERNEL);
	if (!dst) {
		printk("err:%s:%d\n", __FILE__, __LINE__);
		ret = -ENOMEM;
		goto err_free_src;
	}

	return 0;

err_free_src:
	dma_free_coherent(NULL, BUFF_SIZE, src, src_phys);
err_device:
	device_destroy(pdma_class, MKDEV(major_ret, 0));
err_class:
	class_destroy(pdma_class);
err_cdev:
	cdev_del(&my_cdev);
err_unregister:
	unregister_chrdev_region(MKDEV(major_ret, 0), 1);
	return ret;
}
 
/*
 * Module exit: remove the device node and class, unregister the character
 * device, then release both coherent copy buffers.
 */
static void __exit dma_exit(void)
{
	/* Device node and class go first. */
	device_destroy(pdma_class, MKDEV(major_ret, 0));
	class_destroy(pdma_class);

	/* Then the cdev and its device-number region. */
	cdev_del(&my_cdev);
	unregister_chrdev_region(MKDEV(major_ret, 0), 1);

	/* Finally the buffers allocated in dma_init(). */
	dma_free_coherent(NULL, BUFF_SIZE, src, src_phys);
	dma_free_coherent(NULL, BUFF_SIZE, dst, dst_phys);
}
 
/* Register dma_init()/dma_exit() as the module's load and unload handlers. */
module_init(dma_init);
module_exit(dma_exit);
 
/* Module metadata. */
MODULE_AUTHOR("hello world");
MODULE_DESCRIPTION("dma driver");
MODULE_LICENSE("GPL");
//MODULE_ALIAS("platform:" DRV_NAME);


应用层测试代码:

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

/* ioctl command numbers; these are the same values the driver switches on. */
#define MEMCPY_NO_DMA 0
#define MEMCPY_DMA    1


/*
 * Print a short usage message.
 * argv is the program name to show (main() passes argv[0]).
 * The single argument selects the benchmark: "dma" runs the DMA copy,
 * any other word runs the plain memcpy copy.
 */
void print_usage(char *argv)
{
	printf("usage:\n");
	/* The original format had a %s with no matching argument. */
	printf("%s <dma|memcpy>\n", argv);
}


/*
 * Test program: opens /dev/my-dma and triggers one benchmark via ioctl().
 * argv[1] == "dma" selects MEMCPY_DMA; anything else selects MEMCPY_NO_DMA.
 * Returns 0 on success and -1 on a usage, open or ioctl error.
 */
int main(int argc ,char **argv)
{
	int fd;
	int rc;

	if (argc < 2) {
		print_usage(argv[0]);
		return -1;
	}

	fd = open("/dev/my-dma", O_RDWR);
	if (fd < 0) {
		/* perror() appends the errno reason to the message. */
		perror("open /dev/my-dma failed");
		return -1;
	}

	if (strcmp("dma", argv[1]) == 0)
		rc = ioctl(fd, MEMCPY_DMA);
	else
		rc = ioctl(fd, MEMCPY_NO_DMA);

	if (rc < 0)
		perror("ioctl /dev/my-dma failed");

	close(fd);

	return rc < 0 ? -1 : 0;
}


测试结果:(H3 平台)
DMA 比 memcpy 快一倍!
(图 1:Linux DMA 内存拷贝与 memcpy 速率比较的测试结果截图)

疑问:

tx = dmaengine_prep_dma_memcpy(chan, dst_phys, src_phys, BUFF_SIZE, DMA_PREP_INTERRUPT|DMA_CTRL_ACK);

这里的 tx 没有看到在哪里被 free,是否会造成内存泄漏?
答:不会。传输完成后,DMA 中断处理函数会调度 tasklet(vchan_complete),由它调用 desc_free 回调自动释放描述符的内存。

代码跟踪:

dmaengine_prep_dma_memcpy
  sun6i_dma_prep_dma_memcpy
    return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
      list_add_tail(&vd->node, &vc->desc_allocated);
        

先看 struct virt_dma_chan

/* Virtual DMA channel: wraps a struct dma_chan with descriptor bookkeeping. */
struct virt_dma_chan {
	struct dma_chan	chan;
	/* Tasklet; the trace below shows vchan_init() binding it to vchan_complete(). */
	struct tasklet_struct task;
	/* Driver-supplied hook that vchan_complete() calls to free a descriptor. */
	void (*desc_free)(struct virt_dma_desc *);

	spinlock_t lock;

	/* protected by vc.lock */
	/* Descriptors move between these lists; vchan_tx_prep() adds to desc_allocated. */
	struct list_head desc_allocated;
	struct list_head desc_submitted;
	struct list_head desc_issued;
	struct list_head desc_completed;

	/* NOTE(review): presumably the active cyclic descriptor, if any - confirm. */
	struct virt_dma_desc *cyclic;
};

其中包含一个函数指针成员 desc_free:void (*desc_free)(struct virt_dma_desc *);
下面来看 desc_free 是在哪里被赋值、又是在哪里被调用的。

sun6i_dma_probe
    vchan->vc.desc_free = sun6i_dma_free_desc;
    vchan_init
      tasklet_init(&vc->task, vchan_complete, (unsigned long)vc)
        vchan_complete
		  list_for_each_entry_safe(vd, _vd, &head, node) {
				dmaengine_desc_get_callback(&vd->tx, &cb);
		
				list_del(&vd->node);
				if (dmaengine_desc_test_reuse(&vd->tx))
					list_add(&vd->node, &vc->desc_allocated);
				else
					vc->desc_free(vd);		     //最终调用 sun6i_dma_free_desc
				dmaengine_desc_callback_invoke(&cb, NULL);
			}

  ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0,
			       dev_name(&pdev->dev), sdc);
    sun6i_dma_interrupt                               // dma 中断处理函数
      vchan_cookie_complete                      // 非循环传输(如 memcpy)走这里;循环传输则走 vchan_cyclic_callback
        tasklet_schedule(&vc->task)              //这里之后调用 vchan_complete


/*
 * Free a sun6i DMA descriptor: return every linked-list item of the
 * descriptor to the device's DMA pool, then free the descriptor itself.
 */
static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
{
	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
	struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
	struct sun6i_dma_lli *cur_virt;
	dma_addr_t cur_phys;

	if (unlikely(!txd))
		return;

	cur_phys = txd->p_lli;
	cur_virt = txd->v_lli;

	while (cur_virt) {
		/* Read both links before the item goes back to the pool. */
		struct sun6i_dma_lli *next_virt = cur_virt->v_lli_next;
		dma_addr_t next_phys = cur_virt->p_lli_next;

		dma_pool_free(sdev->pool, cur_virt, cur_phys);

		cur_virt = next_virt;
		cur_phys = next_phys;
	}

	kfree(txd);    /* release the descriptor memory */
}

你可能感兴趣的:(Linux)