两种形式的dma 实现memory copy代码

在飞思卡尔的时候,需要用SDMA实现内存到内存memory copy的功能,需要做两部分的工作:

1:在DMA controller中加入M2M的支持。

2:写一个驱动来调用DMA controller的M2M功能。

上面的2实际上对于不同的SoC来讲,思路是一样的,有通用性,在这里总结下。

当时在实现的时候,用了两种方法:

1:cyclic, 用dma_alloc_coherent分配两段dma 内存空间, 一段做src, 一段做dst. 调用DMA controller接口来将src中的数据往dst中拷贝。因为DMA操作的是物理地址上连续的内存空间,而dma_alloc_coherent分配不了太大的连续物理地址空间,所以仅仅能实现小批量数据的M2M拷贝。

2:sg, 用kzalloc(GFP_DMA)分配多段内存空间(下面的代码中是8段),一半做src,一半做dst,再用dma_map_sg做DMA映射。通过device_prep_dma_sg来将各自独立的src/dst空间链接起来。这样可以将若干段分散的物理内存链接成逻辑上连续的空间,可以实现较大数据的拷贝。

顺便复习下dma engine的用法:
Linux/Documentation/dmaengine.txt

 13 The slave DMA usage consists of following steps:
 14 1. Allocate a DMA slave channel
 15 2. Set slave and controller specific parameters
 16 3. Get a descriptor for transaction
 17 4. Submit the transaction
 18 5. Issue pending requests and wait for callback notification
 
  
 20 1. Allocate a DMA slave channel
 27    Interface:
 28         struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
 29                         dma_filter_fn filter_fn,
 30                         void *filter_param);

 48 2. Set slave and controller specific parameters
 
  
 61    Interface:
 62         int dmaengine_slave_config(struct dma_chan *chan,
 63                                   struct dma_slave_config *config)

 
  
 70 3. Get a descriptor for transaction
 86    Interface:
 87         struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)(
 88                 struct dma_chan *chan, struct scatterlist *sgl,
 89                 unsigned int sg_len, enum dma_data_direction direction,
 90                 unsigned long flags);
 91 
 92         struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
 93                 struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 94                 size_t period_len, enum dma_data_direction direction);
 95 
 96         struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
 97                 struct dma_chan *chan, struct dma_interleaved_template *xt,
 98                 unsigned long flags);

139 4. Submit the transaction
144    Interface:
145         dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)

153 5. Issue pending DMA requests and wait for callback notification
 
  
163    Interface:
164         void dma_async_issue_pending(struct dma_chan *chan);

传输结束的时候可以用:
 
  
168 1. int dmaengine_terminate_all(struct dma_chan *chan)

看下面代码:
1: cyclic方式实现
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 

#include 
#include 

static int gMajor; /* major number returned by register_chrdev() for this device */
static struct class *dma_tm_class; /* class created in sdma_init_module() */
static char *wbuf; /* CPU address of the source buffer; filled by sdma_write() */
static char *rbuf; /* CPU address of the destination buffer; dumped by sdma_read() */
static dma_addr_t wpaddr; /* DMA address of wbuf, set by dma_alloc_coherent() */
static dma_addr_t rpaddr; /* DMA address of rbuf, set by dma_alloc_coherent() */

/* Memory-to-memory channel: requested in sdma_open(), released in sdma_release(). */
struct dma_chan *dma_m2m_chan;

/* Completed by dma_m2m_callback(); sdma_read() waits on it. */
struct completion dma_m2m_ok;

/* Size in bytes of each of the two coherent buffers. */
#define SDMA_BUF_SIZE  1024

/*
 * Channel filter handed to dma_request_channel().
 *
 * Accepts a channel only when imx_dma_is_general_purpose() reports true for
 * it; on acceptance @param is stored in chan->private.
 *
 * Returns true to accept the channel, false to reject it.
 */
static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	if (imx_dma_is_general_purpose(chan)) {
		chan->private = param;
		return true;
	}

	return false;
}

int sdma_open(struct inode * inode, struct file * filp)
{
	dma_cap_mask_t dma_m2m_mask;
	struct imx_dma_data m2m_dma_data = {0};


	init_completion(&dma_m2m_ok);	


	dma_cap_zero(dma_m2m_mask);
	dma_cap_set(DMA_SLAVE, dma_m2m_mask);
	m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
	m2m_dma_data.priority = DMA_PRIO_HIGH;
	
	dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter, &m2m_dma_data);
	if (!dma_m2m_chan) {
		printk("Error opening the SDMA memory to memory channel\n");
		return -EINVAL;
	}


	wbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &wpaddr, GFP_DMA);
	rbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &rpaddr, GFP_DMA);


	return 0;
}

/*
 * release() handler: stop any outstanding transfer, give the channel back
 * and free both coherent buffers.
 *
 * Always returns 0.
 */
int sdma_release(struct inode * inode, struct file * filp)
{
	/*
	 * sdma_write() may have queued a cyclic transfer on this channel; stop
	 * it before the buffers it targets are freed.
	 */
	dmaengine_terminate_all(dma_m2m_chan);
	dma_release_channel(dma_m2m_chan);
	dma_m2m_chan = NULL;

	dma_free_coherent(NULL, SDMA_BUF_SIZE, wbuf, wpaddr);
	dma_free_coherent(NULL, SDMA_BUF_SIZE, rbuf, rpaddr);
	/* Do not leave stale pointers behind for a later open()/write(). */
	wbuf = NULL;
	rbuf = NULL;

	return 0;
}

/*
 * read() handler: wait for the DMA completion signalled by
 * dma_m2m_callback(), then dump the source and destination buffers to the
 * kernel log, one byte per line.
 *
 * Nothing is copied to the user buffer (@buf, @count and @offset are unused)
 * and the function always returns 0.
 */
ssize_t sdma_read (struct file *filp, char __user * buf, size_t count, loff_t * offset)
{
	int i;

	wait_for_completion(&dma_m2m_ok);

	/*
	 * NOTE(review): the loop bounds were missing from the listing and are
	 * reconstructed here: wbuf/rbuf are char buffers of SDMA_BUF_SIZE bytes,
	 * so SDMA_BUF_SIZE is the largest in-bounds count. Confirm.
	 */
	for (i = 0; i < SDMA_BUF_SIZE; i++) {
		printk("src_data_%d = %x\n",i, *(wbuf+i) );
	}
	for (i = 0; i < SDMA_BUF_SIZE; i++) {
		printk("dst_data_%d = %x\n",i, *(rbuf+i) );
	}

	return 0;
}

/*
 * DMA completion callback installed on the descriptor by sdma_write().
 * Logs its own name and completes dma_m2m_ok so a waiter can proceed.
 * @data is unused.
 */
static void dma_m2m_callback(void *data)
{
	printk("in %s\n", __func__);
	complete(&dma_m2m_ok);
}

/*
 * write() handler: fill the source buffer with a test pattern, log it, and
 * queue a cyclic memory-to-memory transfer from wbuf to rbuf.
 *
 * The user data (@buf/@count) and @offset are not used; any write to the
 * device simply triggers the transfer.
 *
 * Returns 0 once the transfer has been queued, the error from
 * dmaengine_slave_config() if channel configuration fails, or -EINVAL when
 * no descriptor could be prepared.
 */
ssize_t sdma_write(struct file * filp, const char __user * buf, size_t count, loff_t * offset)
{
	/* wbuf is declared char *; the pattern is stored one 32-bit word at a time. */
	u32 *index1 = (u32 *)wbuf;
	/* Zero-initialised so fields that are not set below hold no stack garbage. */
	struct dma_slave_config dma_m2m_config = {0};
	struct dma_async_tx_descriptor *dma_m2m_desc;
	int i;
	int ret;

	/*
	 * NOTE(review): the loop bounds were missing from the listing and are
	 * reconstructed from the buffer size: SDMA_BUF_SIZE/4 words for the u32
	 * fill, SDMA_BUF_SIZE bytes for the char dump. Confirm.
	 */
	for (i = 0; i < SDMA_BUF_SIZE/4; i++) {
		*(index1 + i) = 0x12345678;
	}

	for (i = 0; i < SDMA_BUF_SIZE; i++) {
		printk("%d : %x\n",i, *(wbuf+i) );
	}

	dma_m2m_config.direction = DMA_MEM_TO_MEM;
	dma_m2m_config.dst_addr = rpaddr;
	dma_m2m_config.src_addr = wpaddr;
	dma_m2m_config.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_maxburst = 4;
	dma_m2m_config.src_maxburst = 4;
	ret = dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);
	if (ret) {
		printk("Error configuring the SDMA memory to memory channel\n");
		return ret;
	}

	/*
	 * buf_addr is a dma_addr_t, so pass 0 rather than the pointer constant
	 * NULL. The source and destination addresses are supplied through the
	 * slave config above.
	 */
	dma_m2m_desc = dma_m2m_chan->device->device_prep_dma_cyclic(
					dma_m2m_chan, 0, SDMA_BUF_SIZE, SDMA_BUF_SIZE/2, DMA_MEM_TO_MEM);
	if (!dma_m2m_desc) {
		printk("Error preparing the SDMA cyclic descriptor\n");
		return -EINVAL;
	}

	dma_m2m_desc->callback = dma_m2m_callback;
	dmaengine_submit(dma_m2m_desc);
	/* Submitting only queues the descriptor; this starts the pending work. */
	dma_async_issue_pending(dma_m2m_chan);
	return 0;
}

struct file_operations dma_fops = {
	open:		sdma_open,
	release:	sdma_release,
	read:		sdma_read,
	write:		sdma_write,
};

/*
 * Module entry point: register the "sdma_test" character device with a
 * dynamically assigned major number, create its class and create the device
 * under that class.
 *
 * Returns 0 on success or a negative errno. Each failure path undoes the
 * steps that already succeeded.
 */
int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device; major 0 asks for a dynamic major */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n",gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

	/* The device-creation call differs between kernel versions. */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL,
					     MKDEV(gMajor, 0), NULL,
					     "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		/* Propagate the real error code instead of a bare -1. */
		return PTR_ERR(temp_class);
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}

/*
 * Module exit point: undo sdma_init_module() in reverse order of setup
 * (device, then class, then the character-device registration), so the
 * device is removed before the driver behind it is unregistered.
 */
static void sdma_cleanup_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
	class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
	class_destroy(dma_tm_class);
	unregister_chrdev(gMajor, "sdma_test");

	printk("SDMA test Driver Module Unloaded\n");
}

module_init(sdma_init_module);
module_exit(sdma_cleanup_module);

2:sg方式实现

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include


#include
#include


#include
#include


static int gMajor; /* major number of device */
static struct class *dma_tm_class; /* class created in sdma_init_module() */
/* Source buffers: filled with test patterns by sdma_write(). */
u32 *wbuf, *wbuf2, *wbuf3, *wbuf4;
/* Destination buffers: compared against the source buffers by sdma_read(). */
u32 *rbuf, *rbuf2, *rbuf3, *rbuf4;


/* Memory-to-memory channel: requested in sdma_open(), released in sdma_release(). */
struct dma_chan *dma_m2m_chan;


/* Completed by dma_m2m_callback(); sdma_write() waits on it. */
struct completion dma_m2m_ok;


/* sg lists the source buffers, sg2 the destination buffers (see sdma_write()). */
struct scatterlist sg[4], sg2[4];


/*
 * Size in bytes of each source/destination buffer. Parenthesised so the
 * macro expands as one value inside a larger expression (for example as the
 * right-hand operand of / or %).
 */
#define SDMA_BUF_SIZE  (1024 * 60)






/*
 * Channel filter handed to dma_request_channel().
 *
 * Accepts a channel only when imx_dma_is_general_purpose() reports true for
 * it; on acceptance @param is stored in chan->private.
 *
 * Returns true to accept the channel, false to reject it.
 */
static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	if (imx_dma_is_general_purpose(chan)) {
		chan->private = param;
		return true;
	}

	return false;
}


int sdma_open(struct inode *inode, struct file *filp)
{
dma_cap_mask_t dma_m2m_mask;
struct imx_dma_data m2m_dma_data;


init_completion(&dma_m2m_ok);


dma_cap_zero(dma_m2m_mask);
dma_cap_set(DMA_SLAVE, dma_m2m_mask);
m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
m2m_dma_data.priority = DMA_PRIO_HIGH;
dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter,
&m2m_dma_data);
if (!dma_m2m_chan) {
printk("Error opening the SDMA memory to memory channel\n");
return -EINVAL;
}


wbuf = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf) {
printk("error wbuf !!!!!!!!!!!\n");
return -1;
}


wbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf2) {
printk("error wbuf2 !!!!!!!!!!!\n");
return -1;
}


wbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf3) {
printk("error wbuf3 !!!!!!!!!!!\n");
return -1;
}


wbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf4) {
printk("error wbuf4 !!!!!!!!!!!\n");
return -1;
}


rbuf = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf) {
printk("error rbuf !!!!!!!!!!!\n");
return -1;
}


rbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf2) {
printk("error rbuf2 !!!!!!!!!!!\n");
return -1;
}


rbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf3) {
printk("error rbuf3 !!!!!!!!!!!\n");
return -1;
}


rbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf4) {
printk("error rbuf4 !!!!!!!!!!!\n");
return -1;
}


return 0;
}


int sdma_release(struct inode * inode, struct file * filp)
{
dmaengine_terminate_all(dma_m2m_chan);
dma_release_channel(dma_m2m_chan);
dma_m2m_chan = NULL;
kfree(wbuf);
kfree(wbuf2);
kfree(wbuf3);
kfree(rbuf);
kfree(rbuf2);
kfree(rbuf3);
return 0;
}


ssize_t sdma_read (struct file *filp, char __user * buf, size_t count,
loff_t * offset)
{
int i;
#if 0
for (i=0; iprintk("dst data_%d : %x\n", i, *(rbuf+i));
}


for (i=0; iprintk("dst data2_%d : %x\n", i, *(rbuf2+i));
}


for (i=0; iprintk("dst data3_%d : %x\n", i, *(rbuf3+i));
}
#endif


for (i=0; iif (*(rbuf+i) != *(wbuf+i)) {
printk("buffer 1 copy falled!\n");
return 0;
}
}
printk("buffer 1 copy passed!\n");


for (i=0; iif (*(rbuf2+i) != *(wbuf2+i)) {
printk("buffer 2 copy falled!\n");
return 0;
}
}
printk("buffer 2 copy passed!\n");


for (i=0; iif (*(rbuf3+i) != *(wbuf3+i)) {
printk("buffer 3 copy falled!\n");
return 0;
}
}
printk("buffer 3 copy passed!\n");


for (i=0; iif (*(rbuf4+i) != *(wbuf4+i)) {
printk("buffer 4 copy falled!\n");
return 0;
}
}
printk("buffer 4 copy passed!\n");


return 0;
}


/*
 * DMA completion callback installed on the descriptor by sdma_write().
 * Completes dma_m2m_ok so the waiting writer can proceed. @data is unused.
 */
static void dma_m2m_callback(void *data)
{
	complete(&dma_m2m_ok);
}


ssize_t sdma_write(struct file * filp, const char __user * buf, size_t count,
loff_t * offset)
{
u32 *index1, *index2, *index3, *index4, i, ret;
struct dma_slave_config dma_m2m_config;
struct dma_async_tx_descriptor *dma_m2m_desc;
index1 = wbuf;
index2 = wbuf2;
index3 = wbuf3;
index4 = wbuf4;
struct timeval end_time;
unsigned long end, start;
for (i=0; i*(index1 + i) = 0x12121212;
}


for (i=0; i*(index2 + i) = 0x34343434;
}


for (i=0; i*(index3 + i) = 0x56565656;
}


for (i=0; i*(index4 + i) = 0x78787878;
}


#if 0
for (i=0; iprintk("input data_%d : %x\n", i, *(wbuf+i));
}


for (i=0; iprintk("input data2_%d : %x\n", i, *(wbuf2+i));
}


for (i=0; iprintk("input data3_%d : %x\n", i, *(wbuf3+i));
}
#endif
dma_m2m_config.direction = DMA_MEM_TO_MEM;
dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);


sg_init_table(sg, 4);
sg_set_buf(&sg[0], wbuf, SDMA_BUF_SIZE);
sg_set_buf(&sg[1], wbuf2, SDMA_BUF_SIZE);
sg_set_buf(&sg[2], wbuf3, SDMA_BUF_SIZE);
sg_set_buf(&sg[3], wbuf4, SDMA_BUF_SIZE);
ret = dma_map_sg(NULL, sg, 4, dma_m2m_config.direction);


sg_init_table(sg2, 4);
sg_set_buf(&sg2[0], rbuf, SDMA_BUF_SIZE);
sg_set_buf(&sg2[1], rbuf2, SDMA_BUF_SIZE);
sg_set_buf(&sg2[2], rbuf3, SDMA_BUF_SIZE);
sg_set_buf(&sg2[3], rbuf4, SDMA_BUF_SIZE);
ret = dma_map_sg(NULL, sg2, 4, dma_m2m_config.direction);


dma_m2m_desc = dma_m2m_chan->device->
device_prep_dma_sg(dma_m2m_chan, sg2, 4, sg, 4, 0);
dma_m2m_desc->callback = dma_m2m_callback;
//printk("1111111111111\n");
do_gettimeofday(&end_time);
start = end_time.tv_sec*1000000 + end_time.tv_usec;


dmaengine_submit(dma_m2m_desc);
dma_async_issue_pending(dma_m2m_chan);


wait_for_completion(&dma_m2m_ok);
//printk("2222222222222\n");
do_gettimeofday(&end_time);
end = end_time.tv_sec*1000000 + end_time.tv_usec;
printk("end - start = %d\n", end - start);
dma_unmap_sg(NULL, sg, 4, dma_m2m_config.direction);
dma_unmap_sg(NULL, sg2, 4, dma_m2m_config.direction);


return 0;
}


struct file_operations dma_fops = {
open: sdma_open,
release: sdma_release,
read: sdma_read,
write: sdma_write,
};


/*
 * Module entry point: register the "sdma_test" character device with a
 * dynamically assigned major number, create its class and create the device
 * under that class.
 *
 * Returns 0 on success or a negative errno. Each failure path undoes the
 * steps that already succeeded.
 */
int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device; major 0 asks for a dynamic major */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n",gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

	/* The device-creation call differs between kernel versions. */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL,
					 MKDEV(gMajor, 0), NULL,
					 "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		/* Propagate the real error code instead of a bare -1. */
		return PTR_ERR(temp_class);
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}


/*
 * Module exit point: undo sdma_init_module() in reverse order of setup
 * (device, then class, then the character-device registration), so the
 * device is removed before the driver behind it is unregistered.
 */
static void sdma_cleanup_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
	class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
	class_destroy(dma_tm_class);
	unregister_chrdev(gMajor, "sdma_test");

	printk("SDMA test Driver Module Unloaded\n");
}


module_init(sdma_init_module);
module_exit(sdma_cleanup_module);


当时测出大约1秒钟可以拷贝50M的数据,但是客户还觉得不满意。

你可能感兴趣的:(两种形式的dma 实现memory copy代码)