PMEM原理分析

Chipset:MSM8x25Q

Codebase:Android 4.1


PMEM使用:

PMEM使用比较简单,分单进程使用和共享进程使用同一块PMEM。

单进程使用:

1.      int master_fd = open(“/dev/pmem_xxx”,O_RDWR, 0);

2.      然后再mmap就可以使用了。

进程间共享PMEM:

进程A:

         和单进程使用方法一样。

进程B:

1.      int fd = open(“/dev/pmem_xxx”,O_RDWR, 0);

2.      ioctl(fd, PMEM_CONNECT,master_fd)    //PMEM_CONNECT表示准备要连接了,连接的PMEM对应的fd为master_fd,即进程A打开的PMEM对应的fd。

3.      然后作mmap就可以使用了。

因此关键是第二步,master_fd是从进程A传过来的,可以使用binder等通信机制。

PMEM对应的驱动代码流程也是比较简单的,利用了字符驱动的open/ioctl/mmap操作,下面直接分析代码。

PMEM初始化:

PMEM类似于ION里的carved-outmemory,先预留一块内存,然后需要使用的时候从上面分配一块。因此PMEM总有一天会被ION替代,至少我这么认为.

平台上定义了如下三个PMEM模块:  pmem_adsp, pmem_audio, pmem(mdp_pmem)。

static struct android_pmem_platform_data android_pmem_adsp_pdata = {
	.name = "pmem_adsp",	//给adsp使用
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,	//都是使用bitmap算法,后面会讲到。
	.cached = 1,
	.memory_type = MEMTYPE_EBI1,	//内存类型都是EBI1
};

static struct platform_device android_pmem_adsp_device = {
	.name = "android_pmem",
	.id = 1,
	.dev = { .platform_data = &android_pmem_adsp_pdata },
};

static unsigned pmem_mdp_size = MSM_PMEM_MDP_SIZE;
static int __init pmem_mdp_size_setup(char *p)
{
	pmem_mdp_size = memparse(p, NULL);
	return 0;
}
/*可以通过传参来设置pmem mdp的size, 其他pmem模块也如此。*/
early_param("pmem_mdp_size", pmem_mdp_size_setup);

static unsigned pmem_adsp_size = MSM_PMEM_ADSP_SIZE;
static int __init pmem_adsp_size_setup(char *p)
{
	pmem_adsp_size = memparse(p, NULL);
	return 0;
}

early_param("pmem_adsp_size", pmem_adsp_size_setup);

static struct android_pmem_platform_data android_pmem_audio_pdata = {
	.name = "pmem_audio",		//给audio使用
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,
	.cached = 0,
	.memory_type = MEMTYPE_EBI1,
};

static struct platform_device android_pmem_audio_device = {
	.name = "android_pmem",
	.id = 2,
	.dev = { .platform_data = &android_pmem_audio_pdata },
};

static struct android_pmem_platform_data android_pmem_pdata = {
	.name = "pmem",	//给mdp使用,Quaclomm为啥不写成pmem_mdp?
	.allocator_type = PMEM_ALLOCATORTYPE_BITMAP,
	.cached = 1,
	.memory_type = MEMTYPE_EBI1,
};
static struct platform_device android_pmem_device = {
	.name = "android_pmem",
	.id = 0,
	.dev = { .platform_data = &android_pmem_pdata },
};

有了相应的platform_device之后,肯定要找到其platform_driver作设备匹配去。对应文件是pmem.c

static int __init pmem_init(void)
{
	/*创建sysfs,位于/sys/kernel/ pmem_regions ,以每个PMEM模块的名字
命名,如pmem_audio。目录下的信息主要供用户空间查看当前PMEM模块的使用状况。*/
	/* create /sys/kernel/<PMEM_SYSFS_DIR_NAME> directory */
	pmem_kset = kset_create_and_add(PMEM_SYSFS_DIR_NAME,
		NULL, kernel_kobj);
	if (!pmem_kset) {
		pr_err("pmem(%s):kset_create_and_add fail\n", __func__);
		return -ENOMEM;
	}
	/*寻找platform device,接着调用pmem_probe */
	return platform_driver_register(&pmem_driver);
}

static struct platform_driver pmem_driver = {
	.probe = pmem_probe,
	.remove = pmem_remove,
	.driver = { .name = "android_pmem",
		    .pm = &pmem_dev_pm_ops,
  }
};
static int pmem_probe(struct platform_device *pdev)
{
	struct android_pmem_platform_data *pdata;

	if (!pdev || !pdev->dev.platform_data) {
		pr_alert("Unable to probe pmem!\n");
		return -1;
	}
	pdata = pdev->dev.platform_data;
	/*power manager相关,这里不关注。*/
	pm_runtime_set_active(&pdev->dev);
	pm_runtime_enable(&pdev->dev);
	/*千呼万唤始出来,pmem初始化。*/
	return pmem_setup(pdata, NULL, NULL);
}

Pmem_setup()此函数比较长,不过流程还是比较简单的。
int pmem_setup(struct android_pmem_platform_data *pdata,
	       long (*ioctl)(struct file *, unsigned int, unsigned long),
	       int (*release)(struct inode *, struct file *))
{
	int i, index = 0, id;
	struct vm_struct *pmem_vma = NULL;
	struct page *page;
	/*系统对设备总的pmem模块数量有限制。*/
	if (id_count >= PMEM_MAX_DEVICES) {
		pr_alert("pmem: %s: unable to register driver(%s) - no more "
			"devices available!\n", __func__, pdata->name);
		goto err_no_mem;
	}
	/*size为0表示在系统初始化的时候并没有预留一部分内存空间给此PMEM模块。如果这样肯定会申请失败的。*/
	if (!pdata->size) {
		pr_alert("pmem: %s: unable to register pmem driver(%s) - zero "
			"size passed in!\n", __func__, pdata->name);
		goto err_no_mem;
	}

	id = id_count++;
	/*PMEM通过id来寻找对应的pmem模块*/
	pmem[id].id = id;
	/*表示已经分配过了。*/
	if (pmem[id].allocate) {
		pr_alert("pmem: %s: unable to register pmem driver - "
			"duplicate registration of %s!\n",
			__func__, pdata->name);
		goto err_no_mem;
	}
	/*PMEM支持多种不同的allocate算法,在下面的switch case语句中可看到,本平台都使用默认的bitmap算法,对应的allocate type为PMEM_ALLOCATORTYPE_BITMAP。 */
	pmem[id].allocator_type = pdata->allocator_type;
	/*quantum是bitmap的计算单位,最小为PAGE_SIZE,当然你也可以在
android_pmem_platform_data 结构中自己定义大小。*/
	/* 'quantum' is a "hidden" variable that defaults to 0 in the board
	 * files */
	pmem[id].quantum = pdata->quantum ?: PMEM_MIN_ALLOC;
	if (pmem[id].quantum < PMEM_MIN_ALLOC ||
		!is_power_of_2(pmem[id].quantum)) {
		pr_alert("pmem: %s: unable to register pmem driver %s - "
			"invalid quantum value (%#x)!\n",
			__func__, pdata->name, pmem[id].quantum);
		goto err_reset_pmem_info;
	}
	/*预留的PMEM模块size必须要以quantum对齐。*/
	if (pdata->size % pmem[id].quantum) {
		/* bad alignment for size! */
		pr_alert("pmem: %s: Unable to register driver %s - "
			"memory region size (%#lx) is not a multiple of "
			"quantum size(%#x)!\n", __func__, pdata->name,
			pdata->size, pmem[id].quantum);
		goto err_reset_pmem_info;
	}

	pmem[id].cached = pdata->cached;	//高速缓冲标志
	pmem[id].buffered = pdata->buffered;	//写缓存标志
	pmem[id].size = pdata->size;
	pmem[id].memory_type = pdata->memory_type;	/*系统使用的是EBI1接口的DDR,所以前面type定义都是*/
	strlcpy(pmem[id].name, pdata->name, PMEM_NAME_SIZE);
	/*PMEM模块可用的内存entries,以quantum为单位。*/
	pmem[id].num_entries = pmem[id].size / pmem[id].quantum;

	memset(&pmem[id].kobj, 0, sizeof(pmem[0].kobj));
	pmem[id].kobj.kset = pmem_kset;
	/*我们只用到bitmap算法,其他的有兴趣自己可研究。*/
	switch (pmem[id].allocator_type) {
	case PMEM_ALLOCATORTYPE_ALLORNOTHING:
~~snip
		break;
	case PMEM_ALLOCATORTYPE_BUDDYBESTFIT:
~~snip
		break;
	case PMEM_ALLOCATORTYPE_BITMAP: /* 0, default if not explicit */
		/*先分配64个bitm_alloc结构,用于后面管理PMEM模块的申请。
因为用户空间可能不会一下子申请调整个PMEM模块,如有两个进程都申请pmem_audio模块的一小部分内存。PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS 为64.*/
		pmem[id].allocator.bitmap.bitm_alloc = kmalloc(
			PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS *
				sizeof(*pmem[id].allocator.bitmap.bitm_alloc),
			GFP_KERNEL);
~~snip
		/*初始化 bitm_alloc结构体。*/
		for (i = 0; i < PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS; i++) {
			pmem[id].allocator.bitmap.bitm_alloc[i].bit = -1;
			pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0;
		}
		/*记录当前已经申请的bitm_alloc数量。*/
		pmem[id].allocator.bitmap.bitmap_allocs =
			PMEM_INITIAL_NUM_BITMAP_ALLOCATIONS;
		/*以32为单位记录当前整个PMEM模块的内存数量。*/
		pmem[id].allocator.bitmap.bitmap =
			kcalloc((pmem[id].num_entries + 31) / 32,
				sizeof(unsigned int), GFP_KERNEL);
		if (!pmem[id].allocator.bitmap.bitmap) {
			pr_alert("pmem: %s: Unable to register pmem "
				"driver - can't allocate bitmap!\n",
				__func__);
			goto err_cant_register_device;
		}
		/*当前空闲的entries。*/
		pmem[id].allocator.bitmap.bitmap_free = pmem[id].num_entries;
		/*下面这几个函数会在用户空间通过open/ioctl/mmap用到。*/
		pmem[id].allocate = pmem_allocator_bitmap;
		pmem[id].free = pmem_free_bitmap;
		pmem[id].free_space = pmem_free_space_bitmap;
		pmem[id].len = pmem_len_bitmap;
		pmem[id].start_addr = pmem_start_addr_bitmap;

		DLOG("bitmap allocator id %d (%s), num_entries %u, raw size "
			"%lu, quanta size %u\n",
			id, pdata->name, pmem[id].allocator.bitmap.bitmap_free,
			pmem[id].size, pmem[id].quantum);
		break;

	case PMEM_ALLOCATORTYPE_SYSTEM:
~~snip
	}

	pmem[id].ioctl = ioctl;
	pmem[id].release = release;
	mutex_init(&pmem[id].arena_mutex);
	mutex_init(&pmem[id].data_list_mutex);
	INIT_LIST_HEAD(&pmem[id].data_list);

	pmem[id].dev.name = pdata->name;
	pmem[id].dev.minor = id;	//后面使用id来寻找对应的pmem模块。
	pmem[id].dev.fops = &pmem_fops;	//后面用户空间的操作都会调用的这个变量里的函数指针了。
	pmem[id].reusable = pdata->reusable;
	pr_info("pmem: Initializing %s as %s\n",
		pdata->name, pdata->cached ? "cached" : "non-cached");
	/*注册为字符设备,会看到/dev/pmem_**, 如/dev/pmem_audio,供用户空间
操作设备。*/
	if (misc_register(&pmem[id].dev)) {
		pr_alert("Unable to register pmem driver!\n");
		goto err_cant_register_device;
	}
~~snip
	page = alloc_page(GFP_KERNEL);
	if (!page) {
		pr_err("pmem: Failed to allocate page for %s\n", pdata->name);
		goto cleanup_vm;
	}
	pmem[id].garbage_pfn = page_to_pfn(page);
	atomic_set(&pmem[id].allocation_cnt, 0);

	if (pdata->setup_region)
		pmem[id].region_data = pdata->setup_region();

	if (pdata->request_region)
		pmem[id].mem_request = pdata->request_region;

	if (pdata->release_region)
		pmem[id].mem_release = pdata->release_region;

	pr_info("allocating %lu bytes at %lx physical for %s\n",
		pmem[id].size, pmem[id].base, pmem[id].name);

	return 0;

~~snip
	return -1;
}

PMEM使用驱动分析:

open

当用户进程open pmem设备的时候,会调用到pmem_open:

static int pmem_open(struct inode *inode, struct file *file)
{
	struct pmem_data *data;
	int id = get_id(file);
	int ret = 0;
#if PMEM_DEBUG_MSGS
	char currtask_name[FIELD_SIZEOF(struct task_struct, comm) + 1];
#endif

	DLOG("pid %u(%s) file %p(%ld) dev %s(id: %d)\n",
		current->pid, get_task_comm(currtask_name, current),
		file, file_count(file), get_name(file), id);
	/*分配struct pmem_data。*/
	data = kmalloc(sizeof(struct pmem_data), GFP_KERNEL);
	if (!data) {
		printk(KERN_ALERT "pmem: %s: unable to allocate memory for "
				"pmem metadata.", __func__);
		return -1;
	}
	data->flags = 0;
	data->index = -1;
	data->task = NULL;
	data->vma = NULL;
	data->pid = 0;
	data->master_file = NULL;
#if PMEM_DEBUG
	data->ref = 0;
#endif
	INIT_LIST_HEAD(&data->region_list);
	init_rwsem(&data->sem);

	file->private_data = data;
	INIT_LIST_HEAD(&data->list);

	mutex_lock(&pmem[id].data_list_mutex);
	list_add(&data->list, &pmem[id].data_list);
	mutex_unlock(&pmem[id].data_list_mutex);
	return ret;
}

Open似乎没做什么特殊事情,只是分配一个struct pmem_data,然后初始化之后保存到file的私有数据之中。

mmap

open好了之后,用户进程想要使用PMEM,通过mmap实现,对应的是Kernel中的pmem_mmap。

static int pmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	/*取出open时创建的struct pem_data.*/
	struct pmem_data *data = file->private_data;
	int index = -1;
	/*要映射的size大小。*/
	unsigned long vma_size =  vma->vm_end - vma->vm_start;
	int ret = 0, id = get_id(file);

~~snip
	/* check this file isn't already mmaped, for submaps check this file
	 * has never been mmaped */
	/*如果类型为submap,也不用再mmap。这部分和进程间共享PMEM有关,
也就是说当主进程做了mmap之后,另外一个要共享的进程就无需再mmap了。*/
	if ((data->flags & PMEM_FLAGS_SUBMAP) ||
	    (data->flags & PMEM_FLAGS_UNSUBMAP)) {
#if PMEM_DEBUG
		pr_err("pmem: you can only mmap a pmem file once, "
		       "this file is already mmaped. %x\n", data->flags);
#endif
		ret = -EINVAL;
		goto error;
	}
	/* if file->private_data == unalloced, alloc*/
	/*index表示当前分配的位于bitmap中的索引,如果为-1就表示未分配。*/
	if (data->index == -1) {
		mutex_lock(&pmem[id].arena_mutex);
		/*根据id号来从PMEM模块上分配一部分内存,返回在bitmap的索引。*/
		index = pmem_allocate_from_id(id,
				vma->vm_end - vma->vm_start,
				SZ_4K);
		mutex_unlock(&pmem[id].arena_mutex);
		/* either no space was available or an error occured */
		if (index == -1) {
			pr_err("pmem: mmap unable to allocate memory"
				"on %s\n", get_name(file));
			ret = -ENOMEM;
			goto error;
		}
		/* store the index of a successful allocation */
		data->index = index;
	}
	/*分配的size不能超过整个PMEM模块长度。*/
	if (pmem[id].len(id, data) < vma_size) {
#if PMEM_DEBUG
		pr_err("pmem: mmap size [%lu] does not match"
		       " size of backing region [%lu].\n", vma_size,
		       pmem[id].len(id, data));
#endif
		ret = -EINVAL;
		goto error;
	}
	/*调用的是pmem_start_addr_bitmap 函数,返回当前在整个PMEM模块
中的偏移。*/
	vma->vm_pgoff = pmem[id].start_addr(id, data) >> PAGE_SHIFT;
	/*cache的禁止操作。*/
	vma->vm_page_prot = pmem_phys_mem_access_prot(file, vma->vm_page_prot);
	/* PMEM_FLAGS_CONNECTED 在ioctl接口中会被定义,表示要共享PMEM内存。可以先看如果要共享内存,mmap做了什么。*/
	if (data->flags & PMEM_FLAGS_CONNECTED) {
		struct pmem_region_node *region_node;
		struct list_head *elt;
	/*插入一个pfn页框到用vma中*/
		if (pmem_map_garbage(id, vma, data, 0, vma_size)) {
			pr_alert("pmem: mmap failed in kernel!\n");
			ret = -EAGAIN;
			goto error;
		}
	/*根据当前有多少region_list作一一映射。*/
		list_for_each(elt, &data->region_list) {
			region_node = list_entry(elt, struct pmem_region_node,
						 list);
			DLOG("remapping file: %p %lx %lx\n", file,
				region_node->region.offset,
				region_node->region.len);
			if (pmem_remap_pfn_range(id, vma, data,
						 region_node->region.offset,
						 region_node->region.len)) {
				ret = -EAGAIN;
				goto error;
			}
		}
		/*标记当前是submap。*/
		data->flags |= PMEM_FLAGS_SUBMAP;
		get_task_struct(current->group_leader);
		data->task = current->group_leader;
		data->vma = vma;
#if PMEM_DEBUG
		data->pid = current->pid;
#endif
		DLOG("submmapped file %p vma %p pid %u\n", file, vma,
		     current->pid);
	} else {
		/mastermap走如下流程。映射vma_size大小到用户空间。*/
		if (pmem_map_pfn_range(id, vma, data, 0, vma_size)) {
			pr_err("pmem: mmap failed in kernel!\n");
			ret = -EAGAIN;
			goto error;
		}
		data->flags |= PMEM_FLAGS_MASTERMAP;
		data->pid = current->pid;
	}
	vma->vm_ops = &vm_ops;
error:
	up_write(&data->sem);
	return ret;
}

在这个阶段,我们主要关心的为非共享PMEM的操作,当执行了mmap之后,用户空间就直接可以操作pmem了。

上面还有个重要的函数没作分析,pmem_allocate_from_id():

static int pmem_allocate_from_id(const int id, const unsigned long size,
						const unsigned int align)
{
	int ret;
	ret = pmem_get_region(id);
	if (ret)
		return -1;
	/*调用的是pmem_allocator_bitmap().*/
	ret = pmem[id].allocate(id, size, align);
	if (ret < 0)
		pmem_put_region(id);
	return ret;
}
static int pmem_get_region(int id)
{
	/* Must be called with arena mutex locked */
	atomic_inc(&pmem[id].allocation_cnt);
	if (!pmem[id].vbase) {
~~snip
		/*根据id作ioremap*/
		ioremap_pmem(id);
	}
~~snip
}
static void ioremap_pmem(int id)
{
	unsigned long addr;
	const struct mem_type *type;

	DLOG("PMEMDEBUG: ioremaping for %s\n", pmem[id].name);
	if (pmem[id].map_on_demand) {
~~snip
	} else {
	/*如果需要cache则调用ioremap_cached,否则调用ioremap。*/
		if (pmem[id].cached)
			pmem[id].vbase = ioremap_cached(pmem[id].base,
						pmem[id].size);
~~snip
		else
			pmem[id].vbase = ioremap(pmem[id].base, pmem[id].size);
	}
}
}

这样,得到PMEM模块对应的内核虚拟地址。

接着就是pmem_allocator_bitmap,看它如何利用bitmap来分配及管理内存。

static int pmem_allocator_bitmap(const int id,
		const unsigned long len,
		const unsigned int align)
{
	/* caller should hold the lock on arena_mutex! */
	int bitnum, i;
	unsigned int quanta_needed;

	DLOG("bitmap id %d, len %ld, align %u\n", id, len, align);
~~snip
	/*以quantum为单位计算要分配的内存大小。*/
	quanta_needed = (len + pmem[id].quantum - 1) / pmem[id].quantum;
	/*超过整个pmem模块的数量则失败。*/
	if (pmem[id].allocator.bitmap.bitmap_free < quanta_needed) {
		return -1;
	}
	/*将要申请的quanta数量再次作一个转换,因为要考虑对齐等因素。*/
	bitnum = reserve_quanta(quanta_needed, id, align);
	if (bitnum == -1)
		goto leave;
	/*找到第一个未被使用过的bitmap的位置。*/
	for (i = 0;
		i < pmem[id].allocator.bitmap.bitmap_allocs &&
			pmem[id].allocator.bitmap.bitm_alloc[i].bit != -1;
		i++)
		;
	/*如果找到的位置已经超出当前的bitmap_allocs数量,则要重新
分配更大的一块bitm_alloc.*/
	if (i >= pmem[id].allocator.bitmap.bitmap_allocs) {
		void *temp;
		/*申请的数量比上次大一倍。*/
		int32_t new_bitmap_allocs =
			pmem[id].allocator.bitmap.bitmap_allocs << 1;
		int j;

		if (!new_bitmap_allocs) { /* failed sanity check!! */
			return -1;
		}
		/*申请数量不能大于当前PMEM模块实际的数量。*/
		if (new_bitmap_allocs > pmem[id].num_entries) {
			/* failed sanity check!! */
			return -1;
		}
		/*重新分配和指定。*/
		temp = krealloc(pmem[id].allocator.bitmap.bitm_alloc,
				new_bitmap_allocs *
				sizeof(*pmem[id].allocator.bitmap.bitm_alloc),
				GFP_KERNEL);
		if (!temp) {
			return -1;
		}
		pmem[id].allocator.bitmap.bitmap_allocs = new_bitmap_allocs;
		pmem[id].allocator.bitmap.bitm_alloc = temp;
		/*只对重新分配的部分作初始化。*/
		for (j = i; j < new_bitmap_allocs; j++) {
			pmem[id].allocator.bitmap.bitm_alloc[j].bit = -1;
			pmem[id].allocator.bitmap.bitm_alloc[i].quanta = 0;
		}

		DLOG("increased # of allocated regions to %d for id %d\n",
			pmem[id].allocator.bitmap.bitmap_allocs, id);
	}

	DLOG("bitnum %d, bitm_alloc index %d\n", bitnum, i);

	pmem[id].allocator.bitmap.bitmap_free -= quanta_needed;
	pmem[id].allocator.bitmap.bitm_alloc[i].bit = bitnum;
	pmem[id].allocator.bitmap.bitm_alloc[i].quanta = quanta_needed;
leave:
	return bitnum;
}

static int reserve_quanta(const unsigned int quanta_needed,
		const int id,
		unsigned int align)
{
	/* alignment should be a valid power of 2 */
	int ret = -1, start_bit = 0, spacing = 1;
~~snip
	start_bit = bit_from_paddr(id,
		(pmem[id].base + align - 1) & ~(align - 1));
	if (start_bit <= -1) {
		return -1;
	}
	spacing = align / pmem[id].quantum;
	spacing = spacing > 1 ? spacing : 1;
	/*从memory pool上也就是当前拥有的PMEM内存块上分配出一片
区域来给当前申请用户进程。*/
	ret = bitmap_allocate_contiguous(pmem[id].allocator.bitmap.bitmap,
		quanta_needed,
		(pmem[id].size + pmem[id].quantum - 1) / pmem[id].quantum,
		spacing,
		start_bit);

	return ret;
}

内存分配完成。

ioctl

PMEM提供了若干个ioctl的cmd供用户空间操作,有获取当前申请的len,获取PMEM模块的总size,申请pmem等,这里我们重点关注alloc, map, connect,其他几个很简单,可自行分析。

static long pmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	/* called from user space as file op, so file guaranteed to be not
	 * NULL
	 */
	struct pmem_data *data = file->private_data;
	int id = get_id(file);
#if PMEM_DEBUG_MSGS
	char currtask_name[
		FIELD_SIZEOF(struct task_struct, comm) + 1];
#endif

	DLOG("pid %u(%s) file %p(%ld) cmd %#x, dev %s(id: %d)\n",
		current->pid, get_task_comm(currtask_name, current),
		file, file_count(file), cmd, get_name(file), id);

	switch (cmd) {
	case PMEM_GET_PHYS:
		~~snip
	case PMEM_MAP:
		{
			struct pmem_region region;
			DLOG("map\n");
			if (copy_from_user(®ion, (void __user *)arg,
						sizeof(struct pmem_region)))
				return -EFAULT;
			return pmem_remap(®ion, file, PMEM_MAP);
		}
		break;
	case PMEM_UNMAP:
		~~snip
	case PMEM_GET_SIZE:
		~~snip
	case PMEM_GET_TOTAL_SIZE:
		~~snip
	case PMEM_GET_FREE_SPACE:
~~snip
	case PMEM_ALLOCATE:
		{
			int ret = 0;
			DLOG("allocate, id %d\n", id);
			down_write(&data->sem);
			if (has_allocation(file)) {
				pr_err("pmem: Existing allocation found on "
					"this file descrpitor\n");
				up_write(&data->sem);
				return -EINVAL;
			}

			mutex_lock(&pmem[id].arena_mutex);
			data->index = pmem_allocate_from_id(id,
					arg,
					SZ_4K);
			mutex_unlock(&pmem[id].arena_mutex);
			ret = data->index == -1 ? -ENOMEM :
				data->index;
			up_write(&data->sem);
			return ret;
		}
	case PMEM_ALLOCATE_ALIGNED:
		~~snip
	case PMEM_CONNECT:
		DLOG("connect\n");
		return pmem_connect(arg, file);
	case PMEM_CLEAN_INV_CACHES:
	case PMEM_CLEAN_CACHES:
	case PMEM_INV_CACHES:
		~~snip
	default:
		if (pmem[id].ioctl)
			return pmem[id].ioctl(file, cmd, arg);

		DLOG("ioctl invalid (%#x)\n", cmd);
		return -EINVAL;
	}
	return 0;
}

PMEM_ALLOCATE:

咦,也是调用pmem_allocate_from_id(),在前面的mmap中,我们已经分析过了。

PMEM_CONNECT:

调用的是pmem_connect(),这个cmd主要是供另外一个进程和主进程共享PMEM用的,传进去的参数为主进程打开PMEM的fd,这里称主进程master .

static int pmem_connect(unsigned long connect, struct file *file)
{
	int ret = 0, put_needed;
	struct file *src_file;
~~snip
	/*根据主进程的fd获得相对应的file.*/
	src_file = fget_light(connect, &put_needed);
~~snip
	if (unlikely(!is_pmem_file(src_file))) {
		pr_err("pmem: %s: src file is not a pmem file!\n",
			__func__);
		ret = -EINVAL;
		goto put_src_file;
	} else {
		/*得到master的pmem data.*/
		struct pmem_data *src_data = src_file->private_data;

		if (!src_data) {
			pr_err("pmem: %s: src file pointer has no"
				"private data, bailing out!\n", __func__);
			ret = -EINVAL;
			goto put_src_file;
		}

		down_read(&src_data->sem);

		if (unlikely(!has_allocation(src_file))) {
			up_read(&src_data->sem);
			pr_err("pmem: %s: src file has no allocation!\n",
				__func__);
			ret = -EINVAL;
		} else {
			struct pmem_data *data;
			/*获得master分配到的内存在bitmap中的index.*/
			int src_index = src_data->index;

			up_read(&src_data->sem);

			data = file->private_data;
			if (!data) {
				pr_err("pmem: %s: passed in file "
					"pointer has no private data, bailing"
					" out!\n", __func__);
				ret = -EINVAL;
				goto put_src_file;
			}

			down_write(&data->sem);
			if (has_allocation(file) &&
					(data->index != src_index)) {
				up_write(&data->sem);

				pr_err("pmem: %s: file is already "
					"mapped but doesn't match this "
					"src_file!\n", __func__);
				ret = -EINVAL;
			} else {
				/*将master的pmem data数据保存到当前进程中。*/
				data->index = src_index;
				data->flags |= PMEM_FLAGS_CONNECTED;		//设置标志,在mmap中用到。
				data->master_fd = connect;
				data->master_file = src_file;

				up_write(&data->sem);

				DLOG("connect %p to %p\n", file, src_file);
			}
		}
	}
put_src_file:
	fput_light(src_file, put_needed);
leave:
	return ret;
}

嗯,这个cmd就是将master的struct pmem_data给了当前要共享的进程。

PMEM_MAP:

这个接口是为了用空进程想要执行remap而开设的,不过我想不到什么时候要重新映射呢?

int pmem_remap(struct pmem_region *region, struct file *file,
		      unsigned operation)
{
	int ret;
	struct pmem_region_node *region_node;
	struct mm_struct *mm = NULL;
	struct list_head *elt, *elt2;
	int id = get_id(file);
	struct pmem_data *data;
~~snip

	/* is_pmem_file fails if !file */
	data = file->private_data;
~~snip
	/* lock the mm and data */
	ret = pmem_lock_data_and_mm(file, data, &mm);
	if (ret)
		return 0;
/*明确指定只有master file才能作remap动作。*/
	/* only the owner of the master file can remap the client fds
	 * that back in it */
	if (!is_master_owner(file)) {
		ret = -EINVAL;
		goto err;
	}
~~snip
	if (operation == PMEM_MAP) {
	/*生成一个struct pem_region_node,用来保存上层传下来的region信息。*/
		region_node = kmalloc(sizeof(struct pmem_region_node),
			      GFP_KERNEL);
		if (!region_node) {
			ret = -ENOMEM;
			goto err;
		}
		region_node->region = *region;
		/*添加到data的region_list中。*/
		list_add(®ion_node->list, &data->region_list);
	} else if (operation == PMEM_UNMAP) {
		int found = 0;
		list_for_each_safe(elt, elt2, &data->region_list) {
			region_node = list_entry(elt, struct pmem_region_node,
				      list);
			if (region->len == 0 ||
			    (region_node->region.offset == region->offset &&
			    region_node->region.len == region->len)) {
				list_del(elt);
				kfree(region_node);
				found = 1;
			}
		}
~~snip
	/*比较好奇PMEM_FLAGS_SUBMAP是想共享的进程设置的,但是前面的判断又是
只能master file才能作remap,这样岂不是永远执行不了?*/
	if (data->vma && PMEM_IS_SUBMAP(data)) {
		if (operation == PMEM_MAP)
			ret = pmem_remap_pfn_range(id, data->vma, data,
				   region->offset, region->len);
		else if (operation == PMEM_UNMAP)
			ret = pmem_unmap_pfn_range(id, data->vma, data,
				   region->offset, region->len);
	}

err:
	pmem_unlock_data_and_mm(data, mm);
	return ret;
}

2013/03/04




你可能感兴趣的:(PMEM原理分析)