- 两个Domain(Domain0和DomainU)之间的内存页共享
- 在这个页中设置一个共享ring
- 为共享ring设置event channel
- 在Dom0和DomU之间来回传递一些信息

介绍
在xen中的虚拟机被称为Domain. Domain0(Dom0)是特别的, 它拥有与真实设备(例如网卡)交互的设备驱动. 这个驱动被称为后端驱动. 在我们的例子中, Dom0被称为后端domain.
被称为 DomainU(DomU)的用户Domain中有一个相应的前端驱动, 它是虚拟设备的接口. 为了和真实设备通信, DomU中的前端驱动需要连接到后端驱动. 在我们以下的例子中, 这个DomU被称为前端Domain.
Xen为共享Domain间的内存提供了授权表(Grant Tables). 设备驱动使用授权表工作. 每个Domain有它自己的授权表, 并与xen共享. 在这个表中的条目由授权引用(grant references)所标识. 授权引用在Domain间传递, 且所引用的共享页由授权表所指向, domain也设置一个共享环结构(ring structure), 其用于在domain间有效共享数据.先上代码:
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// Kernel virtual address of the page shared with Dom0. It is assigned in
// init_module() and passed to the grant-table calls there and in
// cleanup_module().
void *page;
/*
 * Request slot placed on the shared ring by the DomU frontend
 * (filled in by send_request_to_dom0()).
 */
struct as_request {
unsigned int id; /* private guest value echoed in resp */
unsigned int status;
unsigned int operation;
};
/*
 * Response slot read back from the shared ring by the frontend interrupt
 * handler as_int().
 */
struct as_response {
unsigned int id; /* copied from request */
unsigned int status;
unsigned int operation; /* copied from request */
};
// The following makes the as_sring, as_front_ring, as_back_ring "types".
// This module uses struct as_sring and struct as_front_ring.
DEFINE_RING_TYPES(as, struct as_request, struct as_response);
/* Per-module frontend state; a single global instance named info. */
struct info_t {
struct as_front_ring ring; /* front ring initialised over the shared page */
grant_ref_t gref; /* grant reference returned for the shared page */
int irq; /* irq returned by bind_listening_port_to_irqhandler() */
int port; /* event-channel port derived from irq via irq_to_evtchn_port() */
} info;
// Domain id passed as the remote domain to the grant and event-channel calls.
#define DOM0_ID 0
// procfs entries: the /proc/demo directory and the /proc/demo/file trigger
static struct proc_dir_entry *proc_dir = NULL;
static struct proc_dir_entry *proc_file = NULL;
// Holds the text most recently written to /proc/demo/file (see file_write()).
char proc_data[20];
#ifdef SHARED_MEM
/*
 * Send a request via the shared ring to Dom0, followed by an event-channel
 * notification.
 *
 * The request carries a running counter (starting at 9) in its id,
 * operation and status fields so the two sides can be matched up in the log.
 *
 * Returns 0 on success, or -EBUSY when the ring has no free request slot.
 */
int send_request_to_dom0(void)
{
	struct as_request *ring_req;
	int notify;
	static int reqid=9;

	/*
	 * Never produce into a full ring: the slot at req_prod_pvt would still
	 * belong to a request whose response has not been consumed yet.
	 */
	if (RING_FULL(&(info.ring))) {
		printk("\nxen:DomU: ring full, request dropped");
		return -EBUSY;
	}

	/* Write a request into the ring and update the req-prod pointer */
	ring_req = RING_GET_REQUEST(&(info.ring), info.ring.req_prod_pvt);
	ring_req->id = reqid;
	ring_req->operation = reqid;
	ring_req->status = reqid;
	/* All printed values are unsigned, hence %u. */
	printk("\nxen:DomU: Fill in IDX-%u, with id=%u, op=%u, st=%u",
	       info.ring.req_prod_pvt, ring_req->id, ring_req->operation,
	       ring_req->status);
	reqid++;
	info.ring.req_prod_pvt += 1;

	/* Publish the request; notify tells whether the backend asked for an event */
	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&(info.ring), notify);
	if (notify)
		printk("\nxen:DomU: Sent a req to Dom0");
	else
		printk("\nxen:DomU: No notify req to Dom0");

	/*
	 * The event is sent in both cases, exactly as the original code did:
	 * this demo kicks the backend unconditionally and only logs whether
	 * the ring protocol considered the notification necessary.
	 */
	notify_remote_via_irq(info.irq);
	printk("...\n");
	return 0;
}
/*
 * procfs write handler for /proc/demo/file.
 *
 * Copies the user text into proc_data, parses it as a decimal integer and,
 * when the value is 1, sends one request to Dom0 over the shared ring.
 *
 * filp, data: unused.
 * buff:       user-space buffer holding the text.
 * len:        number of bytes in buff.
 *
 * Returns len on success, -EINVAL when the input does not fit into
 * proc_data (including its terminator), -EFAULT when the user buffer
 * cannot be read.
 */
ssize_t file_write (struct file *filp, const char __user *buff,
		    unsigned long len, void *data)
{
	int value;

	printk("\nxen:domU: file_write %lu bytes", len);

	/*
	 * len is controlled by user space while proc_data is a small fixed
	 * array: reject anything that would overflow it or leave no room
	 * for the terminating NUL written below.
	 */
	if (len >= sizeof(proc_data))
		return -EINVAL;

	/* copy_from_user() returns the number of bytes NOT copied, 0 on success */
	if (copy_from_user(&proc_data[0], buff, len))
		return -EFAULT;
	proc_data[len] = '\x0';

	/* Convert the text to a signed integer */
	value = simple_strtol(proc_data, 0, 10);
	switch (value) {
	case 1:
		send_request_to_dom0();
		printk(" ,value = %d", value);
		break;
	default:
		printk(" ,value not recognized !");
		break;
	}
	return len;
}
/*
 * procfs read handler for /proc/demo/file.
 *
 * Formats the current contents of proc_data into the caller-supplied
 * buffer and returns the number of characters produced. Note that the
 * page parameter shadows the file-scope page pointer inside this function.
 * start, off, count, eof and data are not used.
 */
int file_read (char* page, char**start, off_t off,
	       int count, int *eof, void *data)
{
	/* sprintf() reports the length of the string it just wrote into page */
	return sprintf(page, "%s", proc_data);
}
/*
 * Create the /proc/demo directory and the /proc/demo/file entry.
 *
 * Writing "1" into that file once the module is loaded invokes
 * file_write(), which sends a request on the shared ring to Dom0. This is
 * how the event channel and shared ring routines are exercised.
 *
 * Returns 0 on success and -EAGAIN when either entry cannot be created.
 */
int create_procfs_entry(void)
{
	proc_dir = proc_mkdir("demo", NULL);
	if (proc_dir == NULL) {
		printk("\nxen:domU Could not create demo entry in procfs");
		return -EAGAIN;
	}

	/*
	 * create_proc_entry() takes the file name, its permission bits and
	 * the parent directory; it returns NULL when creation fails.
	 */
	proc_file = create_proc_entry("file", 0600, proc_dir);
	if (proc_file == NULL) {
		printk("\nxen:domU Could not create /proc/demo/file");
		return -EAGAIN;
	}

	/* Hook up the read and write handlers defined above */
	proc_file->read_proc = file_read;
	proc_file->write_proc = file_write;
#if PROC_OWNER
	proc_file->owner = THIS_MODULE;
#endif
	return 0;
}
/*
 * Our interrupt handler for the event channel that we set up.
 *
 * Walks the responses between our private consumer index (rsp_cons) and
 * the producer index published in the shared ring, logs each of them, then
 * either re-checks for late responses or arms rsp_event for the next one.
 * irq and dev_id are not used. Always returns IRQ_HANDLED.
 */
static irqreturn_t as_int (int irq, void *dev_id)// interrupt handler
{
struct as_response *ring_resp;
RING_IDX i, rp;
printk("\nxen:DomU: as_int called");
again:
// Snapshot the response producer index written by the other end.
// NOTE(review): Xen frontends normally issue rmb() right after this read,
// before looking at the response slots - confirm whether it is needed here.
rp = info.ring.sring->rsp_prod;
printk("\nxen:DomU: ring pointers %d to %d", info.ring.rsp_cons, rp);
for(i=info.ring.rsp_cons; i != rp; i++) {
// NOTE(review): id is assigned below but never read afterwards.
unsigned long id;
// what did we get from Dom0
ring_resp = RING_GET_RESPONSE(&(info.ring), i);
printk("\nxen:DomU: Recvd in IDX-%d, with id=%d, op=%d, st=%d",
i, ring_resp->id, ring_resp->operation, ring_resp->status);
id = ring_resp->id;
switch(ring_resp->operation) {
case 0:
printk("\nxen:DomU: operation:0");
break;
default:
break;
}
}
// Everything up to the snapshot has now been consumed.
info.ring.rsp_cons = i;
// If requests are still awaiting a response, do the final check and loop
// again when more responses showed up meanwhile; otherwise ask to be
// notified for the next response.
if (i != info.ring.req_prod_pvt) {
int more_to_do;
RING_FINAL_CHECK_FOR_RESPONSES(&info.ring, more_to_do);
if(more_to_do)
goto again;
} else
info.ring.sring->rsp_event = i+1;
return IRQ_HANDLED;
}
#endif
/*
 * Module entry point for the DomU (frontend) side.
 *
 * 1. Allocates one page and, when ENABLE_SHARED_RING is set, lays a shared
 *    ring on it and initialises the front ring in info.ring.
 * 2. Grants Dom0 read/write access to that page and logs the grant
 *    reference.
 * 3. When ENABLE_EVENT_IRQ is defined, binds an event channel towards Dom0
 *    to as_int() and creates the /proc/demo/file trigger.
 *
 * TBD: the grant reference and the event channel port are not published
 * via Xenstore; they have to be passed by hand to the Dom0 module at insmod
 * time.
 *
 * Returns 0 on success or a negative errno. Everything acquired so far is
 * released on failure, so the module is never left loaded half-initialised.
 *
 * NOTE(review): the ring code in this file is guarded by three different
 * macros (SHARED_MEM, ENABLE_SHARED_RING, ENABLE_EVENT_IRQ); they have to
 * be enabled together for the ring path to work - confirm the intended
 * build flags.
 */
int init_module(void)
{
	unsigned long mfn;
	int gref;
#ifdef ENABLE_EVENT_IRQ
	int err;
#endif
#if ENABLE_SHARED_RING
	struct as_sring *sring;
#endif

	/*
	 * __get_free_pages() returns physically contiguous, directly mapped
	 * memory (unlike vmalloc()), so the machine frame number can be
	 * derived from the virtual address. The memory is not zeroed and
	 * GFP_KERNEL may sleep.
	 *
	 * Order 0 is requested: exactly one page is shared, and both
	 * free_page() and gnttab_end_foreign_access() release a single page.
	 */
	page = (void *)__get_free_pages(GFP_KERNEL, 0);
	if (!page) {
		printk("\nxen:DomU: could not get free page");
		return -ENOMEM;
	}

#if ENABLE_SHARED_RING
	/* Put a shared ring structure on this page */
	sring = (struct as_sring *)page;
	SHARED_RING_INIT(sring);
	/* info.ring is the front_ring structure */
	FRONT_RING_INIT(&(info.ring), sring, PAGE_SIZE);
#else
	/*
	 * Test marker: the Dom0 module prints the first bytes of the mapped
	 * page, which shows that the page really is shared. It must not be
	 * written when the ring is in use, because it would overwrite the
	 * ring indices at the start of the page.
	 */
	strcpy((char *)page, "aseem sethi");
#endif

	mfn = virt_to_mfn(page);

	/*
	 * Tell the hypervisor that Dom0 may access this frame (third argument
	 * 0 = not read-only). The result is kept in a signed int first:
	 * info.gref is an unsigned grant_ref_t, so a negative error code
	 * could not be detected after the assignment.
	 */
	gref = gnttab_grant_foreign_access(DOM0_ID, mfn, 0);
	if (gref < 0) {
		printk("\nxen: could not grant foreign access");
		free_page((unsigned long)page);
		return gref;
	}
	info.gref = gref;
	printk("\n gref = %d", gref);

	/* Setup an event channel to Dom0 */
#ifdef ENABLE_EVENT_IRQ
	err = bind_listening_port_to_irqhandler(DOM0_ID, as_int, 0,
						"xen-eg", &info);
	if (err < 0) {
		printk("\nxen:DomU failed to setup evtchn !");
		/* Revokes the grant; a non-zero page argument is freed as well */
		gnttab_end_foreign_access(info.gref, 0, (unsigned long)page);
		return err;
	}
	info.irq = err;
	info.port = irq_to_evtchn_port(info.irq);
	printk(" interupt = %d, local-port = %d", info.irq, info.port);
	printk("....\n...");

	/*
	 * The proc entry is only the manual trigger for test requests; the
	 * shared page and the event channel stay usable without it, so a
	 * failure is reported but does not abort the load.
	 */
	if (create_procfs_entry() < 0)
		printk("\nxen:DomU: /proc/demo/file is not available");
#endif
	return 0;
}
void cleanup_module(void)
{
printk("\nCleanup grant ref:");
if (gnttab_query_foreign_access(info.gref) == 0) {
//Remove the grant to the page
printk("\n xen: No one has mapped this frame");
// If 3rd param is non NULL, page has to be freed
gnttab_end_foreign_access(info.gref, 0, page);
// free_pages(page,1);
} else {
printk("\n xen: Someone has mapped this frame");
// Guess, we still free the page, since we are rmmod-ed
gnttab_end_foreign_access(info.gref, 0, page);
}
/* Cleanup proc entry */
remove_proc_entry("file", proc_dir);
remove_proc_entry("demo", NULL);
printk("....\n...");
}
MODULE_LICENSE("GPL");
里面的备注可以多多少少增进一下大家对函数以及结构体的理解,现在我们大体说一下:
int send_request_to_dom0(void) 将请求写入RING之后再通过RING到达backend
file_write() 将buff的内容从用户空间写入内核空间的proc_data; file_read() 再将proc_data的内容写入page中
create_procfs_entry()创建虚拟文件及文件夹
init_module() 为page分配内核空间,并设置共享RING及初始化前端RING,并验证是否可以被共享映射,为了验证在page中写入字符串"aseem sethi"。建立事件通道。gref和事件通道port需要在Dom0代码中手动加入。
cleanup_module()检查是否有dom映射过此页面,撤销映射,并且删除在/proc下的文件。
Dom0代码:
#include
#include
#include
#if 0
#include
#include
#include
#include
#include
#include
#else
#include
#include
#include
#include
#include
#include
#include
#include
#include
#endif
// Map operation: maps the page identified by (domain, grant reference) into
// our own address space. Filled in and submitted in init_module().
struct gnttab_map_grant_ref ops;
// Unmap operation that undoes the mapping; prepared in init_module().
struct gnttab_unmap_grant_ref unmap_ops;
/* Request slot read from the shared ring by the backend handler as_int(). */
struct as_request {
unsigned int id; /* private guest value, echoed in resp */
unsigned int status;
unsigned int operation;
};
/* Response slot written back to the shared ring by as_int(). */
struct as_response {
unsigned int id; /* copied from request */
unsigned int status;
unsigned int operation; /* copied from request */
};
/* Short aliases used for the local copies in as_int(). */
typedef struct as_request as_request_t;
typedef struct as_response as_response_t;
// From /include/xen/interface/io/ring.h
// The following makes the as_sring, as_front_ring, as_back_ring "types".
// This module uses as_sring_t and struct as_back_ring.
DEFINE_RING_TYPES(as, struct as_request, struct as_response);// macro form: DEFINE_RING_TYPES(__name, __req_t, __rsp_t)
/* Per-module backend state; a single global instance named info. */
struct info_t {
int irq; /* irq returned by bind_interdomain_evtchn_to_irqhandler() */
int gref; /* grant reference of the shared page (module parameter gref) */
int remoteDomain; /* id of the frontend domain; set to 1 in init_module() */
int evtchn; /* remote event-channel port (module parameter port) */
struct as_back_ring ring; /* back ring initialised over the mapped page */
} info;
// Grant reference and event-channel port printed by the DomU module; they
// must be passed by hand when this module is inserted.
int gref;
int port;
module_param(gref, int, 0644);// value logged by the DomU init_module()
module_param(port, int, 0644);// kernel modules receive their arguments through module_param()
/* About the interrupt handler return value: a handler returns the special
type irqreturn_t, which has only two values, IRQ_NONE and IRQ_HANDLED.
#ifndef _LINUX_IRQRETURN_H
#define _LINUX_IRQRETURN_H
typedef int irqreturn_t;
#define IRQ_NONE (0)
#define IRQ_HANDLED (1)
#define IRQ_RETVAL(x) ((x) != 0) // this macro simply yields 0 or non-zero
#endif*/
#if ENABLE_SRING
/*
 * Backend interrupt handler for the event channel bound in init_module().
 *
 * Consumes the requests between our private consumer index and the
 * producer index read from the shared ring. For every request it writes a
 * response echoing id and operation with status incremented by one,
 * publishes it, and notifies the frontend when the ring macros ask for it.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t as_int (int irq, void *dev_id)// I/O ring handling: requests and responses
{
RING_IDX rc, rp;// typedef unsigned int RING_IDX;
as_request_t req;
as_response_t resp;
// NOTE(review): more_to_do is assigned below but never read, so requests
// produced after the rp snapshot are only picked up on the next interrupt.
int more_to_do, notify;
// dev_id is a pointer to the info structure
// NOTE(review): the casts to unsigned int truncate pointers on 64-bit
// builds; %p would print them in full.
printk("\nxen:Dom0: as_int called with dev_id %x info=%x",
(unsigned int)dev_id, (unsigned int)&info);
rc = info.ring.req_cons;
// Snapshot of the request producer index written by the frontend.
rp = info.ring.sring->req_prod;
printk(" rc =%d rp =%d", rc, rp);
while(rc!=rp) {
/*
define RING_REQUEST_CONS_OVERFLOW(_r, _cons)
(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
*/
if(RING_REQUEST_CONS_OVERFLOW(&info.ring, rc))// request index overflows the ring
break;
// what did we get from the frontend at index rc
// The request is copied out of the shared page before it is used.
memcpy(&req, RING_GET_REQUEST(&info.ring, rc), sizeof(req));
resp.id = req.id;
resp.operation = req.operation;
resp.status = req.status+1; // Send back a status +1 of what was recvd
printk("\nxen:Dom0: Recvd at IDX-%d: id=%d, op=%d, status=%d",
rc, req.id, req.operation, req.status);
// update the req-consumer
info.ring.req_cons = ++rc;
barrier();// compiler barrier: keeps the accesses around it from being reordered
switch (req.operation) {
case 0:
printk("\nxen:Dom0: req.operation = 0");
break;
default:
printk("\nxen:Dom0: req.operation = %d", req.operation);
break;
}
// Place the response in the next private producer slot and publish it.
memcpy(RING_GET_RESPONSE(&info.ring, info.ring.rsp_prod_pvt),
&resp, sizeof(resp));
info.ring.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info.ring, notify);
if(info.ring.rsp_prod_pvt == info.ring.req_cons) {
RING_FINAL_CHECK_FOR_REQUESTS(&info.ring, more_to_do);
} else if (RING_HAS_UNCONSUMED_REQUESTS(&info.ring)) {// requests still pending
more_to_do = 1;
}
if(notify) {
printk("\nxen:Dom0: Send notify to DomU");
notify_remote_via_irq(info.irq);
}
}
return IRQ_HANDLED;
}
#endif
int init_module(void)
{
struct vm_struct *v_start;
#if ENABLE_SRING
as_sring_t *sring;
#endif
int err;
info.gref = gref;
info.remoteDomain = 1;
info.evtchn = port;
printk("\nxen: dom0: init_module with gref = %d", info.gref);
// The following function reserves a range of kernel address space and
// allocates pagetables to map that range. No actual mappings are created.
v_start = alloc_vm_area(PAGE_SIZE);//分配虚拟地址结构
if (v_start == 0) {//无法分配
free_vm_area(v_start);
printk("\nxen: dom0: could not allocate page");
return -EFAULT;
}
/* struct vm_struct {
struct vm_struct *next;//指向下一虚拟地址,加速查询
void *addr;//地址
unsigned long size;//大小
unsigned long flags;//标志
struct page **pages;//页指针
unsigned int nr_pages;
unsigned long phys_addr;//物理地址
};
struct vm_struct *alloc_vm_area(size_t size);//分配虚拟地址结构
void free_vm_area(struct vm_struct *area);//释放虚拟地址结构*/
/*
* ops struct in paramaeres
* host_addr, flags, ref
* ops struct out parameters
* status (zero if OK), handle (used to unmap later), dev_bus_addr
*/
//分配内存
gnttab_set_map_op(&ops, (unsigned long)v_start->addr, GNTMAP_host_map,
info.gref, info.remoteDomain); /* flags, ref, domID */
//GNTTABOP_map_grant_ref **操作码(映射到自己空间)
//HYPERVISOR_grant_table_op超级调用
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &ops, 1)) {
printk("\nxen: dom0: HYPERVISOR map grant ref failed");
return -EFAULT;
}
if (ops.status) {
printk("\nxen: dom0: HYPERVISOR map grant ref failed status = %d",
ops.status);
return -EFAULT;
}
printk("\nxen: dom0: shared_page = %x, handle = %x, status = %x",
(unsigned int)v_start->addr, ops.handle, ops.status);
// Used for unmapping
unmap_ops.host_addr = (unsigned long)(v_start->addr);
unmap_ops.handle = ops.handle;
#define ENABLE_PRINT_PAGE 1
#if ENABLE_PRINT_PAGE //验证DomU page中写入的字符串"aseem sethi"
{
int i;
printk("\nBytes in page ");
for(i=0;i<=10;i++)
{
printk("%c", ((char*)(v_start->addr))[i]);
}
}
#endif
#if ENABLE_SRING
sring = (as_sring_t*)v_start->addr;
BACK_RING_INIT(&info.ring, sring, PAGE_SIZE);
/* Seetup an event channel to the frontend */
err = bind_interdomain_evtchn_to_irqhandler(info.remoteDomain,
info.evtchn, as_int, 0, "dom0-backend", &info);
if (err < 0) {
printk("\nxen: dom0: init_module failed binding to evtchn !");
err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
&unmap_ops, 1);
return -EFAULT;
}
info.irq = err;
printk("\nxen: dom0: end init_module: int = %d", info.irq);
#endif
printk("\nXEN: dom: end init_module\n");
return 0;
}
void cleanup_module(void) {
int ret;
printk("\nxen: dom0: cleanup_module");
// Unmap foreign frames
// ops.handle points to the pages that were initially mapped. Set in the
// __init() function
//ops.host_addr ponts to the heap where the pages were mapped
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_ops, 1);
if (ret == 0) {
printk(" cleanup_module: unmapped shared frame");
} else {
printk(" cleanup_module: unmapped shared frame failed");
}
printk("...\n");
}
MODULE_LICENSE("GPL");
下面继续大致分析一下流程:
irqreturn_t as_int ()查看RING是否溢出,取得请求,取得回复,之后是检查RING中是否还有请求。这里需要大家有一些RING环的知识。关于barrier()函数,我这里有一个网址推荐
http://www.cnblogs.com/whyandinside/archive/2012/11/07/2759014.html
init_module(void) 映射到自己空间,并分配虚拟地址,验证addr地址中的字符串。
最后就是取消映射了