本章主要讲Xen分离设备模型中,设备的注册过程。涉及到Xenbus的具体使用方法。
1. 总线的注册
2. 驱动的注册
3. 设备的注册
4. Xenstore的读写
总线的注册:
Xenbus有两种类型,前端总线和后端总线,分别为xenbus_frontend和xenbus_backend。
Xenbus的初始化过程:
1. 注册总线
Ø 前端总线
Ø 后端总线
2. 初始化xenstore的共享内存
3. 初始化xenstore的事件通道
4. 建立proc文件
Ø /proc/xen/xsd_kva
Ø /proc/xen/xsd_port
Ø /proc/xen/xenbus
5. 初始化xenstore的接口xs_init()
Ø 绑定中断处理函数wake_waiting()到xenstore的evtchn
Ø 创建内核线程[xenwatch]
Ø 创建内核线程[xenbus]
6. 注册xenbus设备(用处?)
Ø 前端设备
Ø 后端设备
/*
 * Hook xenbus_probe_init() into start-up: with CONFIG_XEN it is registered
 * through postcore_initcall(); otherwise it is exposed to callers through
 * xenbus_init().
 */
#ifdef CONFIG_XEN
postcore_initcall(xenbus_probe_init);
MODULE_LICENSE("Dual BSD/GPL");
#else
/* Returns whatever xenbus_probe_init() returns. */
int xenbus_init(void)
{
	return xenbus_probe_init();
}
#endif
static int xenbus_probe_init(void)
{
int err = 0;
unsigned long page = 0;
DPRINTK("");
if (!is_running_on_xen())
return -ENODEV;
/* Register ourselves with the kernel bus subsystem */
xenbus_frontend.error = bus_register(&xenbus_frontend.bus); // 注册前端bus_type
if (xenbus_frontend.error)
printk(KERN_WARNING
"XENBUS: Error registering frontend bus: %i\n",
xenbus_frontend.error);
xenbus_backend_bus_register(); // 注册后端bus_type
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/ 在这时,dom0还没有进行xenstore的初始化,即没有evtchn和mfn
if (is_initial_xendomain()) {
struct evtchn_alloc_unbound alloc_unbound;
/* Allocate page. */
page = get_zeroed_page(GFP_KERNEL); // 分配页面
if (!page)
return -ENOMEM;
xen_store_mfn = xen_start_info->store_mfn = // 初始化xenstore的共享内存
以后用xenstore表现为xen_store_interface。xb_read()/xb_write()
三个全局变量需要设置:xen_store_evtchn, xen_store_mfn, xen_store_interface
pfn_to_mfn(virt_to_phys((void *)page) >> // 1.虚拟地址转换为物理地址
PAGE_SHIFT); // 2.获得物理页框
// 3.转换为机器页框
/* Next allocate a local port which xenstored can bind to */
alloc_unbound.dom = DOMID_SELF; // 从自己分配一个port
alloc_unbound.remote_dom = 0; // 开放给dom0
err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
&alloc_unbound); // hypercall分配
if (err == -ENOSYS)
goto err;
BUG_ON(err);
xen_store_evtchn = xen_start_info->store_evtchn =
alloc_unbound.port;
#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
/* And finally publish the above info in /proc/xen */
xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
if (xsd_kva_intf) {
memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops,
sizeof(xsd_kva_fops));
xsd_kva_fops.mmap = xsd_kva_mmap;
xsd_kva_intf->proc_fops = &xsd_kva_fops;
xsd_kva_intf->read_proc = xsd_kva_read;
}
xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
if (xsd_port_intf)
xsd_port_intf->read_proc = xsd_port_read;
#endif
xen_store_interface = mfn_to_virt(xen_store_mfn); // xenstore虚拟地址
} else {
xenstored_ready = 1; // 如果不是初始化
#ifdef CONFIG_XEN
xen_store_evtchn = xen_start_info->store_evtchn;
xen_store_mfn = xen_start_info->store_mfn;
xen_store_interface = mfn_to_virt(xen_store_mfn);
#else
xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
PAGE_SIZE);
#endif
}
xenbus_dev_init(); // 创建/proc/xen/xenbus
/* Initialize the interface to xenstore. */
err = xs_init();
if (err) {
printk(KERN_WARNING
"XENBUS: Error initializing xenstore comms: %i\n", err);
goto err;
}
/* Register ourselves with the kernel device subsystem */
if (!xenbus_frontend.error) {
xenbus_frontend.error = device_register(&xenbus_frontend.dev);
if (xenbus_frontend.error) {
bus_unregister(&xenbus_frontend.bus);
printk(KERN_WARNING
"XENBUS: Error registering frontend device: %i\n",
xenbus_frontend.error);
}
}
xenbus_backend_device_register();
if (!is_initial_xendomain())
xenbus_probe(NULL);
return 0;
err:
if (page)
free_page(page);
/*
* Do not unregister the xenbus front/backend buses here. The buses
* must exist because front/backend drivers will use them when they are
* registered.
*/
return err;
}
/*
 * Register the backend bus type with the driver core.  The result is kept
 * in xenbus_backend.error; a failure is only logged.
 */
void xenbus_backend_bus_register(void)
{
	int rc = bus_register(&xenbus_backend.bus);

	xenbus_backend.error = rc;
	if (rc == 0)
		return;

	printk(KERN_WARNING
	       "XENBUS: Error registering backend bus: %i\n",
	       rc);
}
void xenbus_backend_device_register(void)
{
if (xenbus_backend.error)
return;
xenbus_backend.error = device_register(&xenbus_backend.dev);
if (xenbus_backend.error) {
bus_unregister(&xenbus_backend.bus);
printk(KERN_WARNING
"XENBUS: Error registering backend device: %i\n",
xenbus_backend.error);
}
}
/*
 * Create the "xenbus" Xen proc entry (mode 0400) and, when that worked,
 * attach xenbus_dev_file_ops to it.  Always returns 0.
 */
int xenbus_dev_init(void)
{
	xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
	if (!xenbus_dev_intf)
		return 0;

	xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops;
	return 0;
}
/*
 * Initialise the xenstore client state: the reply list with its lock and
 * wait queue, the request/response mutexes and the transaction/watch
 * rw-semaphores.  Then set up the ring communication and start the
 * "xenwatch" and "xenbus" kernel threads.
 *
 * Returns 0 on success, the error from xb_init_comms(), or the error
 * encoded in a failed kthread_run() result.
 */
int xs_init(void)
{
	struct task_struct *thread;
	int rc;

	INIT_LIST_HEAD(&xs_state.reply_list);
	spin_lock_init(&xs_state.reply_lock);
	init_waitqueue_head(&xs_state.reply_waitq);

	mutex_init(&xs_state.request_mutex);
	mutex_init(&xs_state.response_mutex);
	init_rwsem(&xs_state.transaction_mutex);
	init_rwsem(&xs_state.watch_mutex);

	/* Initialize the shared memory rings to talk to xenstored */
	rc = xb_init_comms();
	if (rc != 0)
		return rc;

	/* Kernel thread [xenwatch]; its pid is remembered in xenwatch_pid. */
	thread = kthread_run(xenwatch_thread, NULL, "xenwatch");
	if (IS_ERR(thread))
		return PTR_ERR(thread);
	xenwatch_pid = thread->pid;

	/* Kernel thread [xenbus]. */
	thread = kthread_run(xenbus_thread, NULL, "xenbus");
	if (IS_ERR(thread))
		return PTR_ERR(thread);

	return 0;
}
/*
 * Set up interrupt handler off store event channel.
 *
 * Checks that both xenstore rings are idle, drops a previous IRQ binding if
 * one exists, and binds xen_store_evtchn to wake_waiting() (which wakes
 * xb_waitq).  Returns 0 on success or the non-positive result of the bind.
 */
int xb_init_comms(void)
{
	/* Kernel virtual address of the xenstore shared page. */
	struct xenstore_domain_interface *ring = xen_store_interface;
	int irq;

	/*
	 * A non-idle request ring is only reported.
	 * NOTE(review): the original notes ask why the indexes are not
	 * resynchronised here as they are for the response ring.
	 */
	if (ring->req_prod != ring->req_cons)
		printk(KERN_ERR "XENBUS request ring is not quiescent "
		       "(%08x:%08x)!\n", ring->req_cons, ring->req_prod);

	/* A non-idle response ring is fixed up: consumer catches up with producer. */
	if (ring->rsp_prod != ring->rsp_cons) {
		printk(KERN_WARNING "XENBUS response ring is not quiescent "
		       "(%08x:%08x): fixing up\n",
		       ring->rsp_cons, ring->rsp_prod);
		ring->rsp_cons = ring->rsp_prod;
	}

	if (xenbus_irq)
		unbind_from_irqhandler(xenbus_irq, &xb_waitq);

	irq = bind_caller_port_to_irqhandler(xen_store_evtchn, wake_waiting,
					     0, "xenbus", &xb_waitq);
	if (irq <= 0) {
		printk(KERN_ERR "XENBUS request irq failed %i\n", irq);
		return irq;
	}

	xenbus_irq = irq;
	return 0;
}
/*
 * bind_caller_port_to_irqhandler - bind an event channel to an IRQ and
 * install a handler for it.
 *
 *   1. Obtain an IRQ for @caller_port through bind_caller_port_to_irq().
 *   2. Install @handler on that IRQ with request_irq().
 *
 * @caller_port: event-channel port to bind (here: the xenstore evtchn)
 * @handler:     interrupt handler to install
 * @irqflags:    flags passed to request_irq()
 * @devname:     name passed to request_irq()
 * @dev_id:      cookie passed to request_irq()
 *
 * Returns the IRQ number on success.  On failure returns the negative value
 * from bind_caller_port_to_irq(), or the non-zero result of request_irq()
 * after undoing the binding.
 */
int bind_caller_port_to_irqhandler(
	unsigned int caller_port,
	irqreturn_t (*handler)(int, void *, struct pt_regs *),
	unsigned long irqflags,
	const char *devname,
	void *dev_id)
{
	int irq, retval;

	irq = bind_caller_port_to_irq(caller_port);
	if (irq < 0)
		return irq;

	retval = request_irq(irq, handler, irqflags, devname, dev_id);
	if (retval != 0) {
		/* Drop the binding taken above so it is not leaked. */
		unbind_from_irq(irq);
		return retval;
	}

	return irq;
}
/*
 * Return the IRQ bound to @caller_port, creating the binding when the port
 * has none yet (-1 in evtchn_to_irq[]).  The bind count of the IRQ is
 * incremented on success.  Runs under irq_mapping_update_lock.
 *
 * Returns the IRQ number, or the negative error from find_unbound_irq().
 */
static int bind_caller_port_to_irq(unsigned int caller_port)
{
	int irq;

	spin_lock(&irq_mapping_update_lock);

	irq = evtchn_to_irq[caller_port];
	if (irq == -1) {
		irq = find_unbound_irq();
		if (irq < 0)
			goto out;

		/* Record the new pairing in the global evtchn <-> IRQ tables. */
		evtchn_to_irq[caller_port] = irq;
		irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
	}

	irq_bindcount[irq]++;

 out:
	spin_unlock(&irq_mapping_update_lock);
	return irq;
}
/*
 * Scan the dynamic IRQ range [DYNIRQ_BASE, DYNIRQ_BASE + NR_DYNIRQS) and
 * return the first IRQ whose bind count is zero.  When none is free, return
 * -ENOSPC; a warning is printed the first time this happens.
 */
static int find_unbound_irq(void)
{
	static int warned;
	int candidate = DYNIRQ_BASE;

	while (candidate < (DYNIRQ_BASE + NR_DYNIRQS)) {
		if (irq_bindcount[candidate] == 0)
			return candidate;
		candidate++;
	}

	if (warned)
		return -ENOSPC;

	warned = 1;
	printk(KERN_WARNING "No available IRQ to bind to: "
	       "increase NR_DYNIRQS.\n");
	return -ENOSPC;
}
/*
 * IRQ <-> event-channel mappings.
 * evtchn_to_irq[] is indexed by event-channel port.  Every entry starts out
 * as -1, the value bind_caller_port_to_irq() treats as "no IRQ bound yet".
 */
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
	[0 ... NR_EVENT_CHANNELS-1] = -1 };
/*
 * Interrupt handler for the xenstore event channel.
 *
 * On the first event seen while xenstored_ready is still 0, the flag is set
 * and probe_work is scheduled.  Every event wakes the sleepers on xb_waitq.
 */
static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
{
	if (unlikely(!xenstored_ready)) {
		xenstored_ready = 1;
		schedule_work(&probe_work);
	}

	/* Wake whoever is sleeping on the xb_waitq wait queue. */
	wake_up(&xb_waitq);

	return IRQ_HANDLED;
}
驱动的注册:
首先以设备类型来分类,对于每种类型的设备,分为front和back两种情况,分别有不同的设备驱动程序。
前端设备驱动程序将注册到前端总线;后端设备驱动程序将注册到后端总线。
例如,对于块设备,存在struct xenbus_driver blkfront和struct xenbus_driver blkback两种驱动程序。
/*
 * Device types served by the block frontend driver: "vbd".
 * NOTE(review): the trailing empty-string entry presumably terminates the
 * table -- confirm against the xenbus match code.
 */
static const struct xenbus_device_id blkfront_ids[] = {
	{ "vbd" },
	{ "" }
};
MODULE_ALIAS("xen:vbd");
/*
 * Block frontend xenbus driver.  xlblk_init() registers it on the frontend
 * bus with xenbus_register_frontend().
 */
static struct xenbus_driver blkfront = {
	.name = "vbd",
	.owner = THIS_MODULE,
	.ids = blkfront_ids,
	.probe = blkfront_probe,
	.remove = blkfront_remove,
	.resume = blkfront_resume,
	.otherend_changed = backend_changed,
	.is_ready = blkfront_is_ready,
};
/*
 * Device types served by the block backend driver: "vbd".
 * NOTE(review): the trailing empty-string entry presumably terminates the
 * table -- confirm against the xenbus match code.
 */
static const struct xenbus_device_id blkback_ids[] = {
	{ "vbd" },
	{ "" }
};
/*
 * Block backend xenbus driver; the counterpart of blkfront.  Unlike
 * blkfront it sets no .resume or .is_ready callback.
 */
static struct xenbus_driver blkback = {
	.name = "vbd",
	.owner = THIS_MODULE,
	.ids = blkback_ids,
	.probe = blkback_probe,
	.remove = blkback_remove,
	.otherend_changed = frontend_changed
};
/*
 * Module entry point of the block frontend: registers the blkfront driver
 * on the frontend bus.  Returns -ENODEV when not running on Xen, otherwise
 * the result of xenbus_register_frontend().
 */
static int __init xlblk_init(void)
{
	return is_running_on_xen() ? xenbus_register_frontend(&blkfront)
				   : -ENODEV;
}
module_init(xlblk_init);
/*
 * Register @drv on the frontend bus.
 *
 * Sets drv->read_otherend_details to read_backend_details, performs the
 * common registration against xenbus_frontend and, on success, waits for
 * devices to attach.  Returns 0 on success or the registration error.
 */
int xenbus_register_frontend(struct xenbus_driver *drv)
{
	int rc;

	drv->read_otherend_details = read_backend_details;

	rc = xenbus_register_driver_common(drv, &xenbus_frontend);
	if (rc == 0) {
		/* If this driver is loaded as a module wait for devices to attach. */
		wait_for_devices(drv);
	}

	return rc;
}
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus)
用struct xen_bus_type结构体(只有xenbus_frontend和xenbus_backend两种类型)初始化struct xenbus_driver里面struct device_driver类型结构体driver。
{
int ret;
if (bus->error)
return bus->error;
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
drv->driver.owner = drv->owner;
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
drv->driver.probe = xenbus_dev_probe;
drv->driver.remove = xenbus_dev_remove;
drv->driver.shutdown = xenbus_dev_shutdown;
#endif
mutex_lock(&xenwatch_mutex);
ret = driver_register(&drv->driver); // 注册device_driver
mutex_unlock(&xenwatch_mutex);
return ret;
}
/* Bus type for frontend drivers. */
static struct xen_bus_type xenbus_frontend = {
.root = "device",
.levels = 2, /* device/type/
.get_bus_id = frontend_bus_id,
.probe = xenbus_probe_frontend,
.error = -ENODEV,
.bus = {
.name = "xen",
.match = xenbus_match,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
.uevent = xenbus_uevent_frontend,
#endif
},
.dev = {
.bus_id = "xen",
},
};
static struct xen_bus_type xenbus_backend = {
.root = "backend",
.levels = 3, /* backend/type/
.get_bus_id = backend_bus_id,
.probe = xenbus_probe_backend,
.error = -ENODEV,
.bus = {
.name = "xen-backend",
.match = xenbus_match,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
// .shutdown = xenbus_dev_shutdown,
.uevent = xenbus_uevent_backend,
},
.dev = {
.bus_id = "xen-backend",
},
}
struct xen_bus_type
对总线结构体bus_type进行了包装
{
char *root;
int error;
unsigned int levels;
int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
int (*probe)(const char *type, const char *dir);
struct bus_type bus;
struct device dev;
};
Ø 通用设备模型结构体
1. struct bus_type
2. struct device_driver
3. struct device
struct bus_type {
描述了一个系统总线
const char * name;
struct subsystem subsys;
struct kset drivers;
struct kset devices;
struct klist klist_devices;
struct klist klist_drivers;
struct bus_attribute * bus_attrs;
struct device_attribute * dev_attrs;
struct driver_attribute * drv_attrs;
int (*match)(struct device * dev, struct device_driver * drv);
int (*uevent)(struct device *dev, char **envp,
int num_envp, char *buffer, int buffer_size);
int (*probe)(struct device * dev);
int (*remove)(struct device * dev);
void (*shutdown)(struct device * dev);
int (*suspend)(struct device * dev, pm_message_t state);
int (*resume)(struct device * dev);
}
struct device_driver {
描述了一个设备驱动程序
const char * name; // 驱动程序的名称
struct bus_type * bus; // 该驱动所管理的设备挂载到的总线
struct completion unloaded;
struct kobject kobj;
struct klist klist_devices;
struct klist_node knode_bus;
struct module * owner;
int (*probe) (struct device * dev);
int (*remove) (struct device * dev);
void (*shutdown) (struct device * dev);
int (*suspend) (struct device * dev, pm_message_t state);
int (*resume) (struct device * dev);
}
struct device {
描述了一个设备
struct klist klist_children;
struct klist_node knode_parent; /* node in sibling list */
struct klist_node knode_driver;
struct klist_node knode_bus;
struct device * parent;
struct kobject kobj;
char bus_id[BUS_ID_SIZE]; /* position on parent bus */
struct device_attribute uevent_attr;
struct device_attribute *devt_attr;
struct semaphore sem; /* semaphore to synchronize calls to
* its driver.
*/
struct bus_type * bus; /* type of bus device is on */
struct device_driver *driver; /* which driver has allocated this
device */
void *driver_data; /* data private to the driver */
void *platform_data; /* Platform specific data, device
core doesn't touch it */
void *firmware_data; /* Firmware specific data (e.g. ACPI,
BIOS data),reserved for device core*/
struct dev_pm_info power;
u64 *dma_mask; /* dma mask (if dma'able device) */
u64 coherent_dma_mask;/* Like dma_mask, but for
alloc_coherent mappings as
not all hardware supports
64 bit addresses for consistent
allocations such descriptors. */
struct list_head dma_pools; /* dma pools (if dma'ble) */
struct dma_coherent_mem *dma_mem; /* internal for coherent mem
override */
/* class_device migration path */
struct list_head node;
struct class *class; /* optional*/
dev_t devt; /* dev_t, creates the sysfs "dev" */
void (*release)(struct device * dev);
}
/* Inter-domain shared memory communications. */
/* Each ring (request and response) holds XENSTORE_RING_SIZE bytes. */
#define XENSTORE_RING_SIZE 1024
/* Ring indexes are free-running 32-bit counters ... */
typedef uint32_t XENSTORE_RING_IDX;
/* ... reduced to a buffer offset with this mask (ring size is a power of two). */
#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
/* Layout of the xenstore page: two byte rings followed by their indexes. */
struct xenstore_domain_interface {
	char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
	char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
	XENSTORE_RING_IDX req_cons, req_prod;
	XENSTORE_RING_IDX rsp_cons, rsp_prod;
};
Xenstore的读写
struct xenstore_domain_interface就是xenstore的共享页面。请求(req)和响应(rsp)两个环形缓冲区的大小都是XENSTORE_RING_SIZE,即1024字节。xenstore消息的头部是struct xsd_sockmsg。
/*
 * Header that precedes every xenstore message: four 32-bit fields, followed
 * by `len` bytes of payload.
 */
struct xsd_sockmsg
{
	uint32_t type; /* XS_??? */
	uint32_t req_id;/* Request identifier, echoed in daemon's response. */
	uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
	uint32_t len; /* Length of data following this. */
	/* Generally followed by nul-terminated string(s). */
};
类型type有以下很多种:
/*
 * Values of xsd_sockmsg.type.  The enumerators are numbered implicitly from
 * 0 in declaration order, so their order must not change.
 */
enum xsd_sockmsg_type
{
	XS_DEBUG,
	XS_DIRECTORY,
	XS_READ,
	XS_GET_PERMS,
	XS_WATCH,
	XS_UNWATCH,
	XS_TRANSACTION_START,
	XS_TRANSACTION_END,
	XS_INTRODUCE,
	XS_RELEASE,
	XS_GET_DOMAIN_PATH,
	XS_WRITE,
	XS_MKDIR,
	XS_RM,
	XS_SET_PERMS,
	XS_WATCH_EVENT,
	XS_ERROR,
	XS_IS_DOMAIN_INTRODUCED,
	XS_RESUME,
	XS_SET_TARGET
};
Ø 读:
/*
 * Read the value stored at @dir/@node within transaction @t.
 *
 * Returns what xs_single() returns for an XS_READ request (@len is passed
 * through to it), or the error pointer produced by join() when the path
 * cannot be built.  The temporary path is freed before returning.
 */
void *xenbus_read(struct xenbus_transaction t,
		  const char *dir, const char *node, unsigned int *len)
{
	char *path = join(dir, node);
	void *reply;

	if (IS_ERR(path))
		return (void *)path;

	reply = xs_single(t, XS_READ, path, len);
	kfree(path);

	return reply;
}
Ø 写:
/*
 * Write @string as the value of @dir/@node within transaction @t.
 *
 * The request carries two vectors: the path including its terminating NUL,
 * then the value without a terminating NUL.
 * Returns -err on failure.
 */
int xenbus_write(struct xenbus_transaction t,
		 const char *dir, const char *node, const char *string)
{
	struct kvec iovec[2];
	const char *path = join(dir, node);
	int rc;

	if (IS_ERR(path))
		return PTR_ERR(path);

	/* Vector 0: the path, NUL included. */
	iovec[0].iov_base = (void *)path;
	iovec[0].iov_len = strlen(path) + 1;

	/* Vector 1: the value, NUL excluded. */
	iovec[1].iov_base = (void *)string;
	iovec[1].iov_len = strlen(string);

	rc = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
	kfree(path);

	return rc;
}
EXPORT_SYMBOL_GPL(xenbus_write);
static void *xs_single(struct xenbus_transaction t,
enum xsd_sockmsg_type type,
const char *string,
unsigned int *len)
{
struct kvec iovec;
iovec.iov_base = (void *)string;
iovec.iov_len = strlen(string) + 1;
return xs_talkv(t, type, &iovec, 1, len);
}
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
static void *xs_talkv(struct xenbus_transaction t,
enum xsd_sockmsg_type type,
const struct kvec *iovec,
unsigned int num_vecs,
unsigned int *len)
{
struct xsd_sockmsg msg;
void *ret = NULL;
unsigned int i;
int err;
msg.tx_id = t.id;
msg.req_id = 0;
msg.type = type;
msg.len = 0;
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len; // 获得所有消息中数据的长度
mutex_lock(&xs_state.request_mutex);
err = xb_write(&msg, sizeof(msg)); // 写消息的头部
if (err) {
mutex_unlock(&xs_state.request_mutex);
return ERR_PTR(err);
}
for (i = 0; i < num_vecs; i++) {
err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; // 写消息的本体,可能分块
if (err) {
mutex_unlock(&xs_state.request_mutex);
return ERR_PTR(err);
}
}
ret = read_reply(&msg.type, len); // 获得返回值
mutex_unlock(&xs_state.request_mutex);
if (IS_ERR(ret))
return ret;
if (msg.type == XS_ERROR) {
err = get_error(ret);
kfree(ret);
return ERR_PTR(-err);
}
if (msg.type != type) {
if (printk_ratelimit())
printk(KERN_WARNING
"XENBUS unexpected type [%d], expected [%d]\n",
msg.type, type);
kfree(ret);
return ERR_PTR(-EINVAL);
}
return ret;
}
/*
 * xb_write - copy @len bytes from @data into the xenstore request ring.
 *
 * Loops until everything is written.  Each pass copies as much as fits and
 * then notifies the other end through the xenstore event channel.
 *
 * Returns 0 once all bytes are written, the negative result of
 * wait_event_interruptible() if the wait fails, or -EIO when
 * check_indexes() rejects the ring indexes (both request indexes are then
 * reset to 0).
 */
int xb_write(const void *data, unsigned len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	int rc;
	while (len != 0) {
		void *dst;
		unsigned int avail;
		/*
		 * Sleep on the xb_waitq wait queue until the request ring is
		 * no longer full, i.e. (req_prod - req_cons) differs from the
		 * ring size.
		 */
		rc = wait_event_interruptible(
			xb_waitq,
			(intf->req_prod - intf->req_cons) !=
			XENSTORE_RING_SIZE);
		if (rc < 0)
			return rc;
		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		/*
		 * Get the start of the writable area in the request buffer
		 * and the number of bytes that may be written there.
		 */
		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;
		/* Must write data /after/ reading the consumer index. */
		mb();
		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		/* Other side must not see new producer until data is there. */
		wmb();
		intf->req_prod += avail;
		/* Implies mb(): other side will see the updated producer. */
		/* Send the event notification. */
		notify_remote_via_evtchn(xen_store_evtchn);
	}
	return 0;
}
/*
 * Locate the next contiguous writable region of a ring buffer.
 *
 * @cons, @prod: current consumer / producer indexes
 * @buf:         base of the ring's byte buffer
 * @len:         out: size of the region, the smaller of the distance from
 *               the producer offset to the end of the buffer and the free
 *               space left in the ring
 *
 * Returns a pointer to the producer's offset inside @buf.
 */
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	uint32_t until_wrap = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	uint32_t free_bytes = XENSTORE_RING_SIZE - (prod - cons);

	*len = (free_bytes < until_wrap) ? free_bytes : until_wrap;

	return buf + MASK_XENSTORE_IDX(prod);
}
static inline void notify_remote_via_evtchn(int port)
{
struct evtchn_send send = { .port = port }; // xen_store_evtchn
VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
}