Let's analyze the startup process of ServiceManager. ServiceManager is the process that manages all system services: it provides the API that other processes use to register services and to look them up. ServiceManager talks to the binder driver directly to implement cross-process IPC. It is started from init.rc, the script that the init process parses in order to launch its various services. First, here is the service entry for ServiceManager:
service servicemanager /system/bin/servicemanager
    class core
    user system
    group system
    critical
    onrestart restart healthd
    onrestart restart zygote
    onrestart restart media
    onrestart restart surfaceflinger
    onrestart restart drm

servicemanager is marked critical, and when it restarts, healthd, zygote, media, surfaceflinger and drm are restarted with it, because every service registered with the old instance becomes unreachable. The binary's entry point is the main() function in service_manager.c:
int main(int argc, char **argv)
{
    struct binder_state *bs;
    void *svcmgr = BINDER_SERVICE_MANAGER;

    bs = binder_open(128*1024);

    if (binder_become_context_manager(bs)) {
        ALOGE("cannot become context manager (%s)\n", strerror(errno));
        return -1;
    }

    svcmgr_handle = svcmgr;
    binder_loop(bs, svcmgr_handler);
    return 0;
}
main() does three things: it opens the binder driver with binder_open(128*1024), registers itself as the context manager, and enters binder_loop() to wait for requests. The binder_state structure it carries around is defined as follows:
struct binder_state {
    int fd;           /* fd of the opened /dev/binder */
    void *mapped;     /* start address of the mmap'ed buffer */
    unsigned mapsize; /* size of the mapping */
};

binder_open() opens /dev/binder and maps mapsize bytes (128KB here) of buffer space into the caller's address space:
struct binder_state *binder_open(unsigned mapsize)
{
    struct binder_state *bs;

    bs = malloc(sizeof(*bs));
    if (!bs) {
        errno = ENOMEM;
        return 0;
    }

    bs->fd = open("/dev/binder", O_RDWR);
    if (bs->fd < 0) {
        fprintf(stderr, "binder: cannot open device (%s)\n", strerror(errno));
        goto fail_open;
    }

    bs->mapsize = mapsize;
    bs->mapped = mmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, bs->fd, 0);
    if (bs->mapped == MAP_FAILED) {
        fprintf(stderr, "binder: cannot map device (%s)\n", strerror(errno));
        goto fail_map;
    }

    return bs;

fail_map:
    close(bs->fd);
fail_open:
    free(bs);
    return 0;
}
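For completeness, servicemanager's binder.c also carries the matching teardown helper; it is not used on the startup path, but it shows what undoing binder_open() amounts to (shown here as a rough sketch):

/* Teardown counterpart of binder_open(): unmap the shared area,
 * close /dev/binder and free the bookkeeping structure. */
void binder_close(struct binder_state *bs)
{
    munmap(bs->mapped, bs->mapsize);
    close(bs->fd);
    free(bs);
}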
The open("/dev/binder") call lands in the kernel binder driver, which registers itself as a misc character device with the following file operations:

static const struct file_operations binder_fops = {
    .owner = THIS_MODULE,
    .poll = binder_poll,
    .unlocked_ioctl = binder_ioctl,
    .mmap = binder_mmap,
    .open = binder_open,
    .flush = binder_flush,
    .release = binder_release,
};

static struct miscdevice binder_miscdev = {
    .minor = MISC_DYNAMIC_MINOR,
    .name = "binder",
    .fops = &binder_fops
};
The open() system call is therefore handled by the driver's binder_open(), which allocates a binder_proc for the calling process and stores it in filp->private_data, so that every later mmap() and ioctl() on this fd can find it:

static int binder_open(struct inode *nodp, struct file *filp)
{
    struct binder_proc *proc;

    proc = kzalloc(sizeof(*proc), GFP_KERNEL);
    if (proc == NULL)
        return -ENOMEM;
    get_task_struct(current);
    proc->tsk = current;
    INIT_LIST_HEAD(&proc->todo);
    init_waitqueue_head(&proc->wait);
    proc->default_priority = task_nice(current);

    binder_lock(__func__);
    binder_stats_created(BINDER_STAT_PROC);
    hlist_add_head(&proc->proc_node, &binder_procs);
    proc->pid = current->group_leader->pid;
    INIT_LIST_HEAD(&proc->delivered_death);
    filp->private_data = proc;
    binder_unlock(__func__);

    return 0;
}
binder_proc is the driver's per-process bookkeeping structure:

struct binder_proc {
    struct hlist_node proc_node;     /* links this binder_proc into the global binder_procs list */
    struct rb_root threads;          /* red-black tree of all binder threads of this process */
    struct rb_root nodes;            /* red-black tree of all binder nodes (entities) owned by this process */
    struct rb_root refs_by_desc;     /* binder references, indexed by descriptor (handle) */
    struct rb_root refs_by_node;     /* binder references, indexed by node */
    int pid;                         /* pid of the process's group leader */
    struct vm_area_struct *vma;      /* user-space memory mapping */
    struct mm_struct *vma_vm_mm;     /* mm_struct the mapping belongs to */
    struct task_struct *tsk;         /* task_struct of the opening process */
    struct files_struct *files;      /* open files of the process */
    struct hlist_node deferred_work_node;
    int deferred_work;
    void *buffer;                    /* start of the kernel virtual mapping */
    ptrdiff_t user_buffer_offset;    /* offset between the user mapping and the kernel mapping */
    struct list_head buffers;
    struct rb_root free_buffers;     /* red-black tree of free buffers */
    struct rb_root allocated_buffers;
    size_t free_async_space;
    struct page **pages;             /* the underlying physical pages */
    size_t buffer_size;              /* total size of the mapped area */
    uint32_t buffer_free;            /* amount of free buffer space left */
    struct list_head todo;           /* pending work items */
    wait_queue_head_t wait;          /* wait queue that idle binder threads sleep on */
    struct binder_stats stats;       /* statistics for this process */
    struct list_head delivered_death;
    int max_threads;
    int requested_threads;
    int requested_threads_started;
    int ready_threads;
    long default_priority;
    struct dentry *debugfs_entry;
};
The mmap() call made inside binder_open() is handled by the driver's binder_mmap(). It caps the mapping at 4MB, reserves a matching range of kernel virtual addresses with get_vm_area(), and records the constant difference between the user-space and kernel-space views of the same buffer in user_buffer_offset:

static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
{
    int ret;
    struct vm_struct *area;
    struct binder_proc *proc = filp->private_data;
    const char *failure_string;
    struct binder_buffer *buffer;

    if ((vma->vm_end - vma->vm_start) > SZ_4M)
        vma->vm_end = vma->vm_start + SZ_4M;

    if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) {
        ret = -EPERM;
        failure_string = "bad vm_flags";
        goto err_bad_arg;
    }
    vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE;

    mutex_lock(&binder_mmap_lock);
    if (proc->buffer) {
        ret = -EBUSY;
        failure_string = "already mapped";
        goto err_already_mapped;
    }

    area = get_vm_area(vma->vm_end - vma->vm_start, VM_IOREMAP);
    if (area == NULL) {
        ret = -ENOMEM;
        failure_string = "get_vm_area";
        goto err_get_vm_area_failed;
    }
    proc->buffer = area->addr;
    proc->user_buffer_offset = vma->vm_start - (uintptr_t)proc->buffer;
    mutex_unlock(&binder_mmap_lock);

    proc->pages = kzalloc(sizeof(proc->pages[0]) * ((vma->vm_end - vma->vm_start) / PAGE_SIZE),
                          GFP_KERNEL);
    if (proc->pages == NULL) {
        ret = -ENOMEM;
        failure_string = "alloc page array";
        goto err_alloc_pages_failed;
    }
    proc->buffer_size = vma->vm_end - vma->vm_start;

    vma->vm_ops = &binder_vm_ops;
    vma->vm_private_data = proc;

    if (binder_update_page_range(proc, 1, proc->buffer, proc->buffer + PAGE_SIZE, vma)) {
        ret = -ENOMEM;
        failure_string = "alloc small buf";
        goto err_alloc_small_buf_failed;
    }
    buffer = proc->buffer;
    INIT_LIST_HEAD(&proc->buffers);
    list_add(&buffer->entry, &proc->buffers);
    buffer->free = 1;
    binder_insert_free_buffer(proc, buffer);
    proc->free_async_space = proc->buffer_size / 2;
    barrier();
    proc->files = get_files_struct(proc->tsk);
    proc->vma = vma;
    proc->vma_vm_mm = vma->vm_mm;

    return 0;

    /* ... error labels (err_alloc_small_buf_failed etc.) omitted ... */
}
Next, binder_update_page_range() allocates the actual physical pages and maps each of them into both the user-space mapping and the kernel mapping:
static int binder_update_page_range(struct binder_proc *proc, int allocate,
                                    void *start, void *end,
                                    struct vm_area_struct *vma)
{
    void *page_addr;
    unsigned long user_page_addr;
    struct vm_struct tmp_area;
    struct page **page;
    struct mm_struct *mm;

    if (end <= start)
        return 0;

    trace_binder_update_page_range(proc, allocate, start, end);

    if (vma)
        mm = NULL;
    else
        mm = get_task_mm(proc->tsk);

    if (mm) {
        down_write(&mm->mmap_sem);
        vma = proc->vma;
        if (vma && mm != proc->vma_vm_mm) {
            pr_err("binder: %d: vma mm and task mm mismatch\n", proc->pid);
            vma = NULL;
        }
    }

    if (allocate == 0)
        goto free_range;

    if (vma == NULL) {
        printk(KERN_ERR "binder: %d: binder_alloc_buf failed to map pages in userspace, no vma\n",
               proc->pid);
        goto err_no_vma;
    }

    for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
        int ret;
        struct page **page_array_ptr;

        page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];

        BUG_ON(*page);
        *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
        if (*page == NULL) {
            printk(KERN_ERR "binder: %d: binder_alloc_buf failed for page at %p\n",
                   proc->pid, page_addr);
            goto err_alloc_page_failed;
        }
        /* map the page into the kernel's virtual address space */
        tmp_area.addr = page_addr;
        tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
        page_array_ptr = page;
        ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr);
        if (ret) {
            printk(KERN_ERR "binder: %d: binder_alloc_buf failed to map page at %p in kernel\n",
                   proc->pid, page_addr);
            goto err_map_kernel_failed;
        }
        /* map the same physical page into the user-space vma */
        user_page_addr = (uintptr_t)page_addr + proc->user_buffer_offset;
        ret = vm_insert_page(vma, user_page_addr, page[0]);
        if (ret) {
            printk(KERN_ERR "binder: %d: binder_alloc_buf failed to map page at %lx in userspace\n",
                   proc->pid, user_page_addr);
            goto err_vm_insert_page_failed;
        }
        /* vm_insert_page does not seem to increment the refcount */
    }
    if (mm) {
        up_write(&mm->mmap_sem);
        mmput(mm);
    }
    return 0;

    /* ... free_range and error paths omitted ... */
}
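The important invariant established here is that each physical page is visible at two virtual addresses that differ by the constant user_buffer_offset: the driver fills buffers through the kernel mapping, and user space (whose mapping is read-only, PROT_READ) reads the result directly, with no second copy. A hypothetical helper (not present in the driver) makes the arithmetic explicit:

/* Sketch: convert a kernel-side address inside the binder mapping to the
 * address at which the same physical page appears in user space. This
 * mirrors what the driver does when it hands a buffer to a process. */
static void __user *binder_kernel_to_user(struct binder_proc *proc, void *kaddr)
{
    return (void __user *)((uintptr_t)kaddr + proc->user_buffer_offset);
}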
Back in service_manager's main(), binder_become_context_manager() is called to make ServiceManager the binder context manager:
int binder_become_context_manager(struct binder_state *bs)
{
    return ioctl(bs->fd, BINDER_SET_CONTEXT_MGR, 0);
}
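For orientation, the driver's ioctl vocabulary in this era is small. The two commands used in this article, BINDER_SET_CONTEXT_MGR and BINDER_WRITE_READ, are defined in binder.h roughly as follows (quoted as a reference sketch, not verified against a specific kernel tree):

/* Approximate binder ioctl definitions from binder.h of this era. */
#define BINDER_WRITE_READ       _IOWR('b', 1, struct binder_write_read)
#define BINDER_SET_MAX_THREADS  _IOW('b', 5, size_t)
#define BINDER_SET_CONTEXT_MGR  _IOW('b', 7, int)
#define BINDER_THREAD_EXIT      _IOW('b', 8, int)
#define BINDER_VERSION          _IOWR('b', 9, struct binder_version)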
The ioctl is handled by the driver's binder_ioctl(); only the BINDER_SET_CONTEXT_MGR branch is shown here:

static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    int ret;
    struct binder_proc *proc = filp->private_data;
    struct binder_thread *thread;
    unsigned int size = _IOC_SIZE(cmd);
    void __user *ubuf = (void __user *)arg;

    /* ... */
    thread = binder_get_thread(proc);
    if (thread == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    switch (cmd) {
    /* ... other commands ... */
    case BINDER_SET_CONTEXT_MGR:
        if (binder_context_mgr_node != NULL) {
            printk(KERN_ERR "binder: BINDER_SET_CONTEXT_MGR already set\n");
            ret = -EBUSY;
            goto err;
        }
        ret = security_binder_set_context_mgr(proc->tsk);
        if (ret < 0)
            goto err;
        if (binder_context_mgr_uid != -1) {
            if (binder_context_mgr_uid != current->cred->euid) {
                printk(KERN_ERR "binder: BINDER_SET_CONTEXT_MGR bad uid %d != %d\n",
                       current->cred->euid, binder_context_mgr_uid);
                ret = -EPERM;
                goto err;
            }
        } else
            binder_context_mgr_uid = current->cred->euid;
        binder_context_mgr_node = binder_new_node(proc, NULL, NULL);
        if (binder_context_mgr_node == NULL) {
            ret = -ENOMEM;
            goto err;
        }
        binder_context_mgr_node->local_weak_refs++;
        binder_context_mgr_node->local_strong_refs++;
        binder_context_mgr_node->has_strong_ref = 1;
        binder_context_mgr_node->has_weak_ref = 1;
        break;
    /* ... */
    }
err:
    if (thread)
        thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN;
    /* ... */
}
binder_ioctl() first calls binder_get_thread() to look up, or create, the binder_thread for the calling thread:
static struct binder_thread *binder_get_thread(struct binder_proc *proc)
{
    struct binder_thread *thread = NULL;
    struct rb_node *parent = NULL;
    struct rb_node **p = &proc->threads.rb_node;

    while (*p) {
        parent = *p;
        thread = rb_entry(parent, struct binder_thread, rb_node);

        if (current->pid < thread->pid)
            p = &(*p)->rb_left;
        else if (current->pid > thread->pid)
            p = &(*p)->rb_right;
        else
            break;
    }
    if (*p == NULL) {
        thread = kzalloc(sizeof(*thread), GFP_KERNEL);
        if (thread == NULL)
            return NULL;
        binder_stats_created(BINDER_STAT_THREAD);
        thread->proc = proc;
        thread->pid = current->pid;
        init_waitqueue_head(&thread->wait);
        INIT_LIST_HEAD(&thread->todo);
        rb_link_node(&thread->rb_node, parent, p);
        rb_insert_color(&thread->rb_node, &proc->threads);
        thread->looper |= BINDER_LOOPER_STATE_NEED_RETURN;
        thread->return_error = BR_OK;
        thread->return_error2 = BR_OK;
    }
    return thread;
}
This is a classic find-or-insert on the threads red-black tree, keyed by current->pid. The binder_thread structure:

struct binder_thread {
    struct binder_proc *proc;  /* the owning process's binder_proc */
    struct rb_node rb_node;    /* links this thread into binder_proc's threads tree */
    int pid;                   /* pid of this thread (current->pid) */
    int looper;                /* state of this binder thread */
    struct binder_transaction *transaction_stack;
    struct list_head todo;     /* pending work items */
    uint32_t return_error;     /* Write failed, return error code in read buf */
    uint32_t return_error2;    /* Write failed, return error code in read */
                               /* buffer. Used when sending a reply to a dead
                                * process that we are also waiting on */
    wait_queue_head_t wait;
    struct binder_stats stats;
};
Now look at the code handling the BINDER_SET_CONTEXT_MGR command. binder_context_mgr_node is the binder_node that represents ServiceManager, and binder_context_mgr_uid records ServiceManager's uid; here it is set to the euid of the servicemanager process. binder_new_node() is then called to allocate the binder_node for binder_context_mgr_node:
static struct binder_node *binder_new_node(struct binder_proc *proc,
                                           void __user *ptr,
                                           void __user *cookie)
{
    struct rb_node **p = &proc->nodes.rb_node;
    struct rb_node *parent = NULL;
    struct binder_node *node;

    while (*p) {
        parent = *p;
        node = rb_entry(parent, struct binder_node, rb_node);

        if (ptr < node->ptr)
            p = &(*p)->rb_left;
        else if (ptr > node->ptr)
            p = &(*p)->rb_right;
        else
            return NULL;
    }

    node = kzalloc(sizeof(*node), GFP_KERNEL);
    if (node == NULL)
        return NULL;
    binder_stats_created(BINDER_STAT_NODE);
    rb_link_node(&node->rb_node, parent, p);
    rb_insert_color(&node->rb_node, &proc->nodes);
    node->debug_id = ++binder_last_id;
    node->proc = proc;
    node->ptr = ptr;
    node->cookie = cookie;
    node->work.type = BINDER_WORK_NODE;
    INIT_LIST_HEAD(&node->work.entry);
    INIT_LIST_HEAD(&node->async_todo);
    binder_debug(BINDER_DEBUG_INTERNAL_REFS,
                 "binder: %d:%d node %d u%p c%p created\n",
                 proc->pid, current->pid, node->debug_id,
                 node->ptr, node->cookie);
    return node;
}
Since this is the first binder_node the service_manager process creates, the nodes red-black tree in its binder_proc is still empty, so a new binder_node is allocated and inserted. First, the data structure:
struct binder_node {
    int debug_id;
    struct binder_work work;         /* work type of this node (BINDER_WORK_NODE) */
    union {
        struct rb_node rb_node;      /* links a live node into binder_proc's nodes tree */
        struct hlist_node dead_node; /* links a dead node into the global binder_dead_nodes list */
    };
    struct binder_proc *proc;        /* the binder_proc that owns this node */
    struct hlist_head refs;          /* all binder_refs pointing at this node */
    int internal_strong_refs;
    int local_weak_refs;
    int local_strong_refs;
    void __user *ptr;                /* user-space address of the binder's weak reference */
    void __user *cookie;             /* user-space address of the binder object itself */
    unsigned has_strong_ref:1;
    unsigned pending_strong_ref:1;
    unsigned has_weak_ref:1;
    unsigned pending_weak_ref:1;
    unsigned has_async_transaction:1;
    unsigned accept_fds:1;
    unsigned min_priority:8;
    struct list_head async_todo;
};
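Note that the context manager's node is created with ptr and cookie both NULL and is recorded in the global binder_context_mgr_node, so any process can reach ServiceManager without looking it up first: handle 0 is hard-wired to the context manager. As a hypothetical client-side sketch (the field layout follows the old 32-bit binder_transaction_data; the helper name and buffer contents are made up):

/* Hypothetical sketch: addressing ServiceManager from a client.
 * Handle 0 always resolves to binder_context_mgr_node in the driver. */
void target_service_manager(struct binder_transaction_data *txn)
{
    memset(txn, 0, sizeof(*txn));
    txn->target.handle = 0;            /* 0 == the context manager */
    txn->code = SVC_MGR_CHECK_SERVICE; /* e.g. look up a service by name */
    txn->flags = 0;
    /* txn->data.ptr.buffer / txn->data.ptr.offsets would carry the
     * serialized request (service name etc.); omitted here. */
}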
With the context manager registered, main() finally calls binder_loop() to service incoming requests:

void binder_loop(struct binder_state *bs, binder_handler func)
{
    int res;
    struct binder_write_read bwr;
    unsigned readbuf[32];

    bwr.write_size = 0;
    bwr.write_consumed = 0;
    bwr.write_buffer = 0;

    readbuf[0] = BC_ENTER_LOOPER;
    binder_write(bs, readbuf, sizeof(unsigned));

    for (;;) {
        bwr.read_size = sizeof(readbuf);
        bwr.read_consumed = 0;
        bwr.read_buffer = (unsigned) readbuf;

        res = ioctl(bs->fd, BINDER_WRITE_READ, &bwr);

        if (res < 0) {
            ALOGE("binder_loop: ioctl failed (%s)\n", strerror(errno));
            break;
        }

        res = binder_parse(bs, 0, readbuf, bwr.read_consumed, func);
        if (res == 0) {
            ALOGE("binder_loop: unexpected reply?!\n");
            break;
        }
        if (res < 0) {
            ALOGE("binder_loop: io error %d %s\n", res, strerror(errno));
            break;
        }
    }
}
binder_loop() first tells the driver that this thread is entering the loop, by sending BC_ENTER_LOOPER through binder_write():

int binder_write(struct binder_state *bs, void *data, unsigned len)
{
    struct binder_write_read bwr;
    int res;

    bwr.write_size = len;
    bwr.write_consumed = 0;
    bwr.write_buffer = (unsigned) data;
    bwr.read_size = 0;
    bwr.read_consumed = 0;
    bwr.read_buffer = 0;
    res = ioctl(bs->fd, BINDER_WRITE_READ, &bwr);
    if (res < 0) {
        fprintf(stderr, "binder_write: ioctl failed (%s)\n", strerror(errno));
    }
    return res;
}
A binder_write_read structure is declared first. binder_write_read is the structure used to exchange data between user space and kernel space, defined as follows:
struct binder_write_read {
    signed long write_size;     /* bytes to write */
    signed long write_consumed; /* bytes consumed by driver */
    unsigned long write_buffer;
    signed long read_size;      /* bytes to read */
    signed long read_consumed;  /* bytes consumed by driver */
    unsigned long read_buffer;
};
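A single BINDER_WRITE_READ ioctl can carry both directions at once: a client typically fills in the write half with commands and the read half with a reply buffer, so a synchronous call costs one system call. A minimal sketch, assuming fd is an open /dev/binder descriptor (buffer contents omitted; in real code wbuf would start with a BC_TRANSACTION):

/* Sketch: one BINDER_WRITE_READ ioctl carrying both a command buffer
 * out and a reply buffer back, the way a synchronous call is made. */
int binder_call_sketch(int fd)
{
    unsigned wbuf[32], rbuf[32];   /* BC_* commands out, BR_* replies in */
    struct binder_write_read bwr;

    bwr.write_size = sizeof(wbuf); /* consumed by binder_thread_write() */
    bwr.write_consumed = 0;
    bwr.write_buffer = (unsigned long) wbuf;
    bwr.read_size = sizeof(rbuf);  /* filled in by binder_thread_read() */
    bwr.read_consumed = 0;
    bwr.read_buffer = (unsigned long) rbuf;

    if (ioctl(fd, BINDER_WRITE_READ, &bwr) < 0)
        return -1;
    /* bwr.read_consumed now says how many reply bytes the driver wrote */
    return 0;
}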
In binder_write(), a BINDER_WRITE_READ command is issued to the binder driver with a binder_write_read structure in which only write_buffer and write_size are filled in, while read_size and read_buffer are zero. Let's see how the driver handles this request:
static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    int ret;
    struct binder_proc *proc = filp->private_data;
    struct binder_thread *thread;
    unsigned int size = _IOC_SIZE(cmd);
    void __user *ubuf = (void __user *)arg;

    binder_lock(__func__);
    thread = binder_get_thread(proc);
    if (thread == NULL) {
        ret = -ENOMEM;
        goto err;
    }

    switch (cmd) {
    case BINDER_WRITE_READ: {
        struct binder_write_read bwr;

        if (size != sizeof(struct binder_write_read)) {
            ret = -EINVAL;
            goto err;
        }
        if (copy_from_user(&bwr, ubuf, sizeof(bwr))) {
            ret = -EFAULT;
            goto err;
        }
        if (bwr.write_size > 0) {
            ret = binder_thread_write(proc, thread,
                                      (void __user *)bwr.write_buffer,
                                      bwr.write_size, &bwr.write_consumed);
            trace_binder_write_done(ret);
            if (ret < 0) {
                bwr.read_consumed = 0;
                if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
                    ret = -EFAULT;
                goto err;
            }
        }
        if (bwr.read_size > 0) {
            ret = binder_thread_read(proc, thread,
                                     (void __user *)bwr.read_buffer,
                                     bwr.read_size, &bwr.read_consumed,
                                     filp->f_flags & O_NONBLOCK);
            trace_binder_read_done(ret);
            if (!list_empty(&proc->todo))
                wake_up_interruptible(&proc->wait);
            if (ret < 0) {
                if (copy_to_user(ubuf, &bwr, sizeof(bwr)))
                    ret = -EFAULT;
                goto err;
            }
        }
        if (copy_to_user(ubuf, &bwr, sizeof(bwr))) {
            ret = -EFAULT;
            goto err;
        }
        break;
    }
    /* ... other commands ... */
    }
err:
    if (thread)
        thread->looper &= ~BINDER_LOOPER_STATE_NEED_RETURN;
    /* ... */
}
For the BC_ENTER_LOOPER request, write_size is non-zero and read_size is zero, so only binder_thread_write() runs (only the relevant branch is shown):

int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread,
                        void __user *buffer, int size, signed long *consumed)
{
    uint32_t cmd;
    void __user *ptr = buffer + *consumed;
    void __user *end = buffer + size;

    while (ptr < end && thread->return_error == BR_OK) {
        if (get_user(cmd, (uint32_t __user *)ptr))
            return -EFAULT;
        ptr += sizeof(uint32_t);
        trace_binder_command(cmd);
        switch (cmd) {
        /* ... */
        case BC_ENTER_LOOPER:
            if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) {
                thread->looper |= BINDER_LOOPER_STATE_INVALID;
                binder_user_error("binder: %d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n",
                                  proc->pid, thread->pid);
            }
            thread->looper |= BINDER_LOOPER_STATE_ENTERED;
            break;
        /* ... */
        }
    }
    /* ... */
}
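For reference, the looper field manipulated here is a bitmask of thread states. The driver of this era defines them roughly as follows (values quoted as an indicative sketch):

/* Approximate thread looper states from the binder driver. */
enum {
    BINDER_LOOPER_STATE_REGISTERED  = 0x01, /* entered via BC_REGISTER_LOOPER (driver-requested thread) */
    BINDER_LOOPER_STATE_ENTERED     = 0x02, /* entered via BC_ENTER_LOOPER (app-started thread) */
    BINDER_LOOPER_STATE_EXITED      = 0x04, /* thread has left the loop */
    BINDER_LOOPER_STATE_INVALID     = 0x08, /* protocol misuse, e.g. double enter */
    BINDER_LOOPER_STATE_WAITING     = 0x10, /* thread is blocked waiting for work */
    BINDER_LOOPER_STATE_NEED_RETURN = 0x20  /* thread must return to user space */
};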
The BC_ENTER_LOOPER command is read out of the write_buffer of the binder_write_read structure, and BINDER_LOOPER_STATE_ENTERED is set in the binder_thread's looper, marking the thread as having entered the loop. After BC_ENTER_LOOPER has been processed, binder_loop() issues another BINDER_WRITE_READ ioctl; this time the binder_write_read argument has a non-zero read_size and a zero write_size, so by what we saw above, the request is handled by binder_thread_read():
static int binder_thread_read(struct binder_proc *proc,
                              struct binder_thread *thread,
                              void __user *buffer, int size,
                              signed long *consumed, int non_block)
{
    void __user *ptr = buffer + *consumed;
    void __user *end = buffer + size;
    int ret = 0;
    int wait_for_proc_work;

    if (*consumed == 0) {
        if (put_user(BR_NOOP, (uint32_t __user *)ptr))
            return -EFAULT;
        ptr += sizeof(uint32_t);
    }

retry:
    wait_for_proc_work = thread->transaction_stack == NULL &&
                         list_empty(&thread->todo);

    if (thread->return_error != BR_OK && ptr < end) {
        /* ... report the pending error ... */
    }

    thread->looper |= BINDER_LOOPER_STATE_WAITING;
    if (wait_for_proc_work)
        proc->ready_threads++;

    binder_unlock(__func__);

    if (wait_for_proc_work) {
        if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
                                BINDER_LOOPER_STATE_ENTERED))) {
            /* ... complain: thread waiting for process work without entering the loop ... */
        }
        binder_set_nice(proc->default_priority);
        if (non_block) {
            /* ... non-blocking check ... */
        } else
            ret = wait_event_freezable_exclusive(proc->wait,
                                                 binder_has_proc_work(proc, thread));
    } else {
        /* ... wait on the thread's own todo list ... */
    }
    /* ... */
}
Because binder_loop() set read_consumed to 0, a BR_NOOP command is first written into read_buffer. The freshly created binder_thread has an empty transaction_stack and an empty todo list, so wait_for_proc_work is true, meaning the thread must wait for client requests; BINDER_LOOPER_STATE_WAITING is OR'ed into the thread's looper, which, after the earlier BC_ENTER_LOOPER, now holds BINDER_LOOPER_STATE_ENTERED | BINDER_LOOPER_STATE_WAITING. Since /dev/binder was opened without the O_NONBLOCK flag, non_block is false, so the thread finally blocks in wait_event_freezable_exclusive() until a client request arrives. The flow is summarized below:
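binder_open(128*1024)                    // user space: open /dev/binder + mmap 128KB
    -> binder_open()                     // driver: allocate binder_proc, hang it on filp->private_data
    -> binder_mmap()                     // driver: reserve kernel VA range, record user_buffer_offset, map the first page
binder_become_context_manager(bs)
    -> BINDER_SET_CONTEXT_MGR            // driver: create binder_context_mgr_node, record binder_context_mgr_uid
binder_loop(bs, svcmgr_handler)
    -> BC_ENTER_LOOPER                   // driver: looper |= BINDER_LOOPER_STATE_ENTERED
    -> BINDER_WRITE_READ (read only)     // driver: looper |= BINDER_LOOPER_STATE_WAITING,
                                         //         block in wait_event_freezable_exclusive()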