直接来看下ftrace打印出来的路径:
 3)               |  vfs_write() {
 3)               |    do_sync_write() {
 3)   2.805 us    |      ocfs2_file_aio_write();
 3)   3.285 us    |    }
 3)   4.392 us    |  }
vfs_write()代码逻辑很简单:检查参数,确认用户态buf可访问,回调具体文件系统实现的write方法(没有.write时退回到do_sync_write());
写入成功(ret > 0)时触发fsnotify,并把写入的字节数计入进程IO记帐;只要rw_verify_area()检查通过,无论写入结果如何,都会自增写系统调用计数。
/*
 * vfs_write - validate a write request and hand it to the file's write method.
 *
 * @file:  open file being written
 * @buf:   user-space buffer holding the data to write
 * @count: number of bytes requested
 * @pos:   pointer to the file offset, passed through to the write method
 *
 * Returns -EBADF if the file lacks FMODE_WRITE, -EINVAL if there is no f_op
 * or neither ->write nor ->aio_write is set, -EFAULT if access_ok() rejects
 * the user buffer, a negative value from rw_verify_area(), or otherwise the
 * value returned by the write method.
 */
404 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
405 {
406     ssize_t ret;
407
408     if (!(file->f_mode & FMODE_WRITE))
409         return -EBADF;
410     if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
411         return -EINVAL;
        /* buf is the data source of a write, so it is checked with VERIFY_READ */
412     if (unlikely(!access_ok(VERIFY_READ, buf, count)))
413         return -EFAULT;
414
415     ret = rw_verify_area(WRITE, file, pos, count);
416     if (ret >= 0) {
            /* a non-negative result from rw_verify_area() replaces the requested count */
417         count = ret;
            /* prefer ->write; fall back to do_sync_write() when only ->aio_write exists */
418         if (file->f_op->write)
419             ret = file->f_op->write(file, buf, count, pos);
420         else
421             ret = do_sync_write(file, buf, count, pos);
            /* notify and account written bytes only when something was written */
422         if (ret > 0) {
423             fsnotify_modify(file);
424             add_wchar(current, ret);
425         }
            /* the write-syscall counter is bumped even if the write method failed */
426         inc_syscw(current);
427     }
428
429     return ret;
430 }
ocfs2的文件操作表,并没有直接将.write和ocfs2_file_aio_write()挂钩,fs/ocfs2/file.c:
/*
 * File operations table for ocfs2 files.
 * .read/.write point at the generic do_sync_read/do_sync_write wrappers;
 * the ocfs2-specific routines are hooked in through .aio_read/.aio_write.
 */
2656 const struct file_operations ocfs2_fops = {
2657     .llseek = generic_file_llseek,
2658     .read = do_sync_read,
2659     .write = do_sync_write,
2660     .mmap = ocfs2_mmap,
2661     .fsync = ocfs2_sync_file,
2662     .release = ocfs2_file_release,
2663     .open = ocfs2_file_open,
2664     .aio_read = ocfs2_file_aio_read,
2665     .aio_write = ocfs2_file_aio_write,
2666     .unlocked_ioctl = ocfs2_ioctl,
         /* compat ioctl handler is only present when CONFIG_COMPAT is defined */
2667 #ifdef CONFIG_COMPAT
2668     .compat_ioctl = ocfs2_compat_ioctl,
2669 #endif
2670     .lock = ocfs2_lock,
2671     .flock = ocfs2_flock,
2672     .splice_read = ocfs2_file_splice_read,
2673     .splice_write = ocfs2_file_splice_write,
2674     .fallocate = ocfs2_fallocate,
2675 };
2676
/*
 * File operations table for ocfs2 directories: it provides .readdir and
 * directory open/release handlers, and sets no write or aio methods.
 */
2677 const struct file_operations ocfs2_dops = {
2678     .llseek = generic_file_llseek,
2679     .read = generic_read_dir,
2680     .readdir = ocfs2_readdir,
2681     .fsync = ocfs2_sync_file,
2682     .release = ocfs2_dir_release,
2683     .open = ocfs2_dir_open,
2684     .unlocked_ioctl = ocfs2_ioctl,
2685 #ifdef CONFIG_COMPAT
2686     .compat_ioctl = ocfs2_compat_ioctl,
2687 #endif
2688     .lock = ocfs2_lock,
2689     .flock = ocfs2_flock,
2690 };
do_sync_write()是vfs层的出口,也是ocfs2文件系统层的入口。具体文件系统接下来是要跟块设备打交道的,是时候甩掉vfs层的东西了:从参数上来看,就是把file结构体、用户态buf、长度len和文件偏移指针ppos,转换成kiocb和iovec结构体。kiocb的很多字段是从file结构体mirror过来的,还有字段指向进程。
我测试了下,读写同一个文件情况下wait_on_retry_sync_kiocb()和wait_on_sync_kiocb()都没有被调用。从下面的代码看,只有当aio_write返回-EIOCBRETRY时才会调用前者,返回-EIOCBQUEUED时才会调用后者;至于具体文件系统什么时候会返回这两个值,还不清楚。
fs/read_write.c:
/*
 * do_sync_write - wrap a plain (file, buf, len, ppos) write request into a
 * kiocb plus a single iovec and issue it through the file's ->aio_write.
 *
 * @filp: file being written
 * @buf:  user-space buffer holding the data to write
 * @len:  number of bytes to write
 * @ppos: in: starting file offset; out: set from kiocb.ki_pos before returning
 *
 * Returns the ->aio_write result, or the result of wait_on_sync_kiocb()
 * when ->aio_write returned -EIOCBQUEUED.
 */
378 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
379 {
        /* one-element iovec describing the caller's user buffer */
380     struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
381     struct kiocb kiocb;
382     ssize_t ret;
383
        /* on-stack kiocb tied to filp, seeded with the start offset and length */
384     init_sync_kiocb(&kiocb, filp);
385     kiocb.ki_pos = *ppos;
386     kiocb.ki_left = len;
387     kiocb.ki_nbytes = len;
388
        /* re-issue the request for as long as ->aio_write answers -EIOCBRETRY */
389     for (;;) {
390         ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
391         if (ret != -EIOCBRETRY)
392             break;
393         wait_on_retry_sync_kiocb(&kiocb);
394     }
395
        /* -EIOCBQUEUED: the final result comes from wait_on_sync_kiocb() instead */
396     if (-EIOCBQUEUED == ret)
397         ret = wait_on_sync_kiocb(&kiocb);
        /* hand the kiocb's position back to the caller */
398     *ppos = kiocb.ki_pos;
399     return ret;
400 }