- struct backing_dev_info { /* describes one backing device (bdi): readahead limit, state bits, write-bandwidth estimates, dirty throttling and the queue of pending writeback work */
- struct list_head bdi_list; /* entry in the bdi_list list walked by bdi_forker_thread() */
- unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
- unsigned long state; /* Always use atomic bitops on this */
- unsigned int capabilities; /* Device capabilities */
- congested_fn *congested_fn; /* Function pointer if device is md/dm */
- void *congested_data; /* Pointer to aux data for congested func */
- char *name; /* printed in the "not registered" warning of bdi_forker_thread() */
- struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; /* one per-CPU counter per BDI stat item */
- unsigned long bw_time_stamp; /* last time write bw is updated */
- unsigned long dirtied_stamp; /* presumably pages dirtied at bw_time_stamp, by analogy with written_stamp - TODO confirm */
- unsigned long written_stamp; /* pages written at bw_time_stamp */
- unsigned long write_bandwidth; /* the estimated write bandwidth */
- unsigned long avg_write_bandwidth; /* further smoothed write bw */
- /*
- * The base dirty throttle rate, re-calculated on every 200ms.
- * All the bdi tasks' dirty rate will be curbed under it.
- * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
- * in small steps and is much more smooth/stable than the latter.
- */
- unsigned long dirty_ratelimit;
- unsigned long balanced_dirty_ratelimit;
- struct prop_local_percpu completions;
- int dirty_exceeded;
- unsigned int min_ratio;
- unsigned int max_ratio, max_prop_frac;
- struct bdi_writeback wb; /* default writeback info for this bdi (the writeback object) */
- spinlock_t wb_lock; /* protects work_list */
- /* work list: pending wb_writeback_work items are linked here through their 'list' member */
- struct list_head work_list;
- struct device *dev; /* its dev_name() is used to name the flush thread */
- /* timer used when running in laptop mode */
- struct timer_list laptop_mode_wb_timer;
- #ifdef CONFIG_DEBUG_FS
- struct dentry *debug_dir; /* these two members exist only when CONFIG_DEBUG_FS is set */
- struct dentry *debug_stats;
- #endif
- };
- struct bdi_writeback { /* writeback state of one bdi: the writeback thread plus the inode lists it works on */
- struct backing_dev_info *bdi; /* our parent bdi */
- unsigned int nr;
- unsigned long last_old_flush; /* last old data flush */
- unsigned long last_active; /* last time bdi thread was active; bdi_forker_thread() stops an idle thread once this is older than bdi_longest_inactive() */
- struct task_struct *task; /* writeback thread; NULL while no thread exists (assigned and cleared by bdi_forker_thread()) */
- struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
- struct list_head b_dirty; /* dirty inodes */
- struct list_head b_io; /* parked for writeback */
- struct list_head b_more_io; /* parked for more writeback */
- spinlock_t list_lock; /* protects the b_* lists */
- };
writeback work
- struct wb_writeback_work { /* one writeback request; queued on bdi->work_list and executed by wb_do_writeback() */
- long nr_pages;
- struct super_block *sb; /* superblock object */
- unsigned long *older_than_this;
- enum writeback_sync_modes sync_mode; /* forced to WB_SYNC_ALL by wb_do_writeback() when its force_wait argument is set */
- unsigned int tagged_writepages:1;
- unsigned int for_kupdate:1;
- unsigned int range_cyclic:1;
- unsigned int for_background:1;
- enum wb_reason reason; /* why was writeback initiated? */
- struct list_head list; /* pending work list, linked into the bdi->work_list queue */
- struct completion *done; /* set if the caller waits; completed once the work has run, otherwise the work item is kfree()d */
- };
- static int bdi_forker_thread(void *ptr) /*
- * Body of the forker thread. Each pass of the endless loop first flushes
- * the forker's own (default) bdi, then scans bdi_list for one bdi that
- * either needs a writeback thread created (it has work but no thread) or
- * whose thread has been idle long enough to be stopped, performs that one
- * action, and sleeps when there was nothing to do.
- * @ptr: the forker's own struct bdi_writeback.
- * Returns 0, but the for (;;) loop below has no exit path, so the return
- * statement is not reached from the code visible here.
- */
- {
- struct bdi_writeback *me = ptr;
- current->flags |= PF_SWAPWRITE;
- set_freezable();
- /*
- * Our parent may run at a different priority, just set us to normal
- */
- set_user_nice(current, 0);
- for (;;) {
- struct task_struct *task = NULL;
- struct backing_dev_info *bdi;
- enum {
- NO_ACTION, /* Nothing to do */
- FORK_THREAD, /* Fork bdi thread */
- KILL_THREAD, /* Kill inactive bdi thread */
- } action = NO_ACTION;
- /*
- * Temporary measure, we want to make sure we don't see
- * dirty data on the default backing_dev_info
- */
- if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
- del_timer(&me->wakeup_timer);
- wb_do_writeback(me, 0);
- }
- spin_lock_bh(&bdi_lock);
- /*
- * In the following loop we are going to check whether we have
- * some work to do without any synchronization with tasks
- * waking us up to do work for them. Set the task state here
- * so that we don't miss wakeups after verifying conditions.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Walk every bdi on bdi_list and check whether it has dirty data or
- * queued work. A bdi that has some but no thread gets a thread
- * forked for it, which then does the writeback. The scan stops at
- * the first bdi that needs an action; at most one action per pass.
- */
- list_for_each_entry(bdi, &bdi_list, bdi_list) {
- bool have_dirty_io;
- if (!bdi_cap_writeback_dirty(bdi) ||
- bdi_cap_flush_forker(bdi))
- continue;
- WARN(!test_bit(BDI_registered, &bdi->state),
- "bdi %p/%s is not registered!\n", bdi, bdi->name);
- /* check for dirty data: queued work items or dirty IO on the bdi's wb */
- have_dirty_io = !list_empty(&bdi->work_list) ||
- wb_has_dirty_io(&bdi->wb);
- /*
- * If the bdi has work to do, but the thread does not
- * exist - create it.
- */
- if (!bdi->wb.task && have_dirty_io) {
- /*
- * Set the pending bit - if someone will try to
- * unregister this bdi - it'll wait on this bit.
- */
- /* there is dirty data and no thread exists, so the FORK action follows */
- set_bit(BDI_pending, &bdi->state);
- action = FORK_THREAD;
- break;
- }
- spin_lock(&bdi->wb_lock);
- /*
- * If there is no work to do and the bdi thread was
- * inactive long enough - kill it. The wb_lock is taken
- * to make sure no-one adds more work to this bdi and
- * wakes the bdi thread up.
- */
- /*
- * A bdi without dirty data for a long time gets the KILL action,
- * which ends its writeback thread. wb.task is cleared here, under
- * wb_lock; the thread itself is stopped later, after the locks
- * have been dropped.
- */
- if (bdi->wb.task && !have_dirty_io &&
- time_after(jiffies, bdi->wb.last_active +
- bdi_longest_inactive())) {
- task = bdi->wb.task;
- bdi->wb.task = NULL;
- spin_unlock(&bdi->wb_lock);
- set_bit(BDI_pending, &bdi->state);
- action = KILL_THREAD;
- break;
- }
- spin_unlock(&bdi->wb_lock);
- }
- spin_unlock_bh(&bdi_lock);
- /* Keep working if default bdi still has things to do */
- if (!list_empty(&me->bdi->work_list))
- __set_current_state(TASK_RUNNING);
- /* carry out the thread FORK or KILL action chosen by the scan */
- switch (action) {
- case FORK_THREAD:
- /*
- * FORK a thread running bdi_writeback_thread, named "flush-" plus
- * dev_name(bdi->dev) (the original note gives this as
- * flush-major:minor - TODO confirm the dev_name() format)
- */
- __set_current_state(TASK_RUNNING);
- task = kthread_create(bdi_writeback_thread, &bdi->wb,
- "flush-%s", dev_name(bdi->dev));
- if (IS_ERR(task)) {
- /*
- * If thread creation fails, force writeout of
- * the bdi from the thread. Hopefully 1024 is
- * large enough for efficient IO.
- */
- writeback_inodes_wb(&bdi->wb, 1024,
- WB_REASON_FORKER_THREAD);
- } else {
- /*
- * The spinlock makes sure we do not lose
- * wake-ups when racing with 'bdi_queue_work()'.
- * And as soon as the bdi thread is visible, we
- * can start it.
- */
- spin_lock_bh(&bdi->wb_lock);
- bdi->wb.task = task;
- spin_unlock_bh(&bdi->wb_lock);
- wake_up_process(task);
- }
- bdi_clear_pending(bdi);
- break;
- case KILL_THREAD:
- /* KILL: stop the thread that the scan detached from its bdi */
- __set_current_state(TASK_RUNNING);
- kthread_stop(task);
- bdi_clear_pending(bdi);
- break;
- case NO_ACTION:
- /* no action to perform, so put this thread to sleep for a while */
- if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
- /*
- * There are no dirty data. The only thing we
- * should now care about is checking for
- * inactive bdi threads and killing them. Thus,
- * let's sleep for longer time, save energy and
- * be friendly for battery-driven devices.
- */
- schedule_timeout(bdi_longest_inactive());
- else
- schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
- try_to_freeze();
- break;
- }
- }
- return 0;
- }
writeback线程
- /*
- * wb_do_writeback - run all writeback that is currently due for @wb.
- * @wb: writeback object to work on; queued work is taken from wb->bdi.
- * @force_wait: when non-zero, every queued work item is switched to
- * WB_SYNC_ALL before it is executed.
- *
- * Marks the bdi as BDI_writeback_running for the duration of the call,
- * drains the queued work items, then runs the old-data and background
- * flush checks. Returns the sum of the values reported by wb_writeback()
- * and the two flush checks.
- */
- long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
- {
-     struct backing_dev_info *bdi = wb->bdi;
-     struct wb_writeback_work *item;
-     long nr_written = 0;
-     set_bit(BDI_writeback_running, &bdi->state);
-     /* Drain the pending work items one at a time until none is left. */
-     for (item = get_next_work_item(bdi); item != NULL;
-          item = get_next_work_item(bdi)) {
-         /*
-          * Caller asked us to wait for completion (e.g. because this
-          * thread is about to exit): override the item's sync mode.
-          */
-         if (force_wait)
-             item->sync_mode = WB_SYNC_ALL;
-         trace_writeback_exec(bdi, item);
-         /* wb_writeback() does the actual work for this item. */
-         nr_written += wb_writeback(wb, item);
-         /*
-          * An item without a completion belongs to us and is freed
-          * here; otherwise a caller is waiting and gets notified.
-          */
-         if (!item->done)
-             kfree(item);
-         else
-             complete(item->done);
-     }
-     /*
-      * Periodic (kupdated-style) flush of old data, then the background
-      * flush check. Per the original author's note these helpers build
-      * their own work item and hand it to wb_writeback() - not visible
-      * here, confirm against their definitions.
-      */
-     nr_written += wb_check_old_data_flush(wb);
-     nr_written += wb_check_background_flush(wb);
-     clear_bit(BDI_writeback_running, &bdi->state);
-     return nr_written;
- }