Linux那些事儿之我是Block层(6)浓缩就是精华?(三)

第三个,blk_register_queue().

   4079 int blk_register_queue(struct gendisk *disk)

   4080 {

   4081         int ret;

   4082

   4083         request_queue_t *q = disk->queue;

   4084

   4085         if (!q || !q->request_fn)

   4086                 return -ENXIO;

   4087

   4088         q->kobj.parent = kobject_get(&disk->kobj);  //add sys :   sys/block/mmcblk0/queue

   4089

   4090         ret = kobject_add(&q->kobj);

   4091         if (ret < 0)

   4092                 return ret;

   4093

   4094         kobject_uevent(&q->kobj, KOBJ_ADD);

   4095

   4096         ret = elv_register_queue(q);

   4097         if (ret) {

   4098                 kobject_uevent(&q->kobj, KOBJ_REMOVE);

   4099                 kobject_del(&q->kobj);

   4100                 return ret;

   4101         }

   4102

   4103         return 0;

   4104 }

首先,4090行这个kobject_add很好解释,/sys/block/sda/目录下面又多一个子目录而已,但问题是,这个q究竟是什么?这里我们把disk->queue赋给了它,disk->queue又是什么呢?回过头去看sd_probe(),当时我们有这么一句,

   1662         gd->queue = sdkp->device->request_queue;

sdkp是struct scsi_disk结构体指针,其device成员是struct scsi_device指针,那么这个request_queue则是struct request_queue结构体指针,表示的是一个请求队列.但它是从哪儿来的呢?一路走来的兄弟们可能会猜到,事实上scsi设备驱动和usb设备驱动有一点是相同的,在它们的probe函数被调用之前,核心层实际上已经为它们做了许多工作了.比如usb那边就是为usb设备申请usb_device结构体变量,而这边也是如此,申请了scsi_device结构体变量,为它的一些成员赋好了值,这其中就包括了这个请求队列.

准确地说,scsi总线扫描的时候,每当探测到一个设备,就会调用scsi_alloc_sdev()函数,这个函数我们无意多说,但是可以告诉你的是,它会调用一个叫做scsi_alloc_queue()的函数.而这个函数涉及到很多block层提供的函数,所以我们不得不从这里开始看起,来自drivers/scsi/scsi_lib.c:


    mmc_init_queue   // mmc/card/queue.c    //  


   1569 struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,

   1570                                          request_fn_proc *request_fn)

   1571 {

   1572         struct request_queue *q;

   1573

   1574         q = blk_init_queue(request_fn, NULL);

   1575         if (!q)

   1576                 return NULL;

   1577

   1578         blk_queue_max_hw_segments(q, shost->sg_tablesize);

   1579         blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);

   1580         blk_queue_max_sectors(q, shost->max_sectors);

   1581         blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));

   1582         blk_queue_segment_boundary(q, shost->dma_boundary);

   1583

   1584         if (!shost->use_clustering)

   1585                 clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);

   1586         return q;

   1587 }

   1588 EXPORT_SYMBOL(__scsi_alloc_queue);

   1589

   1590 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)

   1591 {

   1592         struct request_queue *q;

   1593

   1594         q = __scsi_alloc_queue(sdev->host, scsi_request_fn);

   1595         if (!q)

   1596                 return NULL;

   1597

   1598         blk_queue_prep_rq(q, scsi_prep_fn);

   1599         blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);

   1600         blk_queue_softirq_done(q, scsi_softirq_done);

   1601         return q;

   1602 }

这两个函数因为调用关系所以一并贴了出来.

我们首先要看的很自然就是blk_init_queue(),它来自block/ll_rw_blk.c:

   1860 /**

   1861  * blk_init_queue  - prepare a request queue for use with a block device

   1862  * @rfn:  The function to be called to process requests that have been

   1863  *        placed on the queue.

   1864  * @lock: Request queue spin lock

   1865  *

   1866  * Description:

   1867  *    If a block device wishes to use the standard request handling procedures,

   1868  *    which sorts requests and coalesces adjacent requests, then it must

   1869  *    call blk_init_queue().  The function @rfn will be called when there

   1870  *    are requests on the queue that need to be processed.  If the device

   1871  *    supports plugging, then @rfn may not be called immediately when requests

   1872  *    are available on the queue, but may be called at some time later instead.

   1873  *    Plugged queues are generally unplugged when a buffer belonging to one

   1874  *    of the requests on the queue is needed, or due to memory pressure.

   1875  *

   1876  *    @rfn is not required, or even expected, to remove all requests off the

   1877  *    queue, but only as many as it can handle at a time.  If it does leave

   1878  *    requests on the queue, it is responsible for arranging that the requests

   1879  *    get dealt with eventually.

   1880  *

   1881  *    The queue spin lock must be held while manipulating the requests on the

   1882  *    request queue; this lock will be taken also from interrupt context, so irq

   1883  *    disabling is needed for it.

   1884  *

   1885  *    Function returns a pointer to the initialized request queue, or NULL if

   1886  *    it didn't succeed.

   1887  *

   1888  * Note:

   1889  *    blk_init_queue() must be paired with a blk_cleanup_queue() call

   1890  *    when the block device is deactivated (such as at module unload).

   1891  **/

   1892

   1893 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)

   1894 {

   1895         return blk_init_queue_node(rfn, lock, -1);

   1896 }

   1897 EXPORT_SYMBOL(blk_init_queue);

   1898

   1899 request_queue_t *

   1900 blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)

   1901 {

   1902         request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);

   1903

   1904         if (!q)

   1905                 return NULL;

   1906

   1907         q->node = node_id;

   1908         if (blk_init_free_list(q)) {

   1909                 kmem_cache_free(requestq_cachep, q);

   1910                 return NULL;

   1911         }

   1912

   1913         /*

   1914          * if caller didn't supply a lock, they get per-queue locking with

   1915          * our embedded lock

   1916          */

   1917         if (!lock) {

   1918                 spin_lock_init(&q->__queue_lock);

   1919                 lock = &q->__queue_lock;

   1920         }

   1921

   1922         q->request_fn           = rfn;

   1923         q->prep_rq_fn           = NULL;

   1924         q->unplug_fn            = generic_unplug_device;

   1925         q->queue_flags          = (1 << QUEUE_FLAG_CLUSTER);

   1926         q->queue_lock           = lock;

   1927

   1928         blk_queue_segment_boundary(q, 0xffffffff);

   1929

   1930         blk_queue_make_request(q, __make_request);

   1931         blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);

   1932

   1933         blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);

   1934         blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);

   1935

   1936         q->sg_reserved_size = INT_MAX;

   1937

   1938         /*

   1939          * all done

   1940          */

   1941         if (!elevator_init(q, NULL)) {

   1942                 blk_queue_congestion_threshold(q);

   1943                 return q;

   1944         }

   1945

   1946         blk_put_queue(q);

   1947         return NULL;

   1948 }

别看这些函数都很可怕,真正我们目前需要关注的其实只是其中的某几个而已.它们就是blk_alloc_queue_node()和elevator_init().前者来自block/ll_rw_blk.c,后者则来自block/elevator.c:

   1836 request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)

   1837 {

   1838         request_queue_t *q;

   1839

   1840         q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);

   1841         if (!q)

   1842                 return NULL;

   1843

   1844         memset(q, 0, sizeof(*q));

   1845         init_timer(&q->unplug_timer);

   1846

   1847         snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");

   1848         q->kobj.ktype = &queue_ktype;

   1849         kobject_init(&q->kobj);

   1850

   1851         q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;

   1852         q->backing_dev_info.unplug_io_data = q;

   1853

   1854         mutex_init(&q->sysfs_lock);

   1855

   1856         return q;

   1857 }

还记得本故事最早时期讲的那个blk_dev_init吗?当时我们调用kmem_cache_create()申请了一个内存池requestq_cachep,现在就该用它了.从这个池子里申请了一个request_queue_t结构体的空间,给了指针q,然后1844行把它初始化为0.而1847行让q的kobj.name等于“queue”,这就是为什么今后我们在/sys/block/sda/目录下面能看到一个叫做“queue”的目录.

[root@localhost ~]# ls /sys/block/sda/

capability  device   queue  removable  sda10  sda12  sda14  sda2  sda5  sda7  sda9  slaves  subsystem dev         holders  range  sda1       sda11  sda13  sda15  sda3  sda6  sda8  size  stat    uevent

而这个queue目录下面的内容是什么呢?

[root@localhost ~]# ls /sys/block/sda/queue/

iosched  max_hw_sectors_kb  max_sectors_kb  nr_requests  read_ahead_kb  scheduler

这几个文件从哪来的?注意1848行那个queue_ktype.

   4073 static struct kobj_type queue_ktype = {

   4074         .sysfs_ops      = &queue_sysfs_ops,

   4075         .default_attrs  = default_attrs,

   4076         .release        = blk_release_queue,

   4077 };

如果你真懂设备模型,那么你一定会去查看这个default_attrs是什么,

   3988 static struct queue_sysfs_entry queue_requests_entry = {

   3989         .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },

   3990         .show = queue_requests_show,

   3991         .store = queue_requests_store,

   3992 };

   3993

   3994 static struct queue_sysfs_entry queue_ra_entry = {

   3995         .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },

   3996         .show = queue_ra_show,

   3997         .store = queue_ra_store,

   3998 };

   3999

   4000 static struct queue_sysfs_entry queue_max_sectors_entry = {

   4001         .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },

   4002         .show = queue_max_sectors_show,

   4003         .store = queue_max_sectors_store,

   4004 };

   4005

   4006 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {

   4007         .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },

   4008         .show = queue_max_hw_sectors_show,

   4009 };

   4010

   4011 static struct queue_sysfs_entry queue_iosched_entry = {

   4012         .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },

   4013         .show = elv_iosched_show,

   4014         .store = elv_iosched_store,

   4015 };

   4016

   4017 static struct attribute *default_attrs[] = {

   4018         &queue_requests_entry.attr,

   4019         &queue_ra_entry.attr,

   4020         &queue_max_hw_sectors_entry.attr,

   4021         &queue_max_sectors_entry.attr,

   4022         &queue_iosched_entry.attr,

   4023         NULL,

   4024 };

看到了吗?是一个指针数组,按照设备模型的理论来说,这些就是定义了一些属性,kobject的属性,看到这些属性的name是不是和刚才那个queue目录下面的文件名字是一样的?没错,queue目录下面每个文件就是和这里这些属性一一对应的.不过有一个东西例外,它就是iosched,这不是一个文件,这是一个目录.

[root@localhost ~]# ls -l /sys/block/sdf/queue/

total 0

drwxr-xr-x 2 root root    0 Dec 14 02:46 iosched

-r--r--r-- 1 root root 4096 Dec 14 06:21 max_hw_sectors_kb

-rw-r--r-- 1 root root 4096 Dec 14 06:21 max_sectors_kb

-rw-r--r-- 1 root root 4096 Dec 14 06:21 nr_requests

-rw-r--r-- 1 root root 4096 Dec 14 06:21 read_ahead_kb

-rw-r--r-- 1 root root 4096 Dec 14 06:21 scheduler

[root@localhost ~]# ls /sys/block/sdf/queue/iosched/

back_seek_max      fifo_expire_async  quantum      slice_async_rq  slice_sync back_seek_penalty fifo_expire_sync   slice_async  slice_idle

关于这个目录,我们来看另一个函数,elevator_init(),来自block/elevator.c:

    220 int elevator_init(request_queue_t *q, char *name)

    221 {

    222         struct elevator_type *e = NULL;

    223         struct elevator_queue *eq;

    224         int ret = 0;

    225         void *data;

    226

    227         INIT_LIST_HEAD(&q->queue_head);

    228         q->last_merge = NULL;

    229         q->end_sector = 0;

    230         q->boundary_rq = NULL;

    231

    232         if (name && !(e = elevator_get(name)))

    233                 return -EINVAL;

    234

    235         if (!e && *chosen_elevator && !(e = elevator_get(chosen_elevator)))

    236                 printk("I/O scheduler %s not found\n", chosen_elevator);

    237

    238         if (!e && !(e = elevator_get(CONFIG_DEFAULT_IOSCHED))) {

    239                 printk("Default I/O scheduler not found, using no-op\n");

    240                 e = elevator_get("noop");

    241         }

    242

    243         eq = elevator_alloc(q, e);

    244         if (!eq)

    245                 return -ENOMEM;

    246

    247         data = elevator_init_queue(q, eq);

    248         if (!data) {

    249                 kobject_put(&eq->kobj);

    250                 return -ENOMEM;

    251         }

    252

    253         elevator_attach(q, eq, data);

    254         return ret;

    255 }

重点关注elevator_alloc().

    179 static elevator_t *elevator_alloc(request_queue_t *q, struct elevator_type *e)

    180 {

    181         elevator_t *eq;

    182         int i;

    183

    184         eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL, q->node);

    185         if (unlikely(!eq))

    186                 goto err;

    187

    188         memset(eq, 0, sizeof(*eq));

    189         eq->ops = &e->ops;

    190         eq->elevator_type = e;

    191         kobject_init(&eq->kobj);

    192         snprintf(eq->kobj.name, KOBJ_NAME_LEN, "%s", "iosched");

    193         eq->kobj.ktype = &elv_ktype;

    194         mutex_init(&eq->sysfs_lock);

    195

    196         eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,

    197                                         GFP_KERNEL, q->node);

    198         if (!eq->hash)

    199                 goto err;

    200

    201         for (i = 0; i < ELV_HASH_ENTRIES; i++)

    202                 INIT_HLIST_HEAD(&eq->hash[i]);

    203

    204         return eq;

    205 err:

    206         kfree(eq);

    207         elevator_put(e);

    208         return NULL;

    209 }

无非就是申请一个struct elevator_t结构体变量的空间并且初始化为0.

而真正引发我们兴趣的是192行,很显然,就是因为这里把eq的kobj的name设置为“iosched”,才会让我们在queue目录下看到那个“iosched”子目录.

而这个子目录下那些乱七八糟的文件又来自哪里呢?正是下面这个elv_register_queue()函数,这个我们在blk_register_queue()中调用的函数.

    931 int elv_register_queue(struct request_queue *q)

    932 {

    933         elevator_t *e = q->elevator;

    934         int error;

    935

    936         e->kobj.parent = &q->kobj;

    937

    938         error = kobject_add(&e->kobj);

    939         if (!error) {

    940                 struct elv_fs_entry *attr = e->elevator_type->elevator_attrs;

    941                 if (attr) {

    942                         while (attr->attr.name) {

    943                                 if (sysfs_create_file(&e->kobj, &attr->attr))

    944                                         break;

    945                                 attr++;

    946                         }

    947                 }

    948                 kobject_uevent(&e->kobj, KOBJ_ADD);

    949         }

    950         return error;

    951 }

936行保证了,iosched是出现在queue目录下而不是出现在别的地方,942行这个while循环则是创建iosched目录下面那么多文件的.我们先来看这个attr到底是什么,这里它指向了e->elevator_type->elevator_attrs,而在刚才那个elevator_alloc()函数中,190行,我们看到了eq->elevator_type被赋上了e,回溯至elevator_init(),我们来看e究竟是什么.

首先,当我们在blk_init_queue_node()中调用elevator_init()的时候,传递的第二个参数是NULL,即name指针是NULL.

那么很明显,235行和238行这两个if语句对于e的取值至关重要.而到了现在,传说中的电梯算法也不得不介绍了.

话说,在Linux中如果你要读写一些磁盘数据,你需要创建一个block device request.这个request基本上描述了请求的扇区以及操作的类型.(即,你是要读还是要写)而对于一个设备来说,请求多了自然就应该使用某种数据结构来存储它们,很显然我们会使用队列,于是,Linux中为每个块设备准备了一个请求队列,即所谓的request queue.每接收到一个请求,就把它插入到request queue这个队列中去.

那么这里有一个问题,比如说队列里有好几十个请求,那么谁先执行谁后执行呢?是不是谁先提交就先执行谁?不是.这里需要调度,否则磁盘的性能就会很糟糕.

比如说英超联赛,拿我家切尔西来举例,一个赛季38场英超联赛,如果说赛程是一场主场一场客场一场主场一场客场一场主场一场客场…,那么这样的赛程一定是很糟糕的,因为球员要不停的奔波,每踢一场比赛就得进行一次车旅劳顿,球员纷纷疲于奔命,状态根本无法保证,那么比这个好点的赛程是什么?比如,连续几个主场,连续几个客场,那么至少在连续的这几个主场作战的期间球员们不用把体力消耗在旅途中,而在连续的几个客场中,怎么安排又有区别了,假设有这样四个连续的客场,对手分别是曼联,曼城,利物浦,埃弗顿,那么理想的赛程是,踢曼联和踢曼城这两场相邻,踢利物浦和踢埃弗顿这两场相邻,这样旅途耗费时间最少,那么最恶劣的赛程是什么呢?先去曼彻斯特踢曼联,然后去利物浦踢利物浦,然后又折回曼彻斯特踢曼城,再然后又杀回利物浦去战埃弗顿,很显然这样的赛程是最艰苦的,这就是所谓的魔鬼赛程.所以赛程的好坏很有可能影响一支球队的战绩.

而磁盘调度也是如此.磁头的移来移去是很费时间的,如果我这一次要读的扇区在“曼彻斯特”,下一次要读的扇区又在“利物浦”,下下次又回到“曼彻斯特”,然后又去到“利物浦”,这样显然会影响磁盘的性能.所以如果我们能够改变这种顺序,能够让前后两次访问的扇区尽量在相邻的位置,那么毫无疑问将提高磁盘的性能.而完成这项工作的叫做IO调度器.(The I/O Scheduler)

IO调度器的总体目标是希望让磁头能够总是往一个方向移动,移动到底了再往反方向走,这恰恰就是现实生活中的电梯模型,所以IO调度器也被叫做电梯.(elevator)而相应的算法也就被叫做电梯算法.Linux中IO调度的电梯算法有好几种,一个叫做as(Anticipatory),一个叫做cfq(Complete Fairness Queueing),一个叫做deadline,还有一个叫做noop(No Operation).具体使用哪种算法我们可以在启动的时候通过内核参数elevator来指定.比如在我的grub配置文件中就这样设置过:

###Don't change this comment - YaST2 identifier: Original name: linux###

title Linux

    kernel (hd0,0)/vmlinuz root=/dev/sda3 selinux=0 resume=/dev/sda2 splash=silent elevator=cfq showopts console=ttyS0,9600 console=tty0

    initrd (hd0,0)/initrd

这里设置了elevator=cfq,因此cfq算法将是我们的IO调度器所采用的算法.而另一方面我们也可以单独地为某个设备指定它所采用的IO调度算法,这就要通过修改/sys/block/sda/queue/目录下面的scheduler文件来实现.比如我们可以先看一下我的这块硬盘:

[root@localhost ~]# cat /sys/block/sda/queue/scheduler

noop anticipatory deadline [cfq]

可以看到我们这里采用的是cfq.

Ok,现在还不是细说这几种算法的时刻,我们接着刚才的话题,还看elevator_init().

首先chosen_elevator是定义于block/elevator.c中的一个字符串.

    160 static char chosen_elevator[16];

这个字符串就是用来记录启动参数elevator的值的.如果没有设置,那就没有值(即为空字符串).

CONFIG_DEFAULT_IOSCHED是一个编译选项.它就是一个字符串,在编译内核的时候设置的,比如我的是cfq.

    119 CONFIG_DEFAULT_IOSCHED="cfq"

你当然也可以选择其它三个,看个人喜好了,喜欢哪个就选择哪个.我的建议是,喜欢的就要拥有她,不要害怕结果.总之这个字符串会传递给elevator_get这个来自block/elevator.c的函数:

    133 static struct elevator_type *elevator_get(const char *name)

    134 {

    135         struct elevator_type *e;

    136

    137         spin_lock(&elv_list_lock);

    138

    139         e = elevator_find(name);

    140         if (e && !try_module_get(e->elevator_owner))

    141                 e = NULL;

    142

    143         spin_unlock(&elv_list_lock);

    144

    145         return e;

    146 }

这里elevator_find()也来自同一个文件.

    112 static struct elevator_type *elevator_find(const char *name)

    113 {

    114         struct elevator_type *e;

    115         struct list_head *entry;

    116

    117         list_for_each(entry, &elv_list) {

    118

    119                 e = list_entry(entry, struct elevator_type, list);

    120

    121                 if (!strcmp(e->elevator_name, name))

    122                         return e;

    123         }

    124

    125         return NULL;

    126 }

&elv_list是什么?首先,复旦南区后门卖炒饭的那几对夫妻都知道elv_list一定是一个链表.但是这张链表具体是什么内容呢?事实上,甭管是这四种算法中的哪一种,在正式登台演出之前,都需要做一些初始化,初始化过程中最本质的一项工作就是调用elv_register()函数来注册自己.而这个注册主要就是往elv_list这张链表里登记.

    965 int elv_register(struct elevator_type *e)

    966 {

    967         char *def = "";

    968

    969         spin_lock(&elv_list_lock);

    970         BUG_ON(elevator_find(e->elevator_name));

    971         list_add_tail(&e->list, &elv_list);

    972         spin_unlock(&elv_list_lock);

    973

    974         if (!strcmp(e->elevator_name, chosen_elevator) ||

    975                         (!*chosen_elevator &&

    976                          !strcmp(e->elevator_name, CONFIG_DEFAULT_IOSCHED)))

    977                                 def = " (default)";

    978

    979         printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);

    980         return 0;

    981 }

看到list_add_tail那行了吗?那么这个elevator_type结构体又代表了什么呢?正如其名,它代表着一种电梯算法的类型,比如对于cfq,在cfq-iosched.c文件中,就定义了这么一个结构体变量iosched_cfq.

   2188 static struct elevator_type iosched_cfq = {

   2189         .ops = {

   2190                 .elevator_merge_fn =            cfq_merge,

   2191                 .elevator_merged_fn =           cfq_merged_request,

   2192                 .elevator_merge_req_fn =        cfq_merged_requests,

   2193                 .elevator_allow_merge_fn =      cfq_allow_merge,

   2194                 .elevator_dispatch_fn =         cfq_dispatch_requests,

   2195                 .elevator_add_req_fn =          cfq_insert_request,

   2196                 .elevator_activate_req_fn =     cfq_activate_request,

   2197                 .elevator_deactivate_req_fn =   cfq_deactivate_request,

   2198                 .elevator_queue_empty_fn =      cfq_queue_empty,

   2199                .elevator_completed_req_fn =    cfq_completed_request,

   2200                 .elevator_former_req_fn =       elv_rb_former_request,

   2201                 .elevator_latter_req_fn =       elv_rb_latter_request,

   2202                 .elevator_set_req_fn =          cfq_set_request,

   2203                 .elevator_put_req_fn =          cfq_put_request,

   2204                 .elevator_may_queue_fn =        cfq_may_queue,

   2205                 .elevator_init_fn =             cfq_init_queue,

   2206                 .elevator_exit_fn =             cfq_exit_queue,

   2207                 .trim =                         cfq_free_io_context,

   2208         },

   2209         .elevator_attrs =       cfq_attrs,

   2210         .elevator_name =        "cfq",

   2211         .elevator_owner =       THIS_MODULE,

   2212 };

同样,我们可以找到,对于noop,也有类似的变量.

     87 static struct elevator_type elevator_noop = {

     88         .ops = {

     89              .elevator_merge_req_fn          = noop_merged_requests,

     90                 .elevator_dispatch_fn           = noop_dispatch,

     91                 .elevator_add_req_fn            = noop_add_request,

     92                 .elevator_queue_empty_fn        = noop_queue_empty,

     93                 .elevator_former_req_fn         = noop_former_request,

     94                 .elevator_latter_req_fn         = noop_latter_request,

     95                 .elevator_init_fn               = noop_init_queue,

     96                 .elevator_exit_fn               = noop_exit_queue,

     97         },

     98         .elevator_name = "noop",

     99         .elevator_owner = THIS_MODULE,

    100 };

所以,我们就知道这个e到底是要得到什么了:如果你通过启动参数elevator指定了算法,那么它就是你指定的那个;如果你什么都没设置,那么它就是CONFIG_DEFAULT_IOSCHED所指定的那个;如果连这个也找不到,那么它只能选择最简单的那个,noop.于是到现在我们终于明白elv_register_queue()中那个e->elevator_type是啥了.而我们要的是e->elevator_type->elevator_attrs.对于cfq,很显然,它就是cfq_attrs,来自block/cfq-iosched.c:

   2175 static struct elv_fs_entry cfq_attrs[] = {

   2176         CFQ_ATTR(quantum),

   2177         CFQ_ATTR(fifo_expire_sync),

   2178         CFQ_ATTR(fifo_expire_async),

   2179         CFQ_ATTR(back_seek_max),

   2180         CFQ_ATTR(back_seek_penalty),

   2181         CFQ_ATTR(slice_sync),

   2182         CFQ_ATTR(slice_async),

   2183         CFQ_ATTR(slice_async_rq),

   2184         CFQ_ATTR(slice_idle),

   2185         __ATTR_NULL

   2186 };

所以,那个while循环的sysfs_create_file的功绩就是以上面这个数组的元素的名字建立一堆的文件.而这正是我们在/sys/block/sdf/queue/iosched/目录下面看到的那些文件.

至此,elv_register_queue就算是结束了,从而blk_register_queue()也就结束了,add_disk这个不朽的函数终于大功告成.这一刻开始,整个块设备工作的大舞台就已经搭好了.对于sd那边来说,sd_probe就是在结束add_disk之后结束的.

看完之后,我深深地吸了一口气,我不得不承认,add_disk这个函数,这个只有四行代码的函数,很好,很强大.写代码毕竟不是写琼瑶剧本,不可能像<<一帘幽梦>>里的一句“我爱你”,需要用四十几集来诠释,那才叫一个深刻呢!

你可能感兴趣的:(Linux那些事儿之我是Block层(6)浓缩就是精华?(三))