主要源码文件:block/bio.c、include/linux/bio.h
内核版本:4.19.1
这部分先总结下bio子系统的初始化部分,后续再总结bio的申请以及释放,还有其它api的使用等介绍。
bio涉及频繁的内存申请和释放,既包括bio结构体本身,也包括它比较重要的成员bi_io_vec(struct bio_vec *)所指向的内存,所以初始化部分主要是初始化相关的内存池,供后面申请和释放bio、bio_vec时使用。
其中bio结构体和bio_vec结构体定义如下:
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
*/
struct bio {
	struct bio *bi_next; /* request queue link */
	struct gendisk *bi_disk;
	unsigned int bi_opf; /* bottom bits req flags,
	* top bits REQ_OP. Use
	* accessors.
	*/
	unsigned short bi_flags; /* status, etc and bvec pool number */
	unsigned short bi_ioprio;
	unsigned short bi_write_hint;
	blk_status_t bi_status;
	u8 bi_partno;
	/* Number of segments in this BIO after
	* physical address coalescing is performed.
	*/
	unsigned int bi_phys_segments;
	/*
	* To keep track of the max segment size, we account for the
	* sizes of the first and last mergeable segments in this bio.
	*/
	unsigned int bi_seg_front_size;
	unsigned int bi_seg_back_size;
	struct bvec_iter bi_iter; /* iterator used to walk the bvec list; records how far the bio has been processed (author's note) */
	atomic_t __bi_remaining;
	bio_end_io_t *bi_end_io;
	void *bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	* Optional ioc and css associated with this bio. Put on bio
	* release. Read comment on top of bio_associate_current().
	*/
	struct io_context *bi_ioc;
	struct cgroup_subsys_state *bi_css;
	struct blkcg_gq *bi_blkg;
	struct bio_issue bi_issue;
#endif
	/* Anonymous union; it has no members unless CONFIG_BLK_DEV_INTEGRITY is defined. */
	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};
	unsigned short bi_vcnt; /* how many bio_vec's */
	/*
	* Everything starting with bi_max_vecs will be preserved by bio_reset()
	*/
	unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
	atomic_t __bi_cnt; /* pin count */
	struct bio_vec *bi_io_vec; /* the actual vec list */
	struct bio_set *bi_pool;
	/*
	* We can inline a number of vecs at the end of the bio, to avoid
	* double allocations for a small number of bio_vecs. This member
	* MUST obviously be kept at the very end of the bio.
	*/
	struct bio_vec bi_inline_vecs[0];
};
/*
* One element of a bio's vec list (see bio->bi_io_vec and
* bio->bi_inline_vecs).
*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
struct bio_vec {
	struct page *bv_page;
	unsigned int bv_len; /* NOTE(review): presumably the data length in bytes within bv_page - confirm */
	unsigned int bv_offset; /* NOTE(review): presumably the byte offset of the data within bv_page - confirm */
};
下面是初始化入口init_bio函数及其用到的数据结构。其中,bio_integrity_init函数和bioset_integrity_create函数先不看。
//这个是涉及到的结构体定义
struct bio_slab {
struct kmem_cache *slab; //指向cache描述符,cache描述符描述了slab信息
unsigned int slab_ref; //bio_slab结构体的引用计数
unsigned int slab_size; //内存池中object大小
char name[8]; //内存池名称,/proc/slabinfo可以看到
};
/* Held by bio_find_or_create_slab() while it searches or modifies the variables below. */
static DEFINE_MUTEX(bio_slab_lock);
/* Array of bio_slab records; allocated in init_bio() and grown with krealloc() when full. */
static struct bio_slab *bio_slabs;
/*
 * bio_slab_nr:  number of leading entries of bio_slabs that have been handed out.
 * bio_slab_max: number of entries bio_slabs currently has room for.
 */
static unsigned int bio_slab_nr, bio_slab_max;
/*
1:https://zhuanlan.zhihu.com/p/596543999?utm_id=0
2:https://blog.csdn.net/geshifei/article/details/119959905
*/
/*
 * init_bio - boot-time setup of the bio allocation infrastructure.
 *
 * Registered with subsys_initcall(). Steps, in order:
 *   1. allocate the global bio_slabs array with room for two zeroed entries
 *      (no kmem_cache is created at this point);
 *   2. bio_integrity_init() - not covered by these notes;
 *   3. biovec_init_slabs() - create the kmem_caches described by bvec_slabs;
 *   4. bioset_init() on fs_bio_set;
 *   5. bioset_integrity_create() on fs_bio_set - not covered by these notes.
 *
 * Every checked failure ends in panic(), so the only value ever returned
 * is 0.
 */
static int __init init_bio(void)
{
	/* Global bookkeeping array for bio slabs: empty, capacity two. */
	bio_slab_nr = 0;
	bio_slab_max = 2;
	bio_slabs = kcalloc(bio_slab_max, sizeof(*bio_slabs), GFP_KERNEL);
	if (bio_slabs == NULL)
		panic("bio: can't allocate bios\n");

	bio_integrity_init();

	/* bvec_slabs manages the kmem_caches used for bio_vec arrays. */
	biovec_init_slabs();

	/*
	 * bioset_init(bs, pool_size, front_pad, flags):
	 *   bs        - the bio_set to initialise (fs_bio_set here);
	 *   pool_size - element count passed to the bio_set's mempools
	 *               (BIO_POOL_SIZE; the author's notes give its value as 2);
	 *   front_pad - extra bytes requested by the caller; the slab object
	 *               size is sizeof(struct bio) + front_pad + room for
	 *               BIO_INLINE_VECS inline bio_vecs. It is 0 here;
	 *   flags     - BIOSET_NEED_BVECS makes bioset_init() also set up
	 *               bs->bvec_pool; BIOSET_NEED_RESCUER makes it allocate
	 *               the "bioset" workqueue whose work function is
	 *               bio_alloc_rescue. Only BIOSET_NEED_BVECS is passed
	 *               here, so no rescuer workqueue is created.
	 */
	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS) != 0)
		panic("bio: can't allocate bios\n");

	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE) != 0)
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);
这里先分析下bio_slabs这个全局数组。从变量名不难看出,它是后续申请bio结构体时使用的。初始化时数组只有两个元素(bio_slab_max = 2),而且各元素的成员都还没有真正初始化(kcalloc只是把内存清零);实际使用时才会初始化对应元素的成员,必要时还会对数组进行扩容,此时数组元素个数就不止2个了(见bio_find_or_create_slab函数)。所以申请bio时,根据所需object的大小,有可能从数组中任何一个元素对应的slab当中申请内存。
接下来是biovec_init_slabs函数,看名称也不难分析,它是为后续申请bio_vec相关内存所做的初始化。
/* Number of bio_vecs for which biovec_init_slabs() creates no separate slab. */
#define BIO_INLINE_VECS 4

/*
 * bvec_slabs - table describing the bio_vec array caches.
 *
 * Background reading:
 *   https://blog.csdn.net/sinat_32960911/article/details/132237781
 *
 * Author's note on __read_mostly: it marks the table as data that is read
 * frequently, which is meant to help performance.
 *
 * Layout of one table entry, for reference:
 *
 *   struct biovec_slab {
 *           int nr_vecs;
 *           char *name;
 *           struct kmem_cache *slab;
 *   };
 *
 * BV(count, tag) builds one entry holding `count` bio_vecs and named
 * "biovec-<tag>"; the slab pointer is filled in later by
 * biovec_init_slabs(). The macro is only needed for this table and is
 * undefined right after it.
 */
#define BV(count, tag) { .nr_vecs = (count), .name = "biovec-" #tag }
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
	BV(1, 1),
	BV(4, 4),
	BV(16, 16),
	BV(64, 64),
	BV(128, 128),
	BV(BIO_MAX_PAGES, max),	/* the author's notes treat BIO_MAX_PAGES as 256 */
};
#undef BV
/*
 * biovec_init_slabs - create the kmem_caches listed in bvec_slabs.
 *
 * Entries whose nr_vecs does not exceed BIO_INLINE_VECS get no cache
 * (slab = NULL); presumably such small vec counts are served by the
 * inline vecs at the end of struct bio. Every other entry gets a cache
 * whose object size is nr_vecs * sizeof(struct bio_vec).
 *
 * SLAB_PANIC is passed to kmem_cache_create(), and the result is not
 * checked here.
 */
static void __init biovec_init_slabs(void)
{
	int idx;

	for (idx = 0; idx < BVEC_POOL_NR; idx++) {
		struct biovec_slab *pool = &bvec_slabs[idx];
		int bytes;

		if (pool->nr_vecs > BIO_INLINE_VECS) {
			/* The object must hold the whole bio_vec array. */
			bytes = pool->nr_vecs * sizeof(struct bio_vec);
			pool->slab = kmem_cache_create(pool->name, bytes, 0,
						       SLAB_HWCACHE_ALIGN | SLAB_PANIC,
						       NULL);
		} else {
			pool->slab = NULL;
		}
	}
}
然后是bioset_init函数,其中比较重要的是对bio_find_or_create_slab函数的调用,其余部分就是对传入的bio_set(这里是fs_bio_set)这个结构体的一些成员变量进行初始化。
/*
 * A set of allocation pools for bios, initialised by bioset_init().
 */
struct bio_set {
	struct kmem_cache *bio_slab; /* cache returned by bio_find_or_create_slab() */
	unsigned int front_pad; /* front_pad argument given to bioset_init() */
	mempool_t bio_pool; /* mempool layered on bio_slab */
	mempool_t bvec_pool; /* set up by biovec_init_pool() only when BIOSET_NEED_BVECS is passed */
#if defined(CONFIG_BLK_DEV_INTEGRITY)
	/* Integrity pools; not covered by these notes. */
	mempool_t bio_integrity_pool;
	mempool_t bvec_integrity_pool;
#endif
	//Deadlock avoidance for stacking block drivers: see comments in bio_alloc_bioset() for details
	spinlock_t rescue_lock;
	struct bio_list rescue_list;
	struct work_struct rescue_work; /* work function is bio_alloc_rescue */
	struct workqueue_struct *rescue_workqueue; /* allocated only when BIOSET_NEED_RESCUER is passed */
};
/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 *
 * It is initialised in init_bio() with front_pad 0 and BIOSET_NEED_BVECS,
 * and exported for use outside this file.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);
int bioset_init(struct bio_set *bs,unsigned int pool_size, unsigned int front_pad, int flags)
{
//sizeof(bio)的多余部分
unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); //4 * sizeof(struct bio_vec)
bs->front_pad = front_pad; //0
spin_lock_init(&bs->rescue_lock); //自旋锁初始化
bio_list_init(&bs->rescue_list); //bio_list 初始化
INIT_WORK(&bs->rescue_work, bio_alloc_rescue); //工作队列初始化
/*返回的这个值,也记录在这个bio_slabs全局数组里了
看函数名称,bio当中寻找slab或者创建一个slab
*/
bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); //0+back_pad
if (!bs->bio_slab)
return -ENOMEM;
/*
bio_pool的初始化,后面分配内存使用mempool_alloc
pool_size内存池至少保留两个元素
mempool_init(pool, min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc);
后续申请内存走的是这个路线 -> mempool_alloc_slab -> kmem_cache_alloc
*/
if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))//2
goto bad;
//1 & (1 << 0)
if ((flags & BIOSET_NEED_BVECS) && biovec_init_pool(&bs->bvec_pool, pool_size)) //2
goto bad;
if (!(flags & BIOSET_NEED_RESCUER)) // 1 & (1 << 1) -> 0001 & 0010
return 0;
bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); //申请一个工作队列
if (!bs->rescue_workqueue)
goto bad;
return 0;
bad:
bioset_exit(bs);
return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);
/*
 * bio_find_or_create_slab - get a kmem_cache for bios with extra padding.
 * @extra_size: bytes needed in addition to sizeof(struct bio)
 *
 * Searches the global bio_slabs array, under bio_slab_lock, for a live
 * entry whose object size equals sizeof(struct bio) + @extra_size. If one
 * exists its reference count is incremented and its cache is returned.
 * Otherwise a new cache named "bio-<index>" is created in the first unused
 * entry; when there is none, a new entry is appended, doubling the array
 * with krealloc() if it is full.
 *
 * Returns the cache, or NULL if the array could not be grown or the cache
 * could not be created. On failure the bookkeeping is left as it was: no
 * entry is claimed.
 */
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab, *new_bio_slabs;
	unsigned int new_bio_slab_max;
	unsigned int i, entry = -1;

	mutex_lock(&bio_slab_lock);

	for (i = 0; i < bio_slab_nr; i++) {
		bslab = &bio_slabs[i];

		/*
		 * Unused entry: remember the first one for reuse, and never
		 * compare its (possibly stale) slab_size against sz.
		 */
		if (!bslab->slab) {
			if (entry == -1)
				entry = i;
			continue;
		}

		/* Live entry with the same object size: share it. */
		if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			goto out_unlock;
		}
	}

	if (entry == -1) {
		/* No unused entry to recycle; make sure one can be appended. */
		if (bio_slab_nr == bio_slab_max) {
			new_bio_slab_max = bio_slab_max << 1;
			new_bio_slabs = krealloc(bio_slabs,
						 new_bio_slab_max * sizeof(struct bio_slab),
						 GFP_KERNEL);
			if (!new_bio_slabs)
				goto out_unlock;
			bio_slab_max = new_bio_slab_max;
			bio_slabs = new_bio_slabs;
		}
		/*
		 * Candidate only: bio_slab_nr is bumped after the cache has
		 * been created, so a failure cannot leave a counted entry
		 * with uninitialised fields (krealloc() does not zero the
		 * memory it adds).
		 */
		entry = bio_slab_nr;
	}

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%u", entry);
	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				 SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;
	if (entry == bio_slab_nr)
		bio_slab_nr++;

out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}
其初始化过程基本就是这样了,如果对内存池用的不多也不要紧,可以看看相关源码,也不多。
总结:如果只看初始化这部分,会比较晦涩;等后面涉及到bio的申请和释放时,再回头来看初始化部分,就清晰许多了。