linux内核bio子系统学习记录一:

bio子系统学习一:

主要源码目录 :block/bio.c include/linux/bio.h
内核版本:4.19.1

这部分先总结bio子系统的初始化部分,后续再总结bio的申请与释放,以及其它API的使用。

bio涉及到频繁的内存申请以及释放,包括bio结构体本身,以及它比较重要的成员bi_io_vec(类型为struct bio_vec *)所指向内存的申请和释放,所以初始化部分主要是相关内存池的初始化,用于后面bio和bio_vec的内存申请和释放。

其中bio结构体和bio_vec结构体定义如下:

/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct gendisk		*bi_disk;
	unsigned int		bi_opf;		/* bottom bits req flags,
						 * top bits REQ_OP. Use
						 * accessors.
						 */
	unsigned short		bi_flags;	/* status, etc and bvec pool number */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	u8			bi_partno;
	/* Number of segments in this BIO after
	 * physical address coalescing is performed.
	 */
	unsigned int		bi_phys_segments;
	/*
	 * To keep track of the max segment size, we account for the
	 * sizes of the first and last mergeable segments in this bio.
	 */
	unsigned int		bi_seg_front_size;
	unsigned int		bi_seg_back_size;
	struct bvec_iter	bi_iter; /* iterator over the bvecs; records how far processing of this bio has progressed */
	
	atomic_t		__bi_remaining;
	bio_end_io_t		*bi_end_io;
	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Optional ioc and css associated with this bio.  Put on bio
	 * release.  Read comment on top of bio_associate_current().
	 */
	struct io_context	*bi_ioc;
	struct cgroup_subsys_state *bi_css;
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#endif
	/*
	 * Anonymous union; it has no members at all unless
	 * CONFIG_BLK_DEV_INTEGRITY is defined.
	 */
	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};
	unsigned short		bi_vcnt;	/* how many bio_vec's */
	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */
	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */
	atomic_t		__bi_cnt;	/* pin count */
	struct bio_vec		*bi_io_vec;	/* the actual vec list */
	struct bio_set		*bi_pool;
	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[0];
};
/*
 * One element of a bio's vector list (see bi_io_vec / bi_inline_vecs in
 * struct bio).
 *
 * was unsigned short, but we might as well be ready for > 64kB I/O pages
 */
struct bio_vec {
	struct page	*bv_page;	/* page this element refers to */
	unsigned int	bv_len;		/* presumably the length of the data in bytes - TODO confirm */
	unsigned int	bv_offset;	/* presumably the byte offset of the data within bv_page - TODO confirm */
};

入口函数init_bio:

其中,bio_integrity_init函数和bioset_integrity_create函数先不看吧。

//这个是涉及到的结构体定义
struct bio_slab {
	struct kmem_cache *slab; //指向cache描述符,cache描述符描述了slab信息
	unsigned int slab_ref; //bio_slab结构体的引用计数
	unsigned int slab_size; //内存池中object大小
	char name[8]; //内存池名称,/proc/slabinfo可以看到
};

static DEFINE_MUTEX(bio_slab_lock);
static struct bio_slab *bio_slabs;
static unsigned int bio_slab_nr, bio_slab_max;

/*
 * init_bio - boot-time initialisation of the bio allocator.
 *
 * Background reading collected by the author of these notes:
 *	1: https://zhuanlan.zhihu.com/p/596543999?utm_id=0
 *	2: https://blog.csdn.net/geshifei/article/details/119959905
 *
 * Sequence: allocate the global bio_slabs table, run bio_integrity_init(),
 * create the biovec slab caches, then initialise fs_bio_set and its
 * integrity pool.  Any failure panics; otherwise 0 is returned.
 */
static int __init init_bio(void)
{
	/* Global slot table: starts with room for two entries, none in use. */
	bio_slab_nr = 0;
	bio_slab_max = 2;
	bio_slabs = kcalloc(bio_slab_max, sizeof(*bio_slabs), GFP_KERNEL);
	if (bio_slabs == NULL)
		panic("bio: can't allocate bios\n");

	/* Integrity support - deliberately not covered in these notes. */
	bio_integrity_init();

	/* The bvec_slabs array manages the bio_vec caches; create them now. */
	biovec_init_slabs();

	/*
	 * Initialise the fs_bio_set variable.  Arguments of bioset_init():
	 *   1: the bio_set to initialise;
	 *   2: minimum number of bios kept in the pool;
	 *   3: size of the extra data placed in front of struct bio;
	 *      front_pad + sizeof(struct bio) is the pool object size
	 *      (author's description).  Bios taken from fs_bio_set carry no
	 *      extra data in front, hence 0;
	 *   4: flags:
	 *      BIOSET_NEED_BVECS   - also create a separate pool for the
	 *                            iovecs; fs_bio_set needs this flag;
	 *      BIOSET_NEED_RESCUER - create a workqueue whose handler is
	 *                            bio_alloc_rescue.  Per the author's
	 *                            notes: when memory is short and no bio
	 *                            can be taken from the pool, the worker
	 *                            submits bios parked on a list by
	 *                            stacking devices (e.g. raid) so that
	 *                            bios get recycled; not needed for
	 *                            non-stacked devices.
	 * Author's note: the call below evaluates to (2, 0, 1).
	 */
	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS) != 0)
		panic("bio: can't allocate bios\n");

	/* Integrity pool - deliberately not covered (author's note: size 2). */
	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE) != 0)
		panic("bio: can't create integrity pool\n");

	return 0;
}
subsys_initcall(init_bio);

这里先分析下bio_slabs这个全局数组。看变量名字也比较容易分析,它是后续申请bio结构体时使用的。初始化时,数组只有两个元素(bio_slab_max = 2),而且各元素的成员还没有被赋值(kcalloc分配的内存全为0);只有在实际使用时,元素的成员才会被初始化,必要时数组还会扩容(容量翻倍),元素个数就不再是2了(见bio_find_or_create_slab函数)。所以在申请bio的时候,根据算法的设计,内存有可能来自数组中任何一个元素所对应的slab。

biovec_init_slabs函数分析

这个函数看名称也不难分析,它为后续申请bio_vec相关的内存做初始化。

/*
 * Vector-count threshold: bioset_init() reserves room for this many
 * bio_vecs behind each bio, and biovec_init_slabs() creates no cache for
 * pools whose nr_vecs does not exceed it.
 */
#define BIO_INLINE_VECS 4

/*
 * bvec_slabs - descriptors of the bio_vec pools, one per vector count.
 * Reference: https://blog.csdn.net/sinat_32960911/article/details/132237781
 *
 * __read_mostly marks the table as data that is read often but rarely
 * written.
 *
 * Element type, for reference:
 *	struct biovec_slab {
 *		int nr_vecs;
 *		char *name;
 *		struct kmem_cache *slab;
 *	};
 *
 * BV(count, suffix) sets .nr_vecs and builds the name "biovec-<suffix>" by
 * stringifying suffix.  .slab is not set here; biovec_init_slabs() fills
 * it in.  The helper macro is removed again right after the table.
 */
#define BV(count, suffix) { .nr_vecs = (count), .name = "biovec-" #suffix }
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
	BV(1, 1),
	BV(4, 4),
	BV(16, 16),
	BV(64, 64),
	BV(128, 128),
	BV(BIO_MAX_PAGES, max),	/* author's note: think of BIO_MAX_PAGES as 256 */
};
#undef BV

/*
 * biovec_init_slabs - create the kmem_caches behind the bvec_slabs table.
 *
 * Walks all BVEC_POOL_NR entries (author's note: 6).  Entries whose vector
 * count fits within BIO_INLINE_VECS get no cache (slab = NULL); with the
 * table above those are the first two entries.  Every other entry gets a
 * cache whose object size is nr_vecs * sizeof(struct bio_vec).
 */
static void __init biovec_init_slabs(void)
{
	int idx;

	for (idx = 0; idx < BVEC_POOL_NR; idx++) {
		/* Descriptor of the idx-th pool. */
		struct biovec_slab *pool = &bvec_slabs[idx];
		int bytes;

		if (pool->nr_vecs > BIO_INLINE_VECS) {
			/* Object size of the cache: room for nr_vecs bio_vecs. */
			bytes = pool->nr_vecs * sizeof(struct bio_vec);
			pool->slab = kmem_cache_create(pool->name, bytes, 0,
						       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
		} else {
			pool->slab = NULL;
		}
	}
}

bioset_init函数分析

这里比较重要的是调用的bio_find_or_create_slab函数,其余的就是对fs_bio_set这个结构体的一些成员变量进行初始化。

/*
 * A set of memory pools from which bios (and optionally their bio_vecs)
 * are allocated; initialised by bioset_init().
 */
struct bio_set {
	struct kmem_cache *bio_slab;	/* cache obtained from bio_find_or_create_slab() */
	unsigned int front_pad;		/* bytes of extra data in front of each bio, as passed to bioset_init() */
	mempool_t bio_pool;		/* pool of bios, backed by bio_slab */
	mempool_t bvec_pool;		/* pool of bio_vecs; only set up when BIOSET_NEED_BVECS is passed */
#if defined(CONFIG_BLK_DEV_INTEGRITY)
	mempool_t bio_integrity_pool;
	mempool_t bvec_integrity_pool;
#endif
	//Deadlock avoidance for stacking block drivers: see comments in bio_alloc_bioset() for details
	spinlock_t		rescue_lock;
	struct bio_list		rescue_list;
	struct work_struct	rescue_work;		/* work item whose handler is bio_alloc_rescue */
	struct workqueue_struct	*rescue_workqueue;	/* only allocated when BIOSET_NEED_RESCUER is passed */
};

/*
 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
 * IO code that does not need private memory pools.
 */
struct bio_set fs_bio_set;
EXPORT_SYMBOL(fs_bio_set);

int bioset_init(struct bio_set *bs,unsigned int pool_size, unsigned int front_pad, int flags)
{
	//sizeof(bio)的多余部分
	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); //4 * sizeof(struct bio_vec)

	bs->front_pad = front_pad; //0

	spin_lock_init(&bs->rescue_lock); //自旋锁初始化
	bio_list_init(&bs->rescue_list); //bio_list 初始化
	INIT_WORK(&bs->rescue_work, bio_alloc_rescue); //工作队列初始化

	/*返回的这个值,也记录在这个bio_slabs全局数组里了
		看函数名称,bio当中寻找slab或者创建一个slab
	*/
	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad); //0+back_pad
	if (!bs->bio_slab)
		return -ENOMEM;
	/*
		bio_pool的初始化,后面分配内存使用mempool_alloc
		pool_size内存池至少保留两个元素

		mempool_init(pool, min_nr, mempool_alloc_slab, mempool_free_slab, (void *) kc);
		后续申请内存走的是这个路线 -> mempool_alloc_slab -> kmem_cache_alloc
	*/
	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))//2
		goto bad;

	//1 & (1 << 0)
	if ((flags & BIOSET_NEED_BVECS) && biovec_init_pool(&bs->bvec_pool, pool_size)) //2
		goto bad;

	if (!(flags & BIOSET_NEED_RESCUER)) // 1 & (1 << 1)   -> 0001 & 0010
		return 0;

	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); //申请一个工作队列
	if (!bs->rescue_workqueue)
		goto bad;
	return 0;
bad:
	bioset_exit(bs);
	return -ENOMEM;
}
EXPORT_SYMBOL(bioset_init);

bio_find_or_create_slab函数分析

/*
 * bio_find_or_create_slab - get a kmem_cache for bios of a given size.
 * @extra_size: bytes added to sizeof(struct bio) to get the object size
 *              (bioset_init() passes front_pad plus the inline-vec room)
 *
 * Scans the global bio_slabs table under bio_slab_lock.  A live slot with
 * the same object size is shared (its slab_ref is incremented).  Otherwise
 * a new cache named "bio-<slot index>" is created in the first free slot,
 * or in a new slot at the end of the table; the table doubles in size via
 * krealloc() when it is full.
 *
 * Returns the cache, or NULL if the table could not be grown or the cache
 * could not be created.
 *
 * NOTE: this differs from the code as originally quoted in three places:
 *  - A slot is only matched by size while it is live (slab != NULL).  The
 *    original also compared slab_size on free slots once a free slot had
 *    been remembered; a stale size match there bumped slab_ref on a dead
 *    slot and ended the scan before later live slots were examined.
 *  - bio_slab_nr is advanced only after kmem_cache_create() succeeds.  The
 *    original advanced it first, so a failed creation left a slot inside
 *    the scanned range that had never been initialised (memory added by
 *    krealloc() is not zeroed).
 *  - The slot index is unsigned and is now printed with %u instead of %d.
 */
static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
{
	const unsigned int no_entry = -1;	/* "no free slot remembered yet" */
	unsigned int sz = sizeof(struct bio) + extra_size;
	struct kmem_cache *slab = NULL;
	struct bio_slab *bslab, *new_bio_slabs;
	unsigned int new_bio_slab_max;
	unsigned int i, entry = no_entry;

	mutex_lock(&bio_slab_lock);

	/*
	 * Look for a live slot of exactly this size; along the way remember
	 * the first free slot in case a new cache has to be created.
	 */
	for (i = 0; i < bio_slab_nr; i++) {
		bslab = &bio_slabs[i];

		if (!bslab->slab) {
			if (entry == no_entry)
				entry = i;
			continue;
		}
		if (bslab->slab_size == sz) {
			slab = bslab->slab;
			bslab->slab_ref++;
			goto out_unlock;
		}
	}

	/* No free slot inside the used range: append, growing if needed. */
	if (entry == no_entry) {
		if (bio_slab_nr == bio_slab_max) {
			new_bio_slab_max = bio_slab_max << 1;
			new_bio_slabs = krealloc(bio_slabs,
						 new_bio_slab_max * sizeof(struct bio_slab),
						 GFP_KERNEL);
			/* On failure the old table is still valid and untouched. */
			if (!new_bio_slabs)
				goto out_unlock;
			bio_slab_max = new_bio_slab_max;
			bio_slabs = new_bio_slabs;
		}
		entry = bio_slab_nr;
	}

	bslab = &bio_slabs[entry];

	snprintf(bslab->name, sizeof(bslab->name), "bio-%u", entry);
	slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
				 SLAB_HWCACHE_ALIGN, NULL);
	if (!slab)
		goto out_unlock;

	bslab->slab = slab;
	bslab->slab_ref = 1;
	bslab->slab_size = sz;

	/* Publish an appended slot only now that it is fully initialised. */
	if (entry == bio_slab_nr)
		bio_slab_nr++;
out_unlock:
	mutex_unlock(&bio_slab_lock);
	return slab;
}

其初始化过程基本就是这样了,如果对内存池用的不多也不要紧,可以看看相关源码,也不多。

总结:初始化部分如果只单独看,会比较晦涩;后面涉及到bio的申请和释放时,再回头来看初始化部分,就清晰许多了。

你可能感兴趣的:(#,block,linux)