2. bio
相关数据结构:
/*
* main unit of I/O for the block layer and lower layers (ie drivers and
* stacking drivers)
*/
struct bio {
sector_t bi_sector;
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev;
unsigned long bi_flags; /* status, command, etc */
unsigned long bi_rw; /* bottom bits READ/WRITE,
* top bits priority
*/
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
/* Number of segments in this BIO after
* physical address coalescing is performed.
*/
unsigned short bi_phys_segments;
/* Number of segments after physical and DMA remapping
* hardware coalescing is performed.
*/
unsigned short bi_hw_segments;
unsigned int bi_size; /* residual I/O count */
/*
* To keep track of the max hw size, we account for the
* sizes of the first and last virtually mergeable segments
* in this bio
*/
unsigned int bi_hw_front_size;
unsigned int bi_hw_back_size;
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
struct bio_vec *bi_io_vec; /* the actual vec list */
bio_end_io_t *bi_end_io;
atomic_t bi_cnt; /* pin count */
void *bi_private;
bio_destructor_t *bi_destructor; /* destructor */
struct bio_set *bi_set; /* memory pools set */
};
/*
* was unsigned short, but we might as well be ready for > 64kB I/O pages
*/
struct bio_vec {
struct page *bv_page;
unsigned int bv_len;
unsigned int bv_offset;
};
bio是 Generic block layer(通用块层) 的核心数据结构。
bio中的段(segment)是由bio_vec数据结构表示的。 bio相关的段被放在bio_vec的数组中,他们的关系表示如下:
3. gendisk
相关数据结构:
struct gendisk {
int major; /* major number of driver */
int first_minor;
int minors; /* maximum number of minors, =1 for
* disks that can't be partitioned. */
char disk_name[32]; /* name of major driver */
struct hd_struct **part; /* [indexed by minor] */
struct block_device_operations *fops;
struct request_queue *queue;
void *private_data;
sector_t capacity;
int flags;
char devfs_name[64]; /* devfs crap */
int number; /* more of the same */
struct device *driverfs_dev;
struct kobject kobj;
struct timer_rand_state *random;
int policy;
atomic_t sync_io; /* RAID */
unsigned long stamp, stamp_idle;
int in_flight;
#ifdef CONFIG_SMP
struct disk_stats *dkstats;
#else
struct disk_stats dkstats;
#endif
};
struct hd_struct {
sector_t start_sect;
sector_t nr_sects;
struct kobject kobj;
unsigned reads, read_sectors, writes, write_sectors;
int policy, partno;
};
磁盘由gendisk对象来表示。
磁盘的分区用hd_struct对象表示。
磁盘的分区是通过设备的次设备号(minor)来区分的,例如
4. request_queue
相关数据结构:
请求队列中两个比较重要的数据结构是: request_queue 和 request
struct request_queue
{
/*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
struct request *last_merge;
elevator_t *elevator;
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq;
request_fn_proc *request_fn;
merge_request_fn *back_merge_fn;
merge_request_fn *front_merge_fn;
merge_requests_fn *merge_requests_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
activity_fn *activity_fn;
issue_flush_fn *issue_flush_fn;
prepare_flush_fn *prepare_flush_fn;
end_flush_fn *end_flush_fn;
/*
* Auto-unplugging state
*/
struct timer_list unplug_timer;
int unplug_thresh; /* After this many requests */
unsigned long unplug_delay; /* After this many jiffies */
struct work_struct unplug_work;
struct backing_dev_info backing_dev_info;
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
*/
void *queuedata;
void *activity_data;
/*
* queue needs bounce pages for pages above this limit
*/
unsigned long bounce_pfn;
unsigned int bounce_gfp;
/*
* various queue flags, see QUEUE_* below
*/
unsigned long queue_flags;
/*
* protects queue structures from reentrancy. ->__queue_lock should
* _never_ be used directly, it is queue private. always use
* ->queue_lock.
*/
spinlock_t __queue_lock;
spinlock_t *queue_lock;
/*
* queue kobject
*/
struct kobject kobj;
/*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
unsigned int nr_congestion_on;
unsigned int nr_congestion_off;
unsigned int nr_batching;
unsigned short max_sectors;
unsigned short max_hw_sectors;
unsigned short max_phys_segments;
unsigned short max_hw_segments;
unsigned short hardsect_size;
unsigned int max_segment_size;
unsigned long seg_boundary_mask;
unsigned int dma_alignment;
struct blk_queue_tag *queue_tags;
atomic_t refcnt;
unsigned int in_flight;
/*
* sg stuff
*/
unsigned int sg_timeout;
unsigned int sg_reserved_size;
struct list_head drain_list;
/*
* reserved for flush operations
*/
struct request *flush_rq;
unsigned char ordered;
};
------------------------------------------------------------------
/*
* try to put the fields that are referenced together in the same cacheline
*/
struct request {
struct list_head queuelist; /* looking for ->queue? you must _not_
* access it directly, use
* blkdev_dequeue_request! */
unsigned long flags; /* see REQ_ bits below */
/* Maintain bio traversal state for part by part I/O submission.
* hard_* are block layer internals, no driver should touch them!
*/
sector_t sector; /* next sector to submit */
unsigned long nr_sectors; /* no. of sectors left to submit */
/* no. of sectors left to submit in the current segment */
unsigned int current_nr_sectors;
sector_t hard_sector; /* next sector to complete */
unsigned long hard_nr_sectors; /* no. of sectors left to complete */
/* no. of sectors left to complete in the current segment */
unsigned int hard_cur_sectors;
struct bio *bio;
struct bio *biotail;
void *elevator_private;
int rq_status; /* should split this into a few status bits */
struct gendisk *rq_disk;
int errors;
unsigned long start_time;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
/* Number of scatter-gather addr+len pairs after
* physical and DMA remapping hardware coalescing is performed.
* This is the number of scatter-gather entries the driver
* will actually have to deal with after DMA mapping is done.
*/
unsigned short nr_hw_segments;
int tag;
char *buffer;
int ref_count;
request_queue_t *q;
struct request_list *rl;
struct completion *waiting;
void *special;
/*
* when request is used as a packet command carrier
*/
unsigned int cmd_len;
unsigned char cmd[BLK_MAX_CDB];
unsigned int data_len;
void *data;
unsigned int sense_len;
void *sense;
unsigned int timeout;
/*
* For Power Management requests
*/
struct request_pm_state *pm;
/*
* completion callback. end_io_data should be folded in with waiting
*/
rq_end_io_fn *end_io;
void *end_io_data;
};
每一个对于块设备的请求都用一个request描述符来表示.
而每个请求又包含一个或多个bio结构.
对于一个请求队列(request_queue),其所有的request描述符形成一个双向链表.
5. block_device
相关数据结构:
struct block_device {
dev_t bd_dev; /* not a kdev_t - it's a search key */
struct inode * bd_inode; /* will die */
int bd_openers;
struct semaphore bd_sem; /* open/close mutex */
struct semaphore bd_mount_sem; /* mount mutex */
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
struct block_device * bd_contains;
unsigned bd_block_size;
struct hd_struct * bd_part;
/* number of times partitions within this device have been opened. */
unsigned bd_part_count;
int bd_invalidated;
struct gendisk * bd_disk;
struct list_head bd_list;
struct backing_dev_info *bd_inode_backing_dev_info;
/*
* Private data. You must have bd_claim'ed the block_device
* to use this. NOTE: bd_claim allows an owner to claim
* the same device multiple times, the owner must take special
* care to not mess up bd_private for that case.
*/
unsigned long bd_private;
};
每一个块设备都由一个block_device对象来表示.
这里所说的块设备其实是指一个逻辑块设备.
比如一个磁盘由一个block_device对象来表示,而该磁盘又分了三个分区,这三个分区就又是三个逻辑块设备.
block_device中的bd_contains字段指向与整个磁盘相关的block_device对象.
因而,如果如果block_device表示一个磁盘分区,则其bd_contains指向整个磁盘的block_device对象.
如果block_device表示的是一个磁盘,则其bd_contains指向自己.
block_device中有几个比较重要的字段:
bd_disk 指向块设备中磁盘对应的gendisk结构.
bd_part 指向hd_struct分区描述符(如果是整个磁盘的话,bd_part为NULL)
综合前面所有的结构,整体的结构关系如下图所示:
转自: http://bbs.chinaunix.net/thread-2050601-1-1.html