struct block_device被内核用来表示块设备,它是块设备的基础数据结构。系统中所有的块设备都被添加到了伪文件系统bdev中,虚拟文件系统VFS通过该文件系统来访问块设备。bdev使用的核心数据结构为:
struct bdev_inode { struct block_device bdev; struct inode vfs_inode; };由此可见,只要找到块设备在文件系统中的inode(它被保存在bdev_inode的vfs_inode域中),就可以找到该块设备对应的struct block_device,进而就可以对设备进行操作了。从VFS到块设备的关联关系就是通过伪文件系统bdev建立起来的。
int register_blkdev(unsigned int major, const char *name);
int unregister_blkdev(unsigned int major, const char *name);参数和注册时的要匹配。
struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ struct list_head bd_inodes; void * bd_claiming; void * bd_holder; int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif struct block_device * bd_contains; unsigned bd_block_size; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; struct gendisk * bd_disk; struct request_queue * bd_queue; struct list_head bd_list; /* * Private data. You must have bd_claim'ed the block_device * to use this. NOTE: bd_claim allows an owner to claim * the same device multiple times, the owner must take special * care to not mess up bd_private for that case. */ unsigned long bd_private; /* The counter of freeze processes */ int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; };
struct gendisk { /* major, first_minor and minors are input parameters only, * don't use directly. Use disk_devt() and disk_max_parts(). */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for disks that can't be partitioned. */ char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned int events; /* supported events */ unsigned int async_events; /* async events, subset of all */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other * non-critical accesses use RCU. Always access through * helpers. */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; struct request_queue *queue; void *private_data; int flags; struct device *driverfs_dev; // FIXME: remove struct kobject *slave_dir; struct timer_rand_state *random; atomic_unchecked_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; #endif int node_id; };
struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif unsigned long stamp; atomic_t in_flight[2]; #ifdef CONFIG_SMP struct disk_stats __percpu *dkstats; #else struct disk_stats dkstats; #endif atomic_t ref; struct rcu_head rcu_head; };其中关键数据域的含义如下:
struct block_device_operations { int (*open) (struct block_device *, fmode_t); int (*release) (struct gendisk *, fmode_t); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); struct module *owner; } __do_const;其中特殊的api的含义如下
struct request_queue { /* * Together with queue_head for cacheline sharing */ struct list_head queue_head; struct request *last_merge; struct elevator_queue *elevator; /* * the queue request freelist, one for reads and one for writes */ struct request_list rq; request_fn_proc *request_fn; make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; merge_bvec_fn *merge_bvec_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; /* * Dispatch queue sorting */ sector_t end_sector; struct request *boundary_rq; /* * Delayed queue handling */ struct delayed_work delay_work; struct backing_dev_info backing_dev_info; /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. */ void *queuedata; /* * various queue flags, see QUEUE_* below */ unsigned long queue_flags; /* * ida allocated id for this queue. Used to index queues from * ioctx. */ int id; /* * queue needs bounce pages for pages above this limit */ gfp_t bounce_gfp; /* * protects queue structures from reentrancy. ->__queue_lock should * _never_ be used directly, it is queue private. always use * ->queue_lock. */ spinlock_t __queue_lock; spinlock_t *queue_lock; /* * queue kobject */ struct kobject kobj; /* * queue settings */ unsigned long nr_requests; /* Max # of requests */ unsigned int nr_congestion_on; unsigned int nr_congestion_off; unsigned int nr_batching; unsigned int dma_drain_size; void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; struct blk_queue_tag *queue_tags; struct list_head tag_busy_list; unsigned int nr_sorted; unsigned int in_flight[2]; unsigned int rq_timeout; struct timer_list timeout; struct list_head timeout_list; struct list_head icq_list; struct queue_limits limits; /* * sg stuff */ unsigned int sg_timeout; unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace *blk_trace; #endif /* * for flush operations */ unsigned int flush_flags; unsigned int flush_not_queueable:1; unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; struct request flush_rq; struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; int bsg_job_size; struct bsg_class_device bsg_dev; #endif #ifdef CONFIG_BLK_DEV_THROTTLING /* Throttle data */ struct throtl_data *td; #endif };该数据结构比较庞大,其主要成员及其含义如下:
struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock);