#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/hdreg.h>
#include <linux/kdev_t.h>
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
/* When defined, the open/release/ioctl/timer entry points log their name. */
#define DEBUG_OPS 1

MODULE_LICENSE("Dual BSD/GPL");

/* Major number passed to register_blkdev(); 0 asks for a dynamic one. */
static int sbull_major;
module_param(sbull_major, int, 0);

/* Hardware sector size announced to the block layer, in bytes. */
static int hardsect_size = 512;
module_param(hardsect_size, int, 0);

/* Size of each drive, counted in hardsect_size-byte sectors. */
static int nsectors = 102400;
module_param(nsectors, int, 0);

/* Number of disks created at module load. */
static int ndevices = 4;
module_param(ndevices, int, 0);

/* The different "request modes" selectable through request_mode. */
enum {
	RM_SIMPLE  = 0,	/* The extra-simple request function */
	RM_FULL    = 1,	/* The full-blown, bio-walking version */
	RM_NOQUEUE = 2,	/* Bypass the request queue: use make_request */
};

/* Which of the RM_* strategies setup_device() wires up. */
static int request_mode = RM_NOQUEUE;
module_param(request_mode, int, 0);
/* Minor numbers reserved for each disk (whole device plus partitions). */
#define SBULL_MINORS	16
/* Shift that converts a minor number into a device index (1 << 4 == 16). */
#define MINOR_SHIFT	4
/* Device index encoded in a kdev number; fully parenthesized. */
#define DEVNUM(kdevnum)	(MINOR(kdev_t_to_nr(kdevnum)) >> MINOR_SHIFT)

/*
 * The kernel always talks in 512-byte sectors, regardless of the
 * hardware sector size the driver announces.
 */
#define KERNEL_SECTOR_SIZE	512

/*
 * Idle time after the last close before the media is declared "changed".
 * Parenthesized so the macro is safe inside larger expressions.
 */
#define INVALIDATE_DELAY	(30*HZ)
/*
 * Per-disk state of the driver.
 */
struct sbull_dev {
	int size;			/* Size of the backing store, in bytes */
	u8 *data;			/* vmalloc'ed backing store */
	short users;			/* Number of current openers */
	short media_change;		/* Non-zero: report a media change */
	spinlock_t lock;		/* Protects users/media_change; queue lock */
	struct request_queue *queue;	/* The device request queue */
	struct gendisk *gd;		/* The gendisk structure */
	struct timer_list timer;	/* Drives the simulated media change */
};

/* Array of ndevices entries, allocated at module load. */
static struct sbull_dev *Devices;
/*
 * sbull_transfer - copy whole sectors between the RAM disk and a buffer.
 * @dev:    device whose backing store (dev->data, dev->size bytes) is used
 * @sector: first 512-byte sector of the transfer
 * @nsect:  number of 512-byte sectors to move
 * @buffer: kernel-addressable memory to copy from (write) or into (read)
 * @write:  non-zero copies buffer -> device, zero copies device -> buffer
 *
 * A transfer that would run past the end of the backing store is logged
 * and dropped without touching any memory.
 */
static void sbull_transfer(struct sbull_dev *dev, unsigned long sector,
		unsigned long nsect, char *buffer, int write)
{
	/* Number of complete 512-byte sectors the backing store holds. */
	unsigned long total = (unsigned long) dev->size / KERNEL_SECTOR_SIZE;
	unsigned long offset = sector * KERNEL_SECTOR_SIZE;
	unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;

	/*
	 * Range-check in sector units: the byte products above (and their
	 * sum) may wrap around for large sector numbers, which would let an
	 * out-of-range request slip past a byte-based comparison.
	 */
	if (sector > total || nsect > total - sector) {
		printk (KERN_NOTICE "Beyond-end write (%lu %lu)\n", offset, nbytes);
		return;
	}

	if (write)
		memcpy(dev->data + offset, buffer, nbytes);
	else
		memcpy(buffer, dev->data + offset, nbytes);
}
/*
 * The simplest request function. It is called by the block subsystem with
 * the request queue of the device as its only argument.
 */
static void sbull_request(request_queue_t *q)
{
	/* The kernel describes one pending request with a struct request. */
	struct request *rq;

	for (;;) {
		struct sbull_dev *dev;

		/* Take the next request off the given queue, if any. */
		rq = elv_next_request(q);
		if (rq == NULL)
			break;

		dev = rq->rq_disk->private_data;

		/*
		 * Only filesystem requests are served here; other command
		 * requests (e.g. suspend/resume of the device) are refused.
		 */
		if (! blk_fs_request(rq)) {
			printk (KERN_NOTICE "Skip non-fs request\n");
			/* Every request must be ended; 0 means it failed. */
			end_request(rq, 0);
			continue;
		}

		/*
		 * rq->sector              first sector of the request
		 * rq->current_nr_sectors  number of sectors to transfer
		 * rq->buffer              data buffer of the request
		 * rq_data_dir(rq)         direction; 0 means read
		 */
		sbull_transfer(dev, rq->sector, rq->current_nr_sectors,
				rq->buffer, rq_data_dir(rq));

		/* 1 means the request completed successfully. */
		end_request(rq, 1);
	}
}
/*
 * sbull_xfer_bio - transfer every segment (bio_vec) of a single bio.
 * @dev: device the bio is addressed to
 * @bio: bio to process; bio->bi_sector is the starting sector
 *
 * Each bio_vec points at one chunk of memory. The chunk's page is mapped
 * temporarily with __bio_kmap_atomic() (which works for both high and low
 * memory), copied with sbull_transfer(), and unmapped again.
 *
 * Always returns 0.
 */
static int sbull_xfer_bio(struct sbull_dev *dev, struct bio *bio)
{
	int i;
	struct bio_vec *bvec;
	sector_t sector = bio->bi_sector;

	bio_for_each_segment(bvec, bio, i) {
		/*
		 * Size of THIS segment. bio_cur_sectors() always reports the
		 * segment at bio->bi_idx, which does not advance inside this
		 * loop, so it is wrong whenever segment lengths differ.
		 */
		unsigned long nsect = bvec->bv_len / KERNEL_SECTOR_SIZE;
		char *buffer = __bio_kmap_atomic(bio, i, KM_USER0);

		sbull_transfer(dev, sector, nsect,
				buffer, bio_data_dir(bio) == WRITE);
		sector += nsect;

		/* The unmap helper takes the mapped address, not the bio. */
		__bio_kunmap_atomic(buffer, KM_USER0);
	}
	return 0;
}
/*
 * Walk every bio attached to a request and transfer each of them.
 * Returns the number of 512-byte sectors covered by those bios.
 */
static int sbull_xfer_request(struct sbull_dev *dev, struct request *req)
{
	struct bio *cur;
	int done = 0;

	rq_for_each_bio(cur, req) {
		sbull_xfer_bio(dev, cur);
		done += cur->bi_size / KERNEL_SECTOR_SIZE;
	}

	return done;
}
/*
 * sbull_full_request - request function that processes whole requests by
 * walking their bios.
 * @q: request queue of the device; q->queuedata holds the sbull_dev
 */
static void sbull_full_request(request_queue_t *q)
{
	struct request *req;
	int sectors_xferred;
	struct sbull_dev *dev = q->queuedata;

	while ((req = elv_next_request(q)) != NULL) {
		if (! blk_fs_request(req)) {
			printk (KERN_NOTICE "Skip non-fs request\n");
			end_request(req, 0);
			continue;
		}
		sectors_xferred = sbull_xfer_request(dev, req);

		/*
		 * end_that_request_first() tells the block layer that the
		 * given number of sectors, counted from where the previous
		 * completion stopped, has been transferred. A return of 0
		 * means the whole request is done: it must then be removed
		 * from the queue with blkdev_dequeue_request() and handed to
		 * end_that_request_last(), which notifies anyone waiting for
		 * the request and recycles the request structure.
		 *
		 * Both completion calls are given uptodate == 1: the transfer
		 * succeeded, so the final call must not report an error.
		 */
		if (! end_that_request_first(req, 1, sectors_xferred)) {
			blkdev_dequeue_request(req);
			end_that_request_last(req, 1);
		}
	}
}
/*
 * Queue-less bio handler, installed with blk_queue_make_request().
 * A non-zero return value would make the block layer submit the bio again,
 * so this function always returns 0.
 */
static int sbull_make_request(request_queue_t *q, struct bio *bio)
{
	struct sbull_dev *dev = q->queuedata;
	int err = sbull_xfer_bio(dev, bio);

	/* Report completion of the whole bio; an error of 0 means success. */
	bio_endio(bio, bio->bi_size, err);

	return 0;
}
/*
 * sbull_open - open method of the block device.
 * @inode: inode of the device node; gives access to the block_device
 * @filp:  open file; its private_data is set to the sbull_dev
 *
 * Cancels a pending "media removal" timer, counts the new user and, for the
 * first opener, lets the block layer check for a media change.
 *
 * check_disk_change() calls this driver's media_changed method and, when
 * that reports a change, flushes the disk and calls revalidate_disk. That
 * path may sleep, so it is invoked only after dev->lock has been dropped.
 *
 * Always returns 0.
 */
static int sbull_open(struct inode *inode, struct file *filp)
{
	struct sbull_dev *dev = inode->i_bdev->bd_disk->private_data;
	int first_open;

#ifdef DEBUG_OPS
	printk("--%s--%s--\n", __FILE__, __FUNCTION__);
#endif
	del_timer_sync(&dev->timer);
	filp->private_data = dev;

	spin_lock(&dev->lock);
	first_open = !dev->users;
	dev->users++;
	spin_unlock(&dev->lock);

	if (first_open)
		check_disk_change(inode->i_bdev);

	return 0;
}
/*
 * Release method: drop one user. When the last user goes away, arm the
 * timer that will flag a simulated media change INVALIDATE_DELAY later.
 * Always returns 0.
 */
static int sbull_release(struct inode *inode, struct file *filp)
{
	struct sbull_dev *dev = inode->i_bdev->bd_disk->private_data;

#ifdef DEBUG_OPS
	printk("--%s--%s--\n", __FILE__, __FUNCTION__);
#endif
	spin_lock(&dev->lock);
	if (--dev->users == 0) {
		dev->timer.expires = jiffies + INVALIDATE_DELAY;
		add_timer(&dev->timer);
	}
	spin_unlock(&dev->lock);

	return 0;
}
/*
 * media_changed method: report the device's media-change flag.
 * Installed in sbull_ops.
 */
int sbull_media_changed(struct gendisk *gd)
{
	return ((struct sbull_dev *) gd->private_data)->media_change;
}
/*
 * revalidate_disk method. If the media-change flag is set, clear it and
 * wipe the backing store, which simulates the insertion of a new medium.
 * Installed in sbull_ops. Always returns 0.
 */
int sbull_revalidate(struct gendisk *gd)
{
	struct sbull_dev *dev = gd->private_data;

	if (!dev->media_change)
		return 0;

	dev->media_change = 0;
	memset (dev->data, 0, dev->size);
	return 0;
}
/*
 * Timer handler. It fires INVALIDATE_DELAY after the last close and sets
 * media_change, which stands for "the medium has been swapped".
 * @ldev carries the struct sbull_dev pointer stored in timer.data.
 */
void sbull_invalidate(unsigned long ldev)
{
	struct sbull_dev *dev = (struct sbull_dev *) ldev;

#ifdef DEBUG_OPS
	printk("--%s--%s--\n", __FILE__, __FUNCTION__);
#endif
	spin_lock(&dev->lock);
	if (!dev->users && dev->data)
		dev->media_change = 1;
	else
		printk (KERN_WARNING "sbull: timer sanity check failed\n");
	spin_unlock(&dev->lock);
}
/*
 * sbull_ioctl - ioctl method; only HDIO_GETGEO is implemented.
 * @inode: inode of the device node (unused)
 * @filp:  open file; private_data holds the sbull_dev
 * @cmd:   ioctl command
 * @arg:   user-space pointer to a struct hd_geometry for HDIO_GETGEO
 *
 * Returns 0 on success, -EFAULT if the result cannot be copied to user
 * space, and -ENOTTY for any other command.
 */
int sbull_ioctl (struct inode *inode, struct file *filp,
		unsigned int cmd, unsigned long arg)
{
	long size;
	struct hd_geometry geo;
	struct sbull_dev *dev = filp->private_data;

#ifdef DEBUG_OPS
	printk("--%s--%s--\n", __FILE__, __FUNCTION__);
#endif
	switch(cmd) {
	case HDIO_GETGEO:
		/*
		 * A hard disk has several platters with two surfaces each and
		 * one head per surface. Each surface is divided into sectors;
		 * concentric circles on one surface are tracks, and tracks of
		 * equal radius across all surfaces form a cylinder, so
		 *   capacity = heads * cylinders * sectors/track * bytes/sector
		 *
		 * This is a virtual device, so a geometry is made up: 4 heads
		 * and 16 sectors per track give 64 sectors per cylinder, hence
		 * the shift by 6.
		 */
		memset(&geo, 0, sizeof(geo));	/* no stack padding to user space */
		size = dev->size / KERNEL_SECTOR_SIZE;
		geo.cylinders = (size & ~0x3f) >> 6;
		geo.heads = 4;
		geo.sectors = 16;
		geo.start = 0;
		if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
			return -EFAULT;
		return 0;
	}

	return -ENOTTY;
}
/*
 * The operations this block device supports.
 */
static struct block_device_operations sbull_ops = {
	.owner           = THIS_MODULE,
	.open            = sbull_open,		/* e.g. on mount */
	.release         = sbull_release,	/* e.g. on umount */
	.ioctl           = sbull_ioctl,
	.media_changed   = sbull_media_changed,
	.revalidate_disk = sbull_revalidate,
};
/*
 * setup_device - create one virtual disk.
 * @dev:   slot in the Devices array to initialize
 * @which: index of the disk; selects its name (sbulla, sbullb, ...) and
 *         its first minor number
 *
 * On failure the partially built state is torn down again and the matching
 * pointers in @dev are reset to NULL, so the slot is left with no data,
 * queue or gendisk.
 */
static void setup_device(struct sbull_dev *dev, int which)
{
	memset (dev, 0, sizeof (struct sbull_dev));

	/*
	 * Initialize the lock and the timer before anything can fail: the
	 * module exit path runs del_timer_sync() on every slot, including
	 * those whose setup bailed out early.
	 *
	 * The timer makes the device look removable: INVALIDATE_DELAY after
	 * the last close the medium is flagged as changed.
	 */
	spin_lock_init(&dev->lock);
	init_timer(&dev->timer);
	dev->timer.data = (unsigned long) dev;
	dev->timer.function = sbull_invalidate;

	/*
	 * Size of the virtual disk in bytes: nsectors sectors of
	 * hardsect_size bytes each.
	 */
	dev->size = nsectors*hardsect_size;
	dev->data = vmalloc(dev->size);
	if (dev->data == NULL) {
		printk (KERN_NOTICE "vmalloc failure.\n");
		return;
	}
	/* vmalloc() does not clear memory; never expose stale kernel data. */
	memset (dev->data, 0, dev->size);

	switch (request_mode) {
	case RM_NOQUEUE:
		/* No request function is used, but a queue is still needed. */
		dev->queue = blk_alloc_queue(GFP_KERNEL);
		if (dev->queue == NULL)
			goto out_vfree;
		/* Bios are handed directly to sbull_make_request. */
		blk_queue_make_request(dev->queue, sbull_make_request);
		break;

	case RM_FULL:
		/* Request queue served by the bio-walking request function. */
		dev->queue = blk_init_queue(sbull_full_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;

	default:
		printk(KERN_NOTICE "Bad request mode %d, using simple\n", request_mode);
		/* fall through: an unknown mode really does use the simple one */

	case RM_SIMPLE:
		/*
		 * Simple request queue: sbull_request is its request function
		 * and dev->lock is the spinlock the queue uses.
		 */
		dev->queue = blk_init_queue(sbull_request, &dev->lock);
		if (dev->queue == NULL)
			goto out_vfree;
		break;
	}

	/*
	 * Tell the kernel the hardware sector size. The kernel itself always
	 * counts in 512-byte sectors; once this is set, requests are aligned
	 * to the hardware sector size.
	 */
	blk_queue_hardsect_size(dev->queue, hardsect_size);
	/* Let the request functions find the device from the queue. */
	dev->queue->queuedata = dev;

	/*
	 * Allocate the gendisk describing the disk. SBULL_MINORS is the number
	 * of minor numbers it occupies (used for partitions).
	 */
	dev->gd = alloc_disk(SBULL_MINORS);
	if (! dev->gd) {
		printk (KERN_NOTICE "alloc_disk failure\n");
		goto out_queue;
	}
	dev->gd->major = sbull_major;
	dev->gd->first_minor = which*SBULL_MINORS;
	dev->gd->fops = &sbull_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf (dev->gd->disk_name, sizeof (dev->gd->disk_name),
			"sbull%c", which + 'a');
	/* Capacity is expressed in 512-byte kernel sectors. */
	set_capacity(dev->gd, nsectors*(hardsect_size/KERNEL_SECTOR_SIZE));
	/* Register the disk with the block subsystem; it is live from here. */
	add_disk(dev->gd);
	return;

out_queue:
	/* Release the queue the same way the module exit path would. */
	if (request_mode == RM_NOQUEUE)
		blk_put_queue(dev->queue);
	else
		blk_cleanup_queue(dev->queue);
	dev->queue = NULL;
out_vfree:
	vfree(dev->data);
	/* Prevent a second vfree() when the module is unloaded. */
	dev->data = NULL;
}
/*
 * sbull_init - module entry point.
 *
 * Registers the block major, allocates the per-device array and sets up
 * ndevices disks.
 *
 * Returns 0 on success, -EBUSY if no major number could be obtained and
 * -ENOMEM if the device array could not be allocated.
 */
static int __init sbull_init(void)
{
	int i;
	int ret;

	/*
	 * Register the block device under the name "sbull"; after success it
	 * shows up in /proc/devices. With major 0 the kernel picks a free
	 * major and returns it. With an explicit major it returns 0 on
	 * success. Either way a negative value signals failure (major in use,
	 * or no memory for the name entry), so only "< 0" is an error and the
	 * requested major must not be overwritten by a 0 return.
	 */
	ret = register_blkdev(sbull_major, "sbull");
	if (ret < 0) {
		printk(KERN_WARNING "sbull: unable to get major number\n");
		return -EBUSY;
	}
	if (sbull_major == 0)
		sbull_major = ret;

	/*
	 * One private structure per disk. kcalloc() checks the
	 * count * size multiplication for overflow.
	 */
	Devices = kcalloc(ndevices, sizeof (struct sbull_dev), GFP_KERNEL);
	if (Devices == NULL)
		goto out_unregister;

	/* ndevices defaults to 4, i.e. four disks are created. */
	for (i = 0; i < ndevices; i++)
		setup_device(Devices + i, i);

	return 0;

out_unregister:
	unregister_blkdev(sbull_major, "sbull");
	return -ENOMEM;
}
static void sbull_exit(void)
{
int i;
for (i = 0; i < ndevices; i++) {
struct sbull_dev *dev = Devices + i;
del_timer_sync(&dev->timer);
if (dev->gd) {
del_gendisk(dev->gd);
put_disk(dev->gd);
}
if (dev->queue) {
if (request_mode == RM_NOQUEUE)
blk_put_queue(dev->queue);
else
blk_cleanup_queue(dev->queue);
}
if (dev->data)
vfree(dev->data);
}
unregister_blkdev(sbull_major, "sbull");
kfree(Devices);
}
/* Register sbull_init / sbull_exit as the module's load and unload hooks. */
module_init(sbull_init);
module_exit(sbull_exit);