块设备驱动程序实现
从虚拟文件系统,文件系统,逻辑卷一路顺流而下,一直来到了硬盘块设备驱动程序。从上层看下层,总是被系统的实现搬弄得云遮雾障,不明所以,因此,干吗不干脆从底层开始慢慢往上爬。
OK,让我们就从块设备的驱动程序开始吧。
首先定义一些常用变量
(为什么是GK? GK stands for gingko。这是Gingko Storage System的一个组成部分。什么是Gingko Storage System? -_-!!!市面上看不到的,呵呵。)
#define GKDD_MAJOR 0 // major device number
#define GKDD_SIZE 2048 // disk size
#define GKDD_BLKSIZE 1024 // size of an access block
#define GKDD_HARDSECT 512 // size of each sector
这就是我们的硬盘的基本参数了,下面还要为我们的驱动设定几个变量
// Major number handed to register_blkdev() in gkdd_init_module().
static int gkdd_major = 0;
module_param( gkdd_major, int, 0 );
// Sector size in bytes; also passed to blk_queue_hardsect_size().
static int hardsect_size = 512;
module_param( hardsect_size, int, 0 );
// Number of sectors per device; a device holds nsectors * hardsect_size bytes.
static int nsectors = 1024;
module_param( nsectors, int, 0 );
// Number of gkdd_dev entries allocated and set up at module init.
static int ndevices = 4;
module_param( ndevices, int, 0 );
模块可以接收4个int型参数:主设备号、扇区大小、扇区数和设备数。默认是4块硬盘,每块盘1024个扇区,每个扇区512字节。
// Request-handling modes selectable through the request_mode parameter.
enum
{
RM_SIMPLE = 0, // queue served by gkdd_request
RM_FULL = 1, // queue served by gkdd_full_request
RM_NOQUEUE = 2, // blk_alloc_queue() plus gkdd_make_request, no request function
};
// Mode used by setup_device(); unrecognised values are handled like RM_SIMPLE.
static int request_mode = RM_SIMPLE;
module_param( request_mode, int, 0 );
使用简单的数据请求模式。
// Per-device state of one RAM-backed gkdd disk.
struct gkdd_dev
{
int size; // device size in bytes (nsectors * hardsect_size, set in setup_device)
u8 *data; // backing store obtained with vmalloc()
short users; // open count: incremented in gkdd_open, decremented in gkdd_release
short media_change; // non-zero reports a media change; cleared by gkdd_revalidate
spinlock_t lock; // taken around the users count; also passed to blk_init_queue()
struct request_queue *queue; // request queue of this device
struct gendisk *gd; // gendisk representing this device in the kernel
struct timer_list timer; // armed in gkdd_release when the last user closes; callback is gkdd_invalidate
};
// Array of ndevices devices, allocated in gkdd_init_module().
static struct gkdd_dev *Devices = NULL;
好了,这就是咱的硬盘数据结构:有该硬盘在内核中的表示gd,硬盘的操作请求队列queue,设备操作自旋锁,以及操作延时的定时器。通过定时器可以模拟一个可更换介质的块设备,如你的DVD。
一切准备就绪,向内核注册咱们的硬盘吧。
/*
 * Module entry point: register the block major, allocate the device
 * array and set up every device.
 *
 * Returns 0 on success, -EBUSY if the major number could not be
 * registered, or -ENOMEM if the device array could not be allocated.
 */
static int gkdd_init_module(void)
{
    int ret;
    int i;

    /*
     * register_blkdev() returns the newly allocated major only when a
     * dynamic one is requested (major == 0). With an explicit major it
     * returns 0 on success, so only a negative value is a failure and
     * gkdd_major must not be overwritten in that case.
     */
    ret = register_blkdev(gkdd_major, "gingko_disk");
    if (ret < 0) {
        printk(KERN_WARNING "GKDD-disk: unable to get major number\n");
        return -EBUSY;
    }
    if (gkdd_major == 0)
        gkdd_major = ret;

    Devices = kmalloc(ndevices * sizeof(struct gkdd_dev), GFP_KERNEL);
    if (Devices == NULL)
        goto out_unregister;

    for (i = 0; i < ndevices; i++)
        setup_device(Devices + i, i);

    printk(KERN_DEBUG "Module gkdd init\n");
    return 0;

out_unregister:
    /* Must use the same name the major was registered under. */
    unregister_blkdev(gkdd_major, "gingko_disk");
    return -ENOMEM;
}
module_init(gkdd_init_module);
当然,这里面有一个关键的函数,就是setup_device()。
/*
 * Initialise one RAM-backed disk and add it to the kernel.
 *
 * dev   - slot in the Devices array to initialise (zeroed first)
 * which - index of the device; selects the first minor and the name suffix
 *
 * The whole backing store is vmalloc()ed up front. On failure the function
 * returns with dev->data == NULL and dev->gd == NULL; a request queue that
 * was already created stays in dev->queue and is released by
 * gkdd_exit_module().
 */
static void setup_device(struct gkdd_dev *dev, int which)
{
    memset(dev, 0, sizeof(struct gkdd_dev));

    /* Device size in bytes and its backing memory. */
    dev->size = nsectors * hardsect_size;
    dev->data = vmalloc(dev->size);
    if (dev->data == NULL) {
        printk(KERN_NOTICE "vmalloc failure.\n");
        return;
    }

    spin_lock_init(&dev->lock);

    /*
     * Timer armed by gkdd_release() once the last user is gone; when it
     * fires, gkdd_invalidate() is called with this device as argument.
     */
    init_timer(&dev->timer);
    dev->timer.data = (unsigned long) dev;
    dev->timer.function = gkdd_invalidate;

    switch (request_mode) {
    case RM_NOQUEUE:
        dev->queue = blk_alloc_queue(GFP_KERNEL);
        if (dev->queue == NULL)
            goto out_vfree;
        blk_queue_make_request(dev->queue, gkdd_make_request);
        break;

    case RM_FULL:
        dev->queue = blk_init_queue(gkdd_full_request, &dev->lock);
        if (dev->queue == NULL)
            goto out_vfree;
        break;

    default:
        printk(KERN_NOTICE "Bad request_mode %d, using simple\n", request_mode);
        /* fall through */
    case RM_SIMPLE:
        /* The mode this driver normally runs in. */
        dev->queue = blk_init_queue(gkdd_request, &dev->lock);
        if (dev->queue == NULL)
            goto out_vfree;
        break;
    }
    blk_queue_hardsect_size(dev->queue, hardsect_size);
    dev->queue->queuedata = dev;

    /* Allocate and fill in the gendisk that represents this device. */
    dev->gd = alloc_disk(GKDD_MINORS);
    if (!dev->gd) {
        printk(KERN_NOTICE "alloc_disk failure\n");
        goto out_vfree;
    }
    dev->gd->major = gkdd_major;
    dev->gd->first_minor = which * GKDD_MINORS;
    dev->gd->fops = &gkdd_ops;
    dev->gd->queue = dev->queue;
    dev->gd->private_data = dev;    /* lets open/release/ioctl find dev */
    /* Names are gkdd_disk_a, gkdd_disk_b, ... */
    snprintf(dev->gd->disk_name, 32, "gkdd_disk_%c", which + 'a');
    /* Capacity is expressed in KERNEL_SECTOR_SIZE units. */
    set_capacity(dev->gd, nsectors * (hardsect_size / KERNEL_SECTOR_SIZE));
    add_disk(dev->gd);
    return;

out_vfree:
    /*
     * Clear the pointer after freeing: gkdd_exit_module() calls vfree()
     * on any non-NULL dev->data and would otherwise free it twice.
     */
    vfree(dev->data);
    dev->data = NULL;
}
这就是我们的硬盘提供的操作接口了:
// Block-device operations; installed as gd->fops in setup_device().
static struct block_device_operations gkdd_ops =
{
.owner = THIS_MODULE, // always THIS_MODULE; there is no other choice here
.open = gkdd_open,
.release = gkdd_release,
.media_changed = gkdd_media_changed,
.revalidate_disk = gkdd_revalidate,
.ioctl = gkdd_ioctl
};
通过该方法,向内核中注册了4块硬盘,不过现在还不能fdisk -l。
从open开始:
/*
 * Open handler: cancels a pending invalidate timer, stores the device in
 * filp->private_data and bumps the open count. On the first open the
 * kernel is asked to check for a media change (this only exists to mimic
 * removable devices such as CD/DVD drives). Always returns 0.
 *
 * NOTE(review): check_disk_change() is called with dev->lock held; confirm
 * that it cannot sleep in the kernel version this driver targets.
 */
static int gkdd_open(struct inode *inode, struct file *filp)
{
    /* The device structure was stored in the gendisk's private_data. */
    struct gkdd_dev *disk = inode->i_bdev->bd_disk->private_data;

    /* Re-opened before the timer set by the last release fired: keep the media. */
    del_timer_sync(&disk->timer);
    filp->private_data = disk;

    spin_lock(&disk->lock);
    if (disk->users == 0)
        check_disk_change(inode->i_bdev);
    disk->users++;
    spin_unlock(&disk->lock);

    return 0;
}
release
/*
 * Release handler: drops the open count and, when the last user is gone,
 * arms the device timer to expire INVALIDATE_DELAY jiffies from now.
 * Always returns 0.
 */
static int gkdd_release(struct inode *inode, struct file *filp)
{
    struct gkdd_dev *disk = inode->i_bdev->bd_disk->private_data;

    spin_lock(&disk->lock);
    if (--disk->users == 0) {
        /* Nobody uses the device any more: start the media-removal countdown. */
        disk->timer.expires = jiffies + INVALIDATE_DELAY;
        add_timer(&disk->timer);
    }
    spin_unlock(&disk->lock);

    return 0;
}
更换介质后,需要一些初始化的操作。首先,内核通过下面的函数查询介质是否被更换过:
/*
 * media_changed handler: returns the media_change flag of the device
 * stored in gd->private_data.
 */
int gkdd_media_changed(struct gendisk *gd)
{
    return ((struct gkdd_dev *) gd->private_data)->media_change;
}
激活初始化后,你的硬盘上的文件系统什么的就被memset为0了。
/*
 * revalidate_disk handler: if a media change is flagged, clear the flag
 * and zero the whole backing store. Always returns 0.
 */
int gkdd_revalidate(struct gendisk *gd)
{
    struct gkdd_dev *disk = gd->private_data;

    if (!disk->media_change)
        return 0;

    disk->media_change = 0;
    memset(disk->data, 0, disk->size);
    return 0;
}
某些应用(如fdisk)需要知道CHS信息,因此要提供下面的ioctl。现在大家都用LBA了,所以还得在LBA和CHS之间换算,其计算方法如下:
CHS2LBA
LBA = (C * heads per cylinder + H) * sectors per track + S - 1
LBA2CHS
C = LBA / (heads per cylinder * sectors per track)
temp = LBA % (heads per cylinder * sectors per track)
H = temp / sectors per track
S = temp % sectors per track + 1
当然,这只是一个简单的换算了,相信实际的硬盘实现,为了考虑磁盘转速和数据处理以及缓存大小之间的关系,实际上的对应关系应该不是这样的。而我们的gkdd就更假了,意思一下而已。
/*
 * ioctl handler. Only HDIO_GETGEO is supported: it reports a made-up
 * geometry of 4 heads and 16 sectors per track (64 sectors per cylinder),
 * with the cylinder count derived from the device capacity.
 *
 * Returns 0 on success, -EFAULT if the result cannot be copied to user
 * space, and -ENOTTY for any other command.
 */
int gkdd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
{
    struct gkdd_dev *dev = filp->private_data;
    struct hd_geometry geo;
    long size;

    switch (cmd) {
    case HDIO_GETGEO:
        /* Zero first so no uninitialised stack bytes reach user space. */
        memset(&geo, 0, sizeof(geo));
        /*
         * dev->size is in bytes; the geometry must describe the same
         * number of KERNEL_SECTOR_SIZE sectors that set_capacity() was
         * given in setup_device().
         */
        size = dev->size / KERNEL_SECTOR_SIZE;
        geo.cylinders = (size & ~0x3f) >> 6;
        geo.heads = 4;
        geo.sectors = 16;
        geo.start = 4;
        if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
            return -EFAULT;
        return 0;
    }

    return -ENOTTY;
}
现在硬盘可以接受用户的操作请求了,
/*
 * Copy nsect sectors between the RAM backing store and buffer.
 *
 * dev    - device whose data area is accessed
 * sector - first sector, in KERNEL_SECTOR_SIZE units
 * nsect  - number of sectors to copy
 * buffer - caller's buffer (source for writes, destination for reads)
 * write  - non-zero to copy buffer into the device, zero for the reverse
 *
 * A request that would run past the end of the device is logged and dropped.
 */
static void gkdd_transger( struct gkdd_dev *dev, unsigned long sector, unsigned long nsect, char *buffer, int write )
{
    unsigned long offset = sector * KERNEL_SECTOR_SIZE;
    unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;
    unsigned long capacity = dev->size;

    /* Written so that offset + nbytes cannot wrap around. */
    if (offset > capacity || nbytes > capacity - offset) {
        printk(KERN_DEBUG "GKDD-disk: Beyond-end write (%lu %lu)\n", offset, nbytes);
        return;
    }

    if (write)
        memcpy(dev->data + offset, buffer, nbytes);
    else
        memcpy(buffer, dev->data + offset, nbytes);
}
/*
 * Request function for RM_SIMPLE mode: takes requests off the queue one
 * by one, transfers the current chunk of each and completes it.
 * Non-filesystem requests are failed.
 */
static void gkdd_request( request_queue_t *q )
{
    struct request *req;

    while ((req = elv_next_request(q)) != NULL) {
        struct gkdd_dev *dev = req->rq_disk->private_data;

        if (!blk_fs_request(req)) {
            printk(KERN_NOTICE "GKDD-disk: Skip non-fs request\n");
            end_request(req, 0);
            continue;
        }

        gkdd_transger(dev, req->sector, req->current_nr_sectors,
                      req->buffer, rq_data_dir(req));
        end_request(req, 1);
    }
}
/*
 * Transfer one bio (the block I/O structure used when a group of blocks
 * has to be moved), one segment at a time. Always returns 0.
 *
 * NOTE(review): the per-segment length comes from bio_cur_sectors(bio),
 * not from the segment the loop is currently visiting; confirm this is
 * correct for bios whose segments differ in size.
 */
static int gkdd_xfer_bio(struct gkdd_dev *dev, struct bio *bio)
{
    struct bio_vec *seg;
    sector_t pos = bio->bi_sector;
    int idx;

    bio_for_each_segment(seg, bio, idx) {
        char *mapped = __bio_kmap_atomic(bio, idx, KM_USER0);

        gkdd_transger(dev, pos, bio_cur_sectors(bio), mapped,
                      bio_data_dir(bio) == WRITE);
        pos += bio_cur_sectors(bio);

        __bio_kunmap_atomic(bio, KM_USER0);
    }

    return 0;
}
/*
 * Transfer every bio of a request and return the total number of
 * KERNEL_SECTOR_SIZE sectors covered, computed from each bio's bi_size.
 */
static int gkdd_xfer_request(struct gkdd_dev *dev, struct request *req)
{
    struct bio *cur;
    int total = 0;

    rq_for_each_bio(cur, req) {
        gkdd_xfer_bio(dev, cur);
        total += cur->bi_size/KERNEL_SECTOR_SIZE;
    }

    return total;
}
/*
 * Request function for RM_FULL mode: transfers each request in full
 * through gkdd_xfer_request() and then completes it. Non-filesystem
 * requests are failed.
 */
static void gkdd_full_request( request_queue_t *q )
{
    struct request *req;
    int sectors_xferred;
    struct gkdd_dev *dev = q->queuedata;

    while ((req = elv_next_request(q)) != NULL) {
        if (!blk_fs_request(req)) {
            printk(KERN_NOTICE "GKDD-disk: Skip non-fs request\n");
            end_request(req, 0);
            continue;
        }

        sectors_xferred = gkdd_xfer_request(dev, req);
        if (!end_that_request_first(req, 1, sectors_xferred)) {
            blkdev_dequeue_request(req);
            /*
             * Report the same "success" status that was just given to
             * end_that_request_first(); 0 would mark the request failed.
             */
            end_that_request_last(req, 1);
        }
    }
}
/*
 * make_request function for RM_NOQUEUE mode: transfers the bio directly
 * and completes it with the transfer status. Always returns 0.
 */
static int gkdd_make_request(request_queue_t *q, struct bio *bio)
{
    struct gkdd_dev *disk = q->queuedata;
    int result = gkdd_xfer_bio(disk, bio);

    bio_endio(bio, bio->bi_size, result);
    return 0;
}
/*
 * Module exit: tear down every device, unregister the major and free
 * the resources acquired at init time.
 */
static void gkdd_exit_module(void)
{
    int i;

    for (i = 0; i < ndevices; i++) {
        struct gkdd_dev *dev = Devices + i;

        del_timer_sync(&dev->timer);

        if (dev->gd) {
            del_gendisk(dev->gd);
            put_disk(dev->gd);
        }

        if (dev->queue) {
            /* Queues from blk_alloc_queue() are put, the others cleaned up. */
            if (request_mode == RM_NOQUEUE)
                blk_put_queue(dev->queue);
            else
                blk_cleanup_queue(dev->queue);
        }

        if (dev->data)
            vfree(dev->data);
    }

    unregister_blkdev(gkdd_major, "gingko_disk");
    kfree(Devices);
    printk(KERN_DEBUG "Module gkdd exit\n");
}
module_exit(gkdd_exit_module);