mtd层次分析

关于mtd的层次分析,网上一大堆,先看看网上怎么说的.

MTD(memory technology device内存技术设备)是用于访问memory设备(ROM、flash)的Linux的子系统。MTD的主要目的是为了使新的memory设备的驱动更加简单,为此它在硬件和上层之间提供了一个抽象的接口。MTD的所有源代码在/drivers/mtd子目录下。我将CFI接口的MTD设备分为四层(从设备节点直到底层硬件驱动),这四层从上到下依次是:设备节点、MTD设备层、MTD原始设备层和硬件驱动层。

  一、Flash硬件驱动层:硬件驱动层负责在init时驱动Flash硬件,Linux MTD设备的NOR Flash芯片驱动遵循CFI接口标准,其驱动程序位于drivers/mtd/chips子目录下。NAND型Flash的驱动程 序则位于/drivers/mtd/nand子目录下。

  二、MTD原始设备:原始设备层由两部分组成,一部分是MTD原始设备的通用代码,另一部分是各个特定的Flash的数据,例如分区。用于描述MTD原始设备的数据结构是mtd_info,这其中定义了大量的关于MTD的数据和操作函数。mtd_table(mtdcore.c)则是所有MTD原始设备的列表,mtd_part(mtdpart.c)是用于表示MTD原始设备分区的结构,其中包含了mtd_info,因为每一个分区都是被看成一个MTD原始设备加在mtd_table中的,mtd_part.mtd_info中的大部分数据都从该分区的主分区mtd_part->master中获得。在drivers/mtd/maps/子目录下存放的是特定的flash的数据,每一个文件都描述了一块板子上的flash。其中调用add_mtd_device()、del_mtd_device()建立/删除mtd_info结构并将其加入/删除mtd_table(或者调用add_mtd_partition()、del_mtd_partition()(mtdpart.c)建立/删除mtd_part结构并将mtd_part.mtd_info加入/删除mtd_table中)。

  三、MTD设备层:基于MTD原始设备,linux系统可以定义出MTD的块设备(主设备号31)和字符设备(主设备号90)。MTD字符设备的定义在mtdchar.c中实现,通过注册一系列file operation函数(lseek、open、close、read、write)。MTD块设备则是定义了一个描述MTD块设备的结构mtdblk_dev,并声明了一个名为mtdblks的指针数组,这个数组中的每一个mtdblk_dev和mtd_table中的每一个mtd_info一一对应。

  四、设备节点:通过mknod在/dev子目录下建立MTD字符设备节点(主设备号为90)和MTD块设备节点(主设备号为31),通过访问此设备节点即可访问MTD字符设备和块设备。 

  五、根文件系统:在Bootloader中将JFFS(或JFFS2)的文件系统映像jffs.image(或jffs2.img)烧到flash的 某一个分区中,在/arch/arm/mach-your/arch.c文件的your_fixup函数中将该分区作为根文件系统挂载。 

  六、文件系统:内核启动后,通过mount 命令可以将flash中的其余分区作为文件系统挂载到mountpoint上。



我认为mtd层是介于vfs和具体的flash设备之间,就是为了屏蔽下层flash设备的多样化而存在。nand flash是块设备,访问nand flash内容,其实和一般的块设备差不多,那些驱动架构都一样。先从mtd设备层开始,先看一看

/*
 * Per-device state of an mtdblock device: embeds the generic MTD
 * block-translation device and adds a single-sector write-back cache.
 */
struct mtdblk_dev {
struct mtd_blktrans_dev mbd; /* embedded generic translation device; recovered via container_of() */
int count; /* usage count -- presumably open count, not used in the excerpts shown */
struct mutex cache_mutex; /* protects the cache fields below */
unsigned char *cache_data; /* buffer holding one cached sector */
unsigned long cache_offset; /* flash byte offset the cache currently maps */
unsigned int cache_size; /* cache granularity in bytes; 0 = caching disabled (see do_cached_read) */
enum { STATE_EMPTY, STATE_CLEAN, STATE_DIRTY } cache_state; /* validity/dirtiness of cache_data */
};

看其中里面的struct mtd_blktrans_dev {
struct mtd_blktrans_ops *tr; /* ops table of the owning translation layer (e.g. mtdblock_tr) */
struct list_head list; /* entry in tr->devs */
struct mtd_info *mtd; /* underlying raw MTD device */
struct mutex lock; /* serializes request processing (see mtd_blktrans_thread) */
int devnum; /* device number within this translation layer; -1 = pick first free */
unsigned long size; /* device size in tr->blksize units (see set_capacity conversion) */
int readonly; /* non-zero: no writesect hook or MTD not writeable */
int open; /* open count -- not used in the excerpts shown */
struct kref ref; /* lifetime reference count */
struct gendisk *disk; /* block-layer disk created in add_mtd_blktrans_dev() */
struct attribute_group *disk_attributes; /* optional sysfs attributes attached to the disk */
struct task_struct *thread; /* per-device request-processing kthread */
struct request_queue *rq; /* block request queue */
spinlock_t queue_lock; /* lock handed to blk_init_queue() for rq */
void *priv; /* translation-layer private data */
};

mtd_blktrans_dev这个结构和一般的块设备结构很类似吧。访问nand的内容,从vfs到了mtd后,应该主要是操作mtd_blktrans_dev这个结构体。

在mtdblock.c中有这样一个函数

/*
 * Module init: register the mtdblock translation layer with the MTD
 * block-translation core, which then creates one block device per
 * existing MTD device (via mtdblock_tr.add_mtd).
 */
static int __init init_mtdblock(void)
{
mutex_init(&mtdblks_lock); /* NOTE(review): mtdblks_lock is defined outside this excerpt */
return register_mtd_blktrans(&mtdblock_tr);
}

这个就是向内核注册mtd层。

/*
 * Translation-layer descriptor for the classic /dev/mtdblockN devices:
 * block major 31, 512-byte sectors, no partition minors.
 */
static struct mtd_blktrans_ops mtdblock_tr = {
.name = "mtdblock",
.major = 31, /* traditional MTD block major */
.part_bits = 0, /* no minor bits reserved for partitions */
.blksize = 512, /* sector size; blkshift derived in register_mtd_blktrans() */
.open = mtdblock_open,
.flush = mtdblock_flush,
.release = mtdblock_release,
.readsect = mtdblock_readsect, /* sector I/O goes through the write-back cache */
.writesect = mtdblock_writesect,
.add_mtd = mtdblock_add_mtd, /* called once per discovered MTD device */
.remove_dev = mtdblock_remove_dev,
.owner = THIS_MODULE,
};

对nand的各种操作都在这个结构体中,好吧 我们随便看一个是怎么实现的。

/*
 * readsect callback: translate a 512-byte sector number into a byte
 * offset and hand the read to the cached-read path.
 */
static int mtdblock_readsect(struct mtd_blktrans_dev *dev,
     unsigned long block, char *buf)
{
	return do_cached_read(container_of(dev, struct mtdblk_dev, mbd),
			      block << 9, 512, buf);
}

/*
 * Read 'len' bytes at byte offset 'pos' from the underlying MTD device,
 * serving data from the one-sector cache when it already holds the
 * containing sector.  Returns 0 on success or a negative error code.
 */
static int do_cached_read (struct mtdblk_dev *mtdblk, unsigned long pos,
  int len, char *buf)
{
struct mtd_info *mtd = mtdblk->mbd.mtd; /* mtd_info supplies the chip-level read routine */
unsigned int sect_size = mtdblk->cache_size;
size_t retlen;
int ret;


DEBUG(MTD_DEBUG_LEVEL2, "mtdblock: read on \"%s\" at 0x%lx, size 0x%x\n",
mtd->name, pos, len);


/* Caching disabled (cache_size == 0): read straight from the chip. */
if (!sect_size)
return mtd->read(mtd, pos, len, &retlen, buf);


/* Split the request at sector boundaries so each piece can be
 * checked against the cache individually. */
while (len > 0) {
unsigned long sect_start = (pos/sect_size)*sect_size;
unsigned int offset = pos - sect_start;
unsigned int size = sect_size - offset;
if (size > len)
size = len;


/*
* Check if the requested data is already cached
* Read the requested amount of data from our internal cache if it
* contains what we want, otherwise we read the data directly
* from flash.
*/
if (mtdblk->cache_state != STATE_EMPTY &&
   mtdblk->cache_offset == sect_start) {
memcpy (buf, mtdblk->cache_data + offset, size);
} else {
ret = mtd->read(mtd, pos, size, &retlen, buf); /* real flash access routine, bound when the device was probed */
if (ret)
return ret;
if (retlen != size) /* short read: report as I/O error */
return -EIO;
}


buf += size;
pos += size;
len -= size;
}


return 0;
}

好吧,到这里,应该知道通过struct mtdblk_dev 这个结构在mtd层可以操控真实的nand了,那么作为一个块设备,内核访问时,有固定的架构,通常是通过bio以及request来控制的。接下来看看register_mtd_blktrans(&mtdblock_tr);

/*
 * Register a block-translation layer (e.g. mtdblock) with the MTD core:
 * reserve its block major, add it to the global list, and create one
 * translated block device for every MTD device already present.
 * Returns 0 on success or the register_blkdev() error.
 */
int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
{
struct mtd_info *mtd;
int ret;


/* Register the notifier if/when the first device type is
  registered, to prevent the link/init ordering from tripping
  us up. */
if (!blktrans_notifier.list.next)
register_mtd_user(&blktrans_notifier);




mutex_lock(&mtd_table_mutex);


ret = register_blkdev(tr->major, tr->name); /* reserve the block major number -- mandatory */
if (ret) {
printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
      tr->name, tr->major, ret);
mutex_unlock(&mtd_table_mutex);
return ret;
}


tr->blkshift = ffs(tr->blksize) - 1; /* log2 of the sector size (blksize assumed power of two) */


INIT_LIST_HEAD(&tr->devs);
list_add(&tr->list, &blktrans_majors);


/* Offer every already-registered MTD device to this layer. */
mtd_for_each_device(mtd)
if (mtd->type != MTD_ABSENT)
tr->add_mtd(tr, mtd); /* key step: e.g. mtdblock_add_mtd() allocates and registers per-device state */


mutex_unlock(&mtd_table_mutex);
return 0;
}

add_mtd是static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
/* Allocate zeroed per-device state; owner of this allocation is us
 * until add_mtd_blktrans_dev() succeeds. */
struct mtdblk_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

if (!dev)
return; /* out of memory: silently skip this device */

dev->mbd.mtd = mtd;
dev->mbd.devnum = mtd->index; /* reuse the MTD index as the block device number */
dev->mbd.size = mtd->size >> 9; /* device size in 512-byte sectors (blksize is 512) */
dev->mbd.tr = tr;
if (!(mtd->flags & MTD_WRITEABLE))
dev->mbd.readonly = 1; /* propagate read-only flag to the block device */
if (add_mtd_blktrans_dev(&dev->mbd))
kfree(dev); /* registration failed: we still own the allocation, free it */
}

到了最重要的初始化mtd块设备函数了

/*
 * add_mtd_blktrans_dev - register one translated block device
 * @new: caller-allocated device with ->tr, ->mtd, ->devnum (or -1 for
 *       "first free") and ->size already filled in
 *
 * Picks a device number (keeping tr->devs sorted), creates the gendisk
 * and request queue, starts the per-device request-processing kthread
 * and publishes the disk.
 *
 * Returns 0 on success or a negative errno.  On failure this function
 * releases everything it acquired itself but does NOT free @new:
 * ownership of the allocation stays with the caller, which frees it on
 * a non-zero return (see mtdblock_add_mtd()).  The old "kfree(new)" at
 * the error1 label was therefore a double free and has been removed.
 *
 * Must be called with mtd_table_mutex held (asserted via trylock+BUG).
 */
int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
{
	struct mtd_blktrans_ops *tr = new->tr;
	struct mtd_blktrans_dev *d;
	int last_devnum = -1;
	struct gendisk *gd;
	int ret;

	/* Caller must already hold mtd_table_mutex. */
	if (mutex_trylock(&mtd_table_mutex)) {
		mutex_unlock(&mtd_table_mutex);
		BUG();
	}

	mutex_lock(&blktrans_ref_mutex);
	/* Find (or validate) a device number, keeping tr->devs sorted. */
	list_for_each_entry(d, &tr->devs, list) {
		if (new->devnum == -1) {
			/* Use first free number */
			if (d->devnum != last_devnum+1) {
				/* Found a free devnum. Plug it in here */
				new->devnum = last_devnum+1;
				list_add_tail(&new->list, &d->list);
				goto added;
			}
		} else if (d->devnum == new->devnum) {
			/* Required number taken */
			mutex_unlock(&blktrans_ref_mutex);
			return -EBUSY;
		} else if (d->devnum > new->devnum) {
			/* Required number was free */
			list_add_tail(&new->list, &d->list);
			goto added;
		}
		last_devnum = d->devnum;
	}

	ret = -EBUSY;
	if (new->devnum == -1)
		new->devnum = last_devnum+1;

	/* Check that the device and any partitions will get valid
	 * minor numbers and that the disk naming code below can cope
	 * with this number. */
	if (new->devnum > (MINORMASK >> tr->part_bits) ||
	    (tr->part_bits && new->devnum >= 27 * 26)) {
		mutex_unlock(&blktrans_ref_mutex);
		goto error1;
	}

	list_add_tail(&new->list, &tr->devs);
 added:
	mutex_unlock(&blktrans_ref_mutex);

	mutex_init(&new->lock);
	kref_init(&new->ref);
	if (!tr->writesect)
		new->readonly = 1;	/* no write hook -> force read-only disk */

	/* Create gendisk */
	ret = -ENOMEM;
	gd = alloc_disk(1 << tr->part_bits);
	if (!gd)
		goto error2;

	new->disk = gd;
	gd->private_data = new;
	gd->major = tr->major;
	gd->first_minor = (new->devnum) << tr->part_bits;
	gd->fops = &mtd_blktrans_ops;

	/* Name: "<tr><N>" without partitions, "<tr>a".."<tr>zz" with them. */
	if (tr->part_bits)
		if (new->devnum < 26)
			snprintf(gd->disk_name, sizeof(gd->disk_name),
				 "%s%c", tr->name, 'a' + new->devnum);
		else
			snprintf(gd->disk_name, sizeof(gd->disk_name),
				 "%s%c%c", tr->name,
				 'a' - 1 + new->devnum / 26,
				 'a' + new->devnum % 26);
	else
		snprintf(gd->disk_name, sizeof(gd->disk_name),
			 "%s%d", tr->name, new->devnum);

	/* new->size is in tr->blksize units; set_capacity wants 512-byte sectors. */
	set_capacity(gd, (new->size * tr->blksize) >> 9);

	/* Create the request queue */
	spin_lock_init(&new->queue_lock);
	new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
	if (!new->rq)
		goto error3;

	new->rq->queuedata = new;
	blk_queue_logical_block_size(new->rq, tr->blksize);

	if (tr->discard)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);

	gd->queue = new->rq;

	/* Pin the underlying MTD device and the translation module. */
	__get_mtd_device(new->mtd);
	__module_get(tr->owner);

	/* Create processing thread */
	/* TODO: workqueue ? */
	new->thread = kthread_run(mtd_blktrans_thread, new,
				  "%s%d", tr->name, new->mtd->index);
	if (IS_ERR(new->thread)) {
		ret = PTR_ERR(new->thread);
		goto error4;
	}
	gd->driverfs_dev = &new->mtd->dev;

	if (new->readonly)
		set_disk_ro(gd, 1);

	add_disk(gd);

	if (new->disk_attributes) {
		ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
					 new->disk_attributes);
		WARN_ON(ret);
	}
	return 0;

error4:
	module_put(tr->owner);
	__put_mtd_device(new->mtd);
	blk_cleanup_queue(new->rq);
error3:
	put_disk(new->disk);
error2:
	list_del(&new->list);
error1:
	/* Do NOT kfree(new) here: the caller owns the allocation and
	 * frees it itself when we return non-zero; freeing it here as
	 * well caused a double free with mtdblock_add_mtd(). */
	return ret;
}

这个函数和ldd3上的块设备初始化函数很像吧,这个就好懂了。

注意new->thread = kthread_run(mtd_blktrans_thread, new,
"%s%d", tr->name, new->mtd->index);这行代码很关键,它开启了一个工作线程专门处理块设备请求的。

看看这个线程是怎么工作的。

/*
 * Per-device request-processing kthread: pulls requests off the block
 * request queue, feeds them to do_blktrans_request() and completes
 * them, sleeping whenever the queue is empty.  Runs until
 * kthread_stop().  Always returns 0.
 */
static int mtd_blktrans_thread(void *arg)
{
struct mtd_blktrans_dev *dev = arg;
struct request_queue *rq = dev->rq;
struct request *req = NULL;


spin_lock_irq(rq->queue_lock); /* queue_lock must be held to fetch requests */


while (!kthread_should_stop()) {
int res;


/* No request in hand and none queued: sleep until new work
 * arrives (wakeup presumably comes from the queue's request
 * function, not shown in this excerpt). */
if (!req && !(req = blk_fetch_request(rq))) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_irq(rq->queue_lock);
schedule();
spin_lock_irq(rq->queue_lock);
continue;
}


spin_unlock_irq(rq->queue_lock); /* drop the spinlock: the MTD I/O below may sleep */


mutex_lock(&dev->lock); /* serialize device access with other users */
res = do_blktrans_request(dev->tr, dev, req);
mutex_unlock(&dev->lock);


spin_lock_irq(rq->queue_lock);


/* Complete the current segment; keep req if more remain. */
if (!__blk_end_request_cur(req, res))
req = NULL;
}


/* Thread is stopping: fail any request still in flight. */
if (req)
__blk_end_request_all(req, -EIO);


spin_unlock_irq(rq->queue_lock);


return 0;
}

前面都是获得请求,do_blktrans_request是真正处理请求的函数。

/*
 * Handle the current segment of one block request: convert the
 * 512-byte sector position into tr->blksize-sized blocks and call the
 * translation layer's readsect/writesect/discard hooks one block at a
 * time.  Returns 0 on success or -EIO on any failure.
 */
static int do_blktrans_request(struct mtd_blktrans_ops *tr,
      struct mtd_blktrans_dev *dev,
      struct request *req)
{
unsigned long block, nsect;
char *buf;

block = blk_rq_pos(req) << 9 >> tr->blkshift; /* 512-byte sectors -> device blocks */
nsect = blk_rq_cur_bytes(req) >> tr->blkshift; /* current segment length in device blocks */

buf = req->buffer;

if (req->cmd_type != REQ_TYPE_FS) /* only filesystem requests are handled */
return -EIO;
if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
   get_capacity(req->rq_disk)) /* reject access past the end of the disk */
return -EIO;
if (req->cmd_flags & REQ_DISCARD) /* discard flag only set when tr->discard exists (see queue setup) */
return tr->discard(dev, block, nsect);

switch(rq_data_dir(req)) {
case READ:
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->readsect(dev, block, buf))
return -EIO;
rq_flush_dcache_pages(req); /* keep D-cache coherent with the freshly read data */
return 0;
case WRITE:
if (!tr->writesect) /* layer is read-only */
return -EIO;


rq_flush_dcache_pages(req); /* flush before the device consumes the pages */
for (; nsect > 0; nsect--, block++, buf += tr->blksize)
if (tr->writesect(dev, block, buf))
return -EIO;
return 0;
default:
printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
return -EIO;
}
}

这个函数先判断是什么请求,再执行具体的操作。tr->writesect(dev, block, buf); tr->writesect会调用相关的mtd_info里面的函数。


到这里了,估计通过mtd层访问下层flash设备已经清楚了,如果你好奇心强点,那么mtd_info怎么跟底层的真正nandflash驱动挂钩了?

在register_mtd_blktrans(struct mtd_blktrans_ops *tr)函数里面

mtd_for_each_device(mtd)
if (mtd->type != MTD_ABSENT)
tr->add_mtd(tr, mtd);

这个就是遍历mtd_info,然后add。

#define MTD_ABSENT 0
#define MTD_RAM 1
#define MTD_ROM 2
#define MTD_NORFLASH 3
#define MTD_NANDFLASH 4
#define MTD_DATAFLASH 6
#define MTD_UBIVOLUME 7

从这里应该可以看出,所有的flash设备信息都可以被探测到并添加;这些mtd_info信息是相应的设备驱动在探测到设备后初始化的,并被组织到一起。通过上述的代码,可以将所有的flash设备都注册进来。

你可能感兴趣的:(mtd层次分析)