/*
 * PCI probe callback for the NVMe device (excerpt: the parts marked "//..."
 * are elided, including the declarations of dev/result and the "remove"
 * error label).
 *
 * The visible tail registers a misc character device named "nvme<instance>"
 * whose file operations are nvme_dev_fops, then initializes the device
 * reference count. Returns 0 on success; on misc_register() failure control
 * jumps to the (elided) "remove" label.
 */
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
//...
create_cdev:
// Use the embedded miscdev structure to expose character-device callbacks,
// so that user space can issue NVMe commands to this device.
scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
dev->miscdev.minor = MISC_DYNAMIC_MINOR;
dev->miscdev.parent = &pdev->dev;
dev->miscdev.name = dev->name;
dev->miscdev.fops = &nvme_dev_fops;
result = misc_register(&dev->miscdev); // register the misc device together with the callbacks set above
if (result)
goto remove;
// NOTE(review): the kref is initialized only after misc_register() has
// succeeded, while nvme_dev_open() does kref_get() on it — confirm whether an
// open() arriving in this window can observe an uninitialized kref.
kref_init(&dev->kref);// initialize the device reference count (starts at 1)
return 0;
//...
}
/*
 * open() handler of the NVMe misc character device.
 *
 * On entry f->private_data is treated as a pointer to the miscdev member
 * embedded in a struct nvme_dev. The owning nvme_dev is recovered from it,
 * a reference is taken on the device, and f->private_data is replaced with
 * the nvme_dev pointer for use by the other file operations.
 *
 * Always returns 0.
 */
static int nvme_dev_open(struct inode *inode, struct file *f)
{
	struct nvme_dev *ndev;

	ndev = container_of(f->private_data, struct nvme_dev, miscdev);

	/* Dropped again in nvme_dev_release(). */
	kref_get(&ndev->kref);
	f->private_data = ndev;

	return 0;
}
/*
 * release() handler of the NVMe misc character device.
 *
 * Drops the reference taken in nvme_dev_open(); nvme_free_dev is passed as
 * the release callback for when the count reaches zero.
 *
 * Always returns 0.
 */
static int nvme_dev_release(struct inode *inode, struct file *f)
{
	struct nvme_dev *ndev;

	ndev = f->private_data;
	kref_put(&ndev->kref, nvme_free_dev);

	return 0;
}
/*
 * ioctl() handler of the NVMe misc character device.
 *
 * Only NVME_IOCTL_ADMIN_CMD is supported: arg is treated as a user-space
 * pointer and handed to nvme_user_admin_cmd(), whose result is returned.
 * Any other command yields -ENOTTY.
 */
static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct nvme_dev *ndev = f->private_data;

	if (cmd != NVME_IOCTL_ADMIN_CMD)
		return -ENOTTY;

	return nvme_user_admin_cmd(ndev, (void __user *)arg);
}
/*
 * File operations of the NVMe misc character device; installed into
 * dev->miscdev.fops by nvme_probe().
 */
static const struct file_operations nvme_dev_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= nvme_dev_ioctl,
	.open		= nvme_dev_open,
	.release	= nvme_dev_release,
};
/*
 * Execute an admin command supplied by user space.
 *
 * @dev:  device whose queue 0 the command is submitted on.
 * @ucmd: user-space pointer to the command description; its result field is
 *        written back on completion.
 *
 * The caller must have CAP_SYS_ADMIN. If the command carries a data buffer
 * (data_len != 0) the user pages are mapped for DMA for the duration of the
 * command and unmapped afterwards.
 *
 * Returns -EACCES without CAP_SYS_ADMIN, -EFAULT if the command cannot be
 * copied in or the result cannot be copied out, the error from
 * nvme_map_user_pages() if mapping fails, -ENOMEM if nvme_setup_prps()
 * covers a different length than requested, and otherwise the status of
 * nvme_submit_sync_cmd().
 */
static int nvme_user_admin_cmd(struct nvme_dev *dev, struct nvme_admin_cmd __user *ucmd)
{
	struct nvme_admin_cmd req;
	struct nvme_command sqe;
	struct nvme_iod *uninitialized_var(iod);
	unsigned timeout;
	int status, length;
	int write;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&req, ucmd, sizeof(req)))
		return -EFAULT;

	/* Translate the user-visible layout into the on-wire command. */
	memset(&sqe, 0, sizeof(sqe));
	sqe.common.opcode = req.opcode;
	sqe.common.flags = req.flags;
	sqe.common.nsid = cpu_to_le32(req.nsid);
	sqe.common.cdw2[0] = cpu_to_le32(req.cdw2);
	sqe.common.cdw2[1] = cpu_to_le32(req.cdw3);
	sqe.common.cdw10[0] = cpu_to_le32(req.cdw10);
	sqe.common.cdw10[1] = cpu_to_le32(req.cdw11);
	sqe.common.cdw10[2] = cpu_to_le32(req.cdw12);
	sqe.common.cdw10[3] = cpu_to_le32(req.cdw13);
	sqe.common.cdw10[4] = cpu_to_le32(req.cdw14);
	sqe.common.cdw10[5] = cpu_to_le32(req.cdw15);

	/* Bit 0 of the opcode is what the map/unmap helpers take as "write". */
	write = req.opcode & 1;

	length = req.data_len;
	if (req.data_len) {
		/* Pin the user buffer and map it for DMA. */
		iod = nvme_map_user_pages(dev, write, req.addr, length);
		if (IS_ERR(iod))
			return PTR_ERR(iod);
		length = nvme_setup_prps(dev, &sqe.common, iod, length, GFP_KERNEL);
	}

	/* A zero timeout_ms selects the default admin timeout (in jiffies). */
	timeout = ADMIN_TIMEOUT;
	if (req.timeout_ms)
		timeout = msecs_to_jiffies(req.timeout_ms);

	if (length == req.data_len)
		status = nvme_submit_sync_cmd(dev->queues[0], &sqe, &req.result, timeout);
	else
		status = -ENOMEM;

	/* Undo the mapping regardless of how the command went. */
	if (req.data_len) {
		nvme_unmap_user_pages(dev, write, iod);
		nvme_free_iod(dev, iod);
	}

	/* Hand the command result back to user space. */
	if (status >= 0) {
		if (copy_to_user(&ucmd->result, &req.result, sizeof(req.result)))
			status = -EFAULT;
	}

	return status;
}
/*
 * Pin a user-space buffer and map it for DMA.
 *
 * @dev:    device the mapping is made for (dev->pci_dev->dev is the DMA device).
 * @write:  non-zero maps DMA_TO_DEVICE, zero maps DMA_FROM_DEVICE.
 * @addr:   user virtual address of the buffer; must be 4-byte aligned.
 * @length: buffer length in bytes; must be non-zero and at most
 *          INT_MAX - PAGE_SIZE.
 *
 * On success returns an iod whose scatterlist describes the pinned pages
 * (iod->nents entries). The pages stay pinned until
 * nvme_unmap_user_pages() is called; the iod itself is released by the
 * caller. On failure returns ERR_PTR(-EINVAL) for a bad address/length,
 * ERR_PTR(-EFAULT) if not all pages could be pinned, or ERR_PTR(-ENOMEM) if
 * an allocation or the DMA mapping fails; nothing stays pinned in that case.
 */
struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length)
{
	int i, err, count, nents, offset;
	struct scatterlist *sg;
	struct page **pages;
	struct nvme_iod *iod;

	/*
	 * Reject addresses that are not 4-byte aligned.
	 * NOTE(review): presumably the NVMe dword-alignment requirement for
	 * data pointers — confirm against the spec.
	 */
	if (addr & 3)
		return ERR_PTR(-EINVAL);
	/* The upper bound keeps offset + length representable as an int. */
	if (!length || length > INT_MAX - PAGE_SIZE)
		return ERR_PTR(-EINVAL);

	/* Example: addr 0x12345678 -> offset 0x678 = 1656; assume length = 3000. */
	offset = offset_in_page(addr);
	/*
	 * The buffer starts "offset" bytes into its first page, so the page
	 * count is ceil((offset + length) / PAGE_SIZE).
	 */
	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user pages and collect their struct page pointers. */
	err = get_user_pages_fast(addr, count, 1, pages);
	if (err < count) {
		/* Only the pages actually pinned (if any) must be released. */
		count = err;
		err = -EFAULT;
		goto put_pages;
	}

	/*
	 * nvme_alloc_iod() is an allocation with GFP_KERNEL and is assumed to
	 * return NULL on failure; bail out instead of dereferencing it.
	 */
	err = -ENOMEM;
	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
	if (!iod)
		goto put_pages;

	sg = iod->sg;
	sg_init_table(sg, count);
	for (i = 0; i < count; i++) {
		/*
		 * Only the first entry starts inside its page; every entry runs
		 * to the end of its page or to the end of the buffer, whichever
		 * comes first. With the example above: first entry len = 2440 at
		 * offset 1656, second entry len = 560 at offset 0 (total 3000).
		 */
		unsigned seg_len = min_t(unsigned, length, PAGE_SIZE - offset);

		sg_set_page(&sg[i], pages[i], seg_len, offset);
		length -= seg_len;
		offset = 0;
	}
	sg_mark_end(&sg[i - 1]);
	iod->nents = count;

	/* Map every scatterlist entry to a DMA address. */
	nents = dma_map_sg(&dev->pci_dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (!nents)
		goto free_iod;

	/* The page pointers now live in the scatterlist. */
	kfree(pages);
	return iod;

free_iod:
	kfree(iod);
put_pages:
	/* Unpin whatever was pinned (count is <= 0 if nothing was). */
	for (i = 0; i < count; i++)
		put_page(pages[i]);
	kfree(pages);
	return ERR_PTR(err);
}
/*
 * Undo nvme_map_user_pages(): remove the DMA mapping of the iod's
 * scatterlist and drop the reference held on each of its pages.
 *
 * @dev:   device the mapping was made for.
 * @write: must match the value used when mapping (non-zero selects
 *         DMA_TO_DEVICE, zero DMA_FROM_DEVICE).
 * @iod:   iod returned by nvme_map_user_pages(); it is not freed here.
 *
 * NOTE(review): pages are released without being marked dirty after a
 * device-to-memory transfer — confirm whether that is needed here.
 */
void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod)
{
	int idx = 0;

	dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
		     write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);

	while (idx < iod->nents) {
		put_page(sg_page(&iod->sg[idx]));
		idx++;
	}
}
misc_register() 调用成功后,在 /dev 目录下就可以看到对应的设备节点了(节点名即 dev->name,例如 nvme0)。