rte_bus_probe()->pci_probe_all_drivers()->rte_pci_probe_one_driver()
接下来分析rte_pci_probe_one_driver的实现
/*
 * Try to bind one PCI driver to one PCI device (excerpt; "……" marks code
 * elided by the article's author).
 *
 * Returns 0 on success, 1 when the driver does not match the device or the
 * device is blocked by devargs policy, a negative errno value on error.
 */
static int rte_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) {
int ret;
bool already_probed;
struct rte_pci_addr *loc;
……
if (!rte_pci_match(dr, dev))
/* Match of device and driver failed */
return 1;
……
if (dev->device.devargs != NULL && // device is blocked by its devargs policy
dev->device.devargs->policy == RTE_DEV_BLOCKED) {
RTE_LOG(INFO, EAL, " Device is blocked, not initializing\n");
return 1;
}
……
already_probed = rte_dev_is_probed(&dev->device); // has a driver already been bound?
if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
dev->device.name);
return -EEXIST;
}
……
if (!already_probed) { // first probe: check the IOVA mode and claim the device
enum rte_iova_mode dev_iova_mode;
enum rte_iova_mode iova_mode;
dev_iova_mode = pci_device_iova_mode(dr, dev);
iova_mode = rte_eal_iova_mode();
/* The device/driver pair must accept the EAL-wide IOVA mode,
 * unless it declared "don't care" (RTE_IOVA_DC). */
if (dev_iova_mode != RTE_IOVA_DC &&
dev_iova_mode != iova_mode) {
……
return -EINVAL;
}
dev->driver = dr;
}
if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
/* map resources for devices that use igb_uio */
ret = rte_pci_map_device(dev);
if (ret != 0) {
/* mapping failed: release the claim taken above */
dev->driver = NULL;
return ret;
}
}
……
/* call the driver probe() function */
ret = dr->probe(dr, dev);
if (already_probed)
return ret; /* no rollback if already succeeded earlier */
if (ret) {
/* probe failed on a first-time bind: roll back driver claim and mapping */
dev->driver = NULL;
if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
/* Don't unmap if device is unsupported and
* driver needs mapped resources.
*/
!(ret > 0 &&
(dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
rte_pci_unmap_device(dev);
} else {
/* success: record the generic driver pointer on the device */
dev->device.driver = &dr->driver;
}
return ret;
}
首先调用rte_pci_match(),在该func中,遍历rte_pci_driver结构体中的id_table,id_table中存储了该驱动支持的设备的vendor_id和device_id等信息,依次与设备的信息进行匹配以确定驱动是否支持该设备。在确认"驱动支持设备、设备不是被阻止的、设备还没有绑定过驱动、设备和驱动的IOVA模式相同"这几个必要条件之后,继续进行接下来的工作。如果设备驱动指明该设备需要进行内存映射,则调用rte_pci_map_device()对设备进行内存映射。
/*
 * Map the device's PCI resources into this process, dispatching on the
 * kernel driver (kdrv) the device is bound to (excerpt; "……" marks elided
 * code). Returns 0 on success, negative on failure; stays -1 for UIO
 * drivers when physical addresses are not usable.
 */
int rte_pci_map_device(struct rte_pci_device *dev) {
int ret = -1;
switch (dev->kdrv) {
case RTE_PCI_KDRV_VFIO:
……
break;
case RTE_PCI_KDRV_IGB_UIO:
case RTE_PCI_KDRV_UIO_GENERIC:
/* UIO mapping only works when direct physical addressing is available */
if (rte_eal_using_phys_addrs()) {
ret = pci_uio_map_resource(dev);
}
break;
default:
……
break;
}
return ret;
}
进行内存映射时根据设备原本绑定的内核驱动模块的不同而不同,以igb_uio驱动为例,首先调用rte_eal_using_phys_addrs()去检查是否允许直接物理地址访问(该内容在介绍DPDK的init过程时的第18步中介绍过,此处不再介绍),允许直接物理地址访问时,再调用pci_uio_map_resource()来实现物理地址映射。
/*
 * Map all BARs of a UIO-bound device into this process (excerpt; "……"
 * marks elided code, including the error-unwinding path).
 * On success the per-device mapping record (uio_res) is appended to the
 * global uio_res_list so secondary processes can replay the mappings.
 */
int pci_uio_map_resource(struct rte_pci_device *dev) {
int i, map_idx = 0, ret;
uint64_t phaddr;
struct mapped_pci_resource *uio_res = NULL;
struct mapped_pci_res_list *uio_res_list =
RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
dev->intr_handle.fd = -1;
dev->intr_handle.uio_cfg_fd = -1;
if (rte_eal_process_type() != RTE_PROC_PRIMARY) // non-primary process: take the secondary-process mapping path
return pci_uio_map_secondary(dev);
/* allocate uio resource */
ret = pci_uio_alloc_resource(dev, &uio_res);
if (ret)
return ret;
/* Map all BARs */
for (i = 0; i != PCI_MAX_RESOURCE; i++) {
/* skip empty BAR */
phaddr = dev->mem_resource[i].phys_addr;
if (phaddr == 0)
continue;
ret = pci_uio_map_resource_by_index(dev, i, uio_res, map_idx);
if (ret)
goto error;
map_idx++;
}
/* record how many BARs were actually mapped */
uio_res->nb_maps = map_idx;
TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
return 0;
……
}
pci_uio_map_resource()中,首先调用pci_uio_alloc_resource()来分配相应的资源并记录在uio_res中。该func()首先调用pci_get_uio_dev()来确定该设备的uio_num是多少,可以认为是设备绑定了igb_uio驱动之后分配的设备号,uio_num通过遍历设备在/sys文件系统当中的信息来确定,确定了uio_num之后,会在/dev目录下通过mknod的方式创建一个对应的设备文件,文件名称为uioX,X=uio_num。接下来打开uioX文件,并将文件描述符记录在intr_handle.fd中,另外打开uioX在/sys文件系统中的配置文件,最后为uio_res分配空间,将设备文件路径和设备的PCI地址存放在其中。
/*
 * Locate the uio device number for @dev, open its /dev/uioX node and its
 * sysfs config file, and allocate the mapping record used later by
 * secondary processes (excerpt; "……" marks elided code, including error
 * handling after each open()).
 */
int pci_uio_alloc_resource(struct rte_pci_device *dev, struct mapped_pci_resource **uio_res) {
char dirname[PATH_MAX];
char cfgname[PATH_MAX];
char devname[PATH_MAX]; /* contains the /dev/uioX */
int uio_num;
struct rte_pci_addr *loc;
loc = &dev->addr;
/* find uio resource */
uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
……
snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
dev->intr_handle.fd = open(devname, O_RDWR); // the /dev/uioX device file
……
snprintf(cfgname, sizeof(cfgname), "/sys/class/uio/uio%u/device/config", uio_num); // the uioX PCI config file in sysfs
dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
……
/* allocate the mapping details for secondary processes*/
*uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
if (*uio_res == NULL) {
RTE_LOG(ERR, EAL,
"%s(): cannot store uio mmap details\n", __func__);
goto error;
}
/* remember device path and PCI address for later replay */
strlcpy((*uio_res)->path, devname, sizeof((*uio_res)->path));
memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));
return 0;
……
}
pci_uio_map_resource()中,接下来遍历设备的6个bar空间信息(bar空间信息在DPDK的init过程中已经读入),如果某个bar空间指明有物理地址,则调用pci_uio_map_resource_by_index()来映射资源。该func中,打开设备的某个bar空间在/sys文件系统对应文件resourceX,调用pci_find_max_end_va()找到一块结尾地址最大的虚拟地址空间(遍历mem_config->memsegs);找到符合要求的虚拟地址空间之后,调用pci_map_resource()建立虚拟地址空间和打开的bar文件的映射,pci_map_resource()的实现实际上是调用的mmap()这个Linux的系统调用;最后将resourceX文件的路径,bar空间记录的物理地址及长度,获取到的映射地址等记录在uio_res->maps数组的某个位置。
/*
 * mmap a single BAR (res_idx) of @dev by opening its sysfs resourceX file,
 * and record path/phys_addr/size/virtual address in uio_res->maps[map_idx]
 * (excerpt; "……" marks elided code, including the write-combine path that
 * would set wc_activate and fd before the check below).
 */
int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) {
int fd = -1;
char devname[PATH_MAX];
void *mapaddr;
struct rte_pci_addr *loc;
struct pci_map *maps;
int wc_activate = 0;
……
loc = &dev->addr;
maps = uio_res->maps;
/* buffer that will hold a copy of the resourceX path */
maps[map_idx].path = rte_malloc(NULL, sizeof(devname), 0);
……
if (!wc_activate || fd < 0) {
/* build /sys/.../<pci addr>/resource<res_idx> */
snprintf(devname, sizeof(devname),
"%s/" PCI_PRI_FMT "/resource%d",
rte_pci_get_sysfs_path(),
loc->domain, loc->bus, loc->devid,
loc->function, res_idx);
/* then try to map resource file */
fd = open(devname, O_RDWR);
if (fd < 0) {
RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
devname, strerror(errno));
goto error;
}
}
/* try mapping somewhere close to the end of hugepages */
if (pci_map_addr == NULL)
pci_map_addr = pci_find_max_end_va();
mapaddr = pci_map_resource(pci_map_addr, fd, 0, (size_t)dev->mem_resource[res_idx].len, 0);
close(fd); /* fd no longer needed once the mapping is established */
……
/* advance the hint past this mapping, page-aligned, for the next BAR */
pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t)dev->mem_resource[res_idx].len);
pci_map_addr = RTE_PTR_ALIGN(pci_map_addr, sysconf(_SC_PAGE_SIZE));
/* record this BAR's mapping so secondary processes can replay it */
maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
maps[map_idx].size = dev->mem_resource[res_idx].len;
maps[map_idx].addr = mapaddr;
maps[map_idx].offset = 0;
strcpy(maps[map_idx].path, devname);
dev->mem_resource[res_idx].addr = mapaddr;
……
}
pci_uio_map_resource()中,最后将uio_res插入到uio_res_list,rte_pci_map_device()即完成相关内存映射工作。
回到rte_pci_probe_one_driver(),接下来调用与设备匹配的驱动的probe方法,此处以ixgbe设备的驱动为例,其probe方法为eth_ixgbe_pci_probe(),该内容在下一节中进行分析。