DPU Network Development SDK: DPDK (Part 11)

Call chain: rte_bus_probe() -> pci_probe_all_drivers() -> rte_pci_probe_one_driver()

Next, let's look at the implementation of rte_pci_probe_one_driver():

static int rte_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) {
   int ret;
   bool already_probed;
   struct rte_pci_addr *loc;
   ……
   if (!rte_pci_match(dr, dev))
      /* Match of device and driver failed */
      return 1;
   ……
   if (dev->device.devargs != NULL && // device is blocked by its devargs policy
      dev->device.devargs->policy == RTE_DEV_BLOCKED) {
      RTE_LOG(INFO, EAL, "  Device is blocked, not initializing\n");
      return 1;
   }
   ……
   already_probed = rte_dev_is_probed(&dev->device); // device is already bound to a driver
   if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
      RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
            dev->device.name);
      return -EEXIST;
   }
   ……
   if (!already_probed) { // IOVA mode check
      enum rte_iova_mode dev_iova_mode;
      enum rte_iova_mode iova_mode;

      dev_iova_mode = pci_device_iova_mode(dr, dev);
      iova_mode = rte_eal_iova_mode();
      if (dev_iova_mode != RTE_IOVA_DC &&
          dev_iova_mode != iova_mode) {
         ……
         return -EINVAL;
      }
      dev->driver = dr;
   }

   if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
      /* map resources for devices that use igb_uio */
      ret = rte_pci_map_device(dev);
      if (ret != 0) {
         dev->driver = NULL;
         return ret;
      }
   }
   ……
   /* call the driver probe() function */
   ret = dr->probe(dr, dev);
   if (already_probed)
      return ret; /* no rollback if already succeeded earlier */
   if (ret) {
      dev->driver = NULL;
      if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
         /* Don't unmap if device is unsupported and
          * driver needs mapped resources.
          */
         !(ret > 0 &&
            (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
         rte_pci_unmap_device(dev);
   } else {
      dev->device.driver = &dr->driver;
   }

   return ret;
}

The function first calls rte_pci_match(), which walks the id_table array in the rte_pci_driver structure; the table records the vendor_id, device_id and other identifiers of the devices the driver supports, and each entry is compared against the device's IDs to decide whether the driver supports this device (a simplified sketch of the matching loop is shown below). The probe then continues only when all of the necessary conditions hold: the driver supports the device, the device is not blocked by its devargs policy, the device has not already been bound to a driver (unless the driver sets RTE_PCI_DRV_PROBE_AGAIN), and the IOVA mode required by the driver/device matches the IOVA mode EAL selected (RTE_IOVA_DC means the device does not care). If the driver flags indicate that the device's resources must be mapped (RTE_PCI_DRV_NEED_MAPPING), rte_pci_map_device() is called to map the device's memory.
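
As a reference, here is a simplified sketch of that matching loop. It is not the verbatim DPDK source: the field and macro names follow the public rte_pci.h / rte_bus_pci.h definitions (RTE_PCI_ANY_ID was spelled PCI_ANY_ID in older releases), and the real rte_pci_match() also compares the subsystem IDs and the class ID in the same way.

#include <rte_bus_pci.h> /* header layout may differ across DPDK versions */

/* Simplified sketch of the ID matching done by rte_pci_match(): the
 * driver's id_table is an array terminated by an entry whose vendor_id
 * is 0; any entry covering the device's IDs means the driver supports
 * the device. */
static int pci_id_match(const struct rte_pci_driver *dr, const struct rte_pci_device *dev) {
   const struct rte_pci_id *id;

   for (id = dr->id_table; id->vendor_id != 0; id++) {
      if (id->vendor_id != dev->id.vendor_id &&
          id->vendor_id != RTE_PCI_ANY_ID)
         continue;
      if (id->device_id != dev->id.device_id &&
          id->device_id != RTE_PCI_ANY_ID)
         continue;
      /* the real function also checks subsystem_vendor_id,
       * subsystem_device_id and class_id the same way */
      return 1; /* driver supports this device */
   }
   return 0; /* no entry matched */
}

With the match confirmed and the remaining preconditions checked, rte_pci_map_device() is the next step; its implementation is shown below.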

int rte_pci_map_device(struct rte_pci_device *dev) {
   int ret = -1;

   switch (dev->kdrv) {
   case RTE_PCI_KDRV_VFIO:
      ……
      break;
   case RTE_PCI_KDRV_IGB_UIO:
   case RTE_PCI_KDRV_UIO_GENERIC:
      if (rte_eal_using_phys_addrs()) {
         ret = pci_uio_map_resource(dev);
      }
      break;
   default:
      ……
      break;
   }
   return ret;
}

How the mapping is performed depends on which kernel driver module the device was originally bound to. Taking the igb_uio driver as an example, rte_eal_using_phys_addrs() is called first to check whether direct physical address access is available (covered in step 18 of the DPDK init walkthrough, so not repeated here); only then is pci_uio_map_resource() called to perform the mapping.

int pci_uio_map_resource(struct rte_pci_device *dev) {
   int i, map_idx = 0, ret;
   uint64_t phaddr;
   struct mapped_pci_resource *uio_res = NULL;
   struct mapped_pci_res_list *uio_res_list =
      RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);

   dev->intr_handle.fd = -1;
   dev->intr_handle.uio_cfg_fd = -1;

   if (rte_eal_process_type() != RTE_PROC_PRIMARY) // secondary processes take this path instead
      return pci_uio_map_secondary(dev);

   /* allocate uio resource */
   ret = pci_uio_alloc_resource(dev, &uio_res);
   if (ret)
      return ret;

   /* Map all BARs */
   for (i = 0; i != PCI_MAX_RESOURCE; i++) {
      /* skip empty BAR */
      phaddr = dev->mem_resource[i].phys_addr;
      if (phaddr == 0)
         continue;

      ret = pci_uio_map_resource_by_index(dev, i, uio_res, map_idx);
      if (ret)
         goto error;
      map_idx++;
   }
   uio_res->nb_maps = map_idx;
   TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
   return 0;
   ……
}

In pci_uio_map_resource(), pci_uio_alloc_resource() is called first to allocate the mapping bookkeeping and record it in uio_res. That function starts with pci_get_uio_dev() to determine the device's uio_num, which can be thought of as the device number assigned once the device was bound to the igb_uio driver; uio_num is found by walking the device's entries in the /sys filesystem. With uio_num known, a device file named uioX (X = uio_num) is created under /dev via mknod if necessary. The uioX file is then opened and its descriptor stored in intr_handle.fd, and the uioX device's config file in /sys is opened as well. Finally, space is allocated for uio_res, and the device file path and the device's PCI address are stored in it.
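
As an illustration of how uio_num is discovered, here is a rough, self-contained sketch; the helper name find_uio_num is made up for this article. The real pci_get_uio_dev() builds the directory path from rte_pci_get_sysfs_path() and the PCI address, also handles an older "uio:uioX" directory layout, and creates /dev/uioX with mknod() when the node does not exist yet.

#include <dirent.h>
#include <stdio.h>

/* Hypothetical helper sketching how pci_get_uio_dev() finds uio_num:
 * a device bound to igb_uio exposes a directory such as
 * /sys/bus/pci/devices/0000:03:00.0/uio/uio0, and the number at the
 * end of the "uioX" entry is the uio device number. */
static int find_uio_num(const char *pci_addr) {
   char dirname[256];
   struct dirent *e;
   DIR *dir;
   int uio_num = -1;

   snprintf(dirname, sizeof(dirname), "/sys/bus/pci/devices/%s/uio", pci_addr);
   dir = opendir(dirname);
   if (dir == NULL)
      return -1; /* device is not bound to a UIO-based driver */

   while ((e = readdir(dir)) != NULL) {
      if (sscanf(e->d_name, "uio%d", &uio_num) == 1)
         break; /* e.g. "uio0" -> uio_num = 0 */
   }
   closedir(dir);
   return uio_num;
}

The relevant part of pci_uio_alloc_resource() itself is listed below.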

int pci_uio_alloc_resource(struct rte_pci_device *dev, struct mapped_pci_resource **uio_res) {
   char dirname[PATH_MAX];
   char cfgname[PATH_MAX];
   char devname[PATH_MAX]; /* contains the /dev/uioX */
   int uio_num;
   struct rte_pci_addr *loc;

   loc = &dev->addr;

   /* find uio resource */
   uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1);
   ……
   snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);

   dev->intr_handle.fd = open(devname, O_RDWR); // the /dev/uioX device file
   ……
   snprintf(cfgname, sizeof(cfgname), "/sys/class/uio/uio%u/device/config", uio_num); // uioX配置文件
   dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
   ……
   /* allocate the mapping details for secondary processes*/
   *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0);
   if (*uio_res == NULL) {
      RTE_LOG(ERR, EAL,
         "%s(): cannot store uio mmap details\n", __func__);
      goto error;
   }

   strlcpy((*uio_res)->path, devname, sizeof((*uio_res)->path));
   memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr));

   return 0;
   ……
}

Back in pci_uio_map_resource(), the loop walks the device's six BAR entries (the BAR information was read in during DPDK init); for each BAR that reports a non-zero physical address, pci_uio_map_resource_by_index() is called to map it. Inside that function, the resourceX file that the /sys filesystem exposes for this BAR is opened. pci_find_max_end_va() is called to find the virtual address just past the highest-ending memory segment (it walks mem_config->memsegs), so the mapping lands close to the end of the hugepage area. pci_map_resource() then maps the opened BAR file at that address; its implementation is essentially the Linux mmap() system call. Finally, the resourceX file path, the BAR's physical address and length, and the virtual address returned by the mapping are recorded in a slot of the uio_res->maps array.

int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) {
   int fd = -1;
   char devname[PATH_MAX];
   void *mapaddr;
   struct rte_pci_addr *loc;
   struct pci_map *maps;
   int wc_activate = 0;
   ……
   loc = &dev->addr;
   maps = uio_res->maps;

   maps[map_idx].path = rte_malloc(NULL, sizeof(devname), 0);
   ……
   if (!wc_activate || fd < 0) {
      snprintf(devname, sizeof(devname),
         "%s/" PCI_PRI_FMT "/resource%d",
         rte_pci_get_sysfs_path(),
         loc->domain, loc->bus, loc->devid,
         loc->function, res_idx);

      /* then try to map resource file */
      fd = open(devname, O_RDWR);
      if (fd < 0) {
         RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
            devname, strerror(errno));
         goto error;
      }
   }

   /* try mapping somewhere close to the end of hugepages */
   if (pci_map_addr == NULL)
      pci_map_addr = pci_find_max_end_va();

   mapaddr = pci_map_resource(pci_map_addr, fd, 0, (size_t)dev->mem_resource[res_idx].len, 0);
   close(fd);
   ……
   pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t)dev->mem_resource[res_idx].len);
   pci_map_addr = RTE_PTR_ALIGN(pci_map_addr, sysconf(_SC_PAGE_SIZE));

   maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr;
   maps[map_idx].size = dev->mem_resource[res_idx].len;
   maps[map_idx].addr = mapaddr;
   maps[map_idx].offset = 0;
   strcpy(maps[map_idx].path, devname);
   dev->mem_resource[res_idx].addr = mapaddr;
   ……
}
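
pci_map_resource() itself is little more than a wrapper around mmap(); the following simplified view (logging and error handling elided, helper renamed so it is not mistaken for the verbatim source) shows the essence:

#include <sys/mman.h>
#include <sys/types.h>

/* Simplified view of pci_map_resource(): map "size" bytes of the opened
 * resource file at requested_addr (a hint unless the caller passes
 * MAP_FIXED through additional_flags). Returns MAP_FAILED on error,
 * otherwise the address chosen by the kernel. */
static void *map_resource_sketch(void *requested_addr, int fd, off_t offset, size_t size, int additional_flags) {
   return mmap(requested_addr, size, PROT_READ | PROT_WRITE,
         MAP_SHARED | additional_flags, fd, offset);
}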

In pci_uio_map_resource(), the last step inserts uio_res into uio_res_list; the recorded mapping details are what allow secondary processes to replay the same mappings later. With that, rte_pci_map_device() has completed the memory-mapping work.
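
For completeness, the replay in a secondary process (pci_uio_map_secondary(), the early-return path shown earlier) works on exactly this recorded state. A rough illustration follows; the bar_map struct is a made-up stand-in for an entry of uio_res->maps, and the point is the same-address requirement, not the exact DPDK code.

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

/* Hypothetical stand-in for one entry of uio_res->maps as recorded by
 * the primary process: the resource file path plus the address, size
 * and offset it was mapped with. */
struct bar_map { const char *path; void *addr; size_t size; off_t offset; };

/* Re-map one recorded BAR in a secondary process. The real
 * pci_uio_map_secondary() walks uio_res_list, picks the entry whose
 * PCI address matches the device, and fails the probe if the mapping
 * does not land at the address the primary recorded, because shared
 * data structures hold raw pointers into these regions. */
static int replay_bar_mapping(const struct bar_map *m) {
   void *addr;
   int fd = open(m->path, O_RDWR);

   if (fd < 0)
      return -1;
   addr = mmap(m->addr, m->size, PROT_READ | PROT_WRITE,
         MAP_SHARED, fd, m->offset);
   close(fd);

   return (addr == m->addr) ? 0 : -1;
}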

Back in rte_pci_probe_one_driver(), the next step is to call the probe callback of the driver matched to the device. Taking the ixgbe driver as an example, its probe method is eth_ixgbe_pci_probe(), which will be analyzed in the next article.
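
For context, dr->probe is whatever callback the PMD registered with the PCI bus. The ixgbe PMD's registration looks roughly like the following (simplified from drivers/net/ixgbe/ixgbe_ethdev.c; the exact drv_flags can differ between DPDK versions):

static struct rte_pci_driver rte_ixgbe_pmd = {
   .id_table = pci_id_ixgbe_map,   /* vendor/device IDs handled by the PMD */
   .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
   .probe = eth_ixgbe_pci_probe,   /* invoked above as dr->probe(dr, dev) */
   .remove = eth_ixgbe_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe, pci_id_ixgbe_map);
RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe, "* igb_uio | uio_pci_generic | vfio-pci");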
