PCIe设备发现过程

PCIe在调试过程中,经常会出现扫描不到对端EP设备的问题,在问题定位过程中,了解内核中pcie枚举流程至关重要。

PCIe枚举过程一般分为三步:
1.创建根节点
2.扫描根节点下设备
3.为根节点下设备分配资源

那么如何发现设备?

从总线扫描pcie设备的函数pci_scan_child_bus开始分析

unsigned int pci_scan_child_bus(struct pci_bus *bus)
{
    unsigned int devfn, pass, max = bus->busn_res.start;
    struct pci_dev *dev;

    dev_dbg(&bus->dev, "scanning bus\n");

    /* Go find them, Rover! */
    for (devfn = 0; devfn < 0x100; devfn += 8)
        pci_scan_slot(bus, devfn);

    /* Reserve buses for SR-IOV capability. */
    max += pci_iov_bus_range(bus);

    /*
     * After performing arch-dependent fixup of the bus, look behind
     * all PCI-to-PCI bridges on this bus.
     */
    if (!bus->is_added) {
        dev_dbg(&bus->dev, "fixups for bus\n");
        pcibios_fixup_bus(bus);
        bus->is_added = 1;
    }

    for (pass = 0; pass < 2; pass++)
        list_for_each_entry(dev, &bus->devices, bus_list) {
            if (pci_is_bridge(dev))
                max = pci_scan_bridge(bus, dev, max, pass);
        }

    /*
     * Make sure a hotplug bridge has at least the minimum requested
     * number of buses.
     */
    if (bus->self && bus->self->is_hotplug_bridge && pci_hotplug_bus_size) {
        if (max - bus->busn_res.start < pci_hotplug_bus_size - 1)
            max = bus->busn_res.start + pci_hotplug_bus_size - 1;
    }

    /*
     * We've scanned the bus and so we know all about what's on
     * the other side of any bridges that may be on this bus plus
     * any devices.
     *
     * Return how far we've got finding sub-buses.
     */
    dev_dbg(&bus->dev, "bus scan returning with max=%02x\n", max);
    return max;
}
EXPORT_SYMBOL_GPL(pci_scan_child_bus);

该函数的核心代码为

/*
 * Visit devfn 0x00, 0x08, ..., 0xf8: one call per group of eight devfn
 * values. pci_scan_slot() itself probes the remaining functions of the
 * group (devfn + fn).
 */
for (devfn = 0; devfn < 0x100; devfn += 8)
    pci_scan_slot(bus, devfn);

这里的bus变量来源于pci_create_root_bus,即所创建的根总线对应的struct pci_bus(而不只是一个总线号)。
devfn : 设备号和功能号的组合编码,每个设备号对应8个功能号,所以循环步长为8。
这里使用的是穷举法:循环依次尝试每一个设备号,同一设备下的其余function则在pci_scan_slot内部逐个扫描。

pci_scan_slot函数:

/*
 * Scan the slot starting at @devfn on @bus: function 0 first, then every
 * further function number handed out by next_fn().
 *
 * Returns the number of devices found whose is_added flag is not yet set.
 * Returns 0 when function 0 is absent, or when only_one_child() holds and
 * @devfn is non-zero.
 */
int pci_scan_slot(struct pci_bus *bus, int devfn)
{
    struct pci_dev *pdev;
    unsigned func;
    unsigned found = 0;

    /* The whole slot was already covered by the devfn 0 call. */
    if (only_one_child(bus) && (devfn > 0))
        return 0;

    pdev = pci_scan_single_device(bus, devfn);
    if (!pdev)
        return 0;
    if (!pdev->is_added)
        found++;

    /*
     * next_fn() is always given the result of the most recent scan,
     * which may be NULL when that function was not present.
     */
    func = next_fn(bus, pdev, 0);
    while (func > 0) {
        pdev = pci_scan_single_device(bus, devfn + func);
        if (pdev) {
            if (!pdev->is_added)
                found++;
            pdev->multifunction = 1;
        }
        func = next_fn(bus, pdev, func);
    }

    /* Set up ASPM link state on the upstream bridge if anything new appeared. */
    if (bus->self && found)
        pcie_aspm_init_link_state(bus->self);

    return found;
}

pci_scan_single_device:

/*
 * Return the pci_dev for @devfn on @bus, probing and adding it to the bus
 * if it is not known yet. Returns NULL when probing finds nothing.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
    struct pci_dev *pdev = pci_get_slot(bus, devfn);

    /* Already known: drop the reference pci_get_slot() handed us. */
    if (pdev != NULL) {
        pci_dev_put(pdev);
        return pdev;
    }

    pdev = pci_scan_device(bus, devfn);
    if (pdev != NULL)
        pci_device_add(pdev, bus);

    return pdev;
}

核心函数为pci_scan_device:

/*
 * Probe @devfn on @bus and, if a Vendor/Device ID can be read, allocate
 * and set up a pci_dev for it.
 *
 * Returns the new device, or NULL when the ID read fails, allocation
 * fails, or pci_setup_device() reports an error.
 */
static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
{
    struct pci_dev *pdev;
    u32 id;

    /* Allow up to 60*1000 ms for the ID read to succeed. */
    if (!pci_bus_read_dev_vendor_id(bus, devfn, &id, 60*1000))
        return NULL;

    pdev = pci_alloc_dev(bus);
    if (pdev == NULL)
        return NULL;

    /* Low 16 bits are the vendor ID, high 16 bits the device ID. */
    pdev->devfn = devfn;
    pdev->vendor = id & 0xffff;
    pdev->device = (id >> 16) & 0xffff;

    pci_set_of_node(pdev);

    if (pci_setup_device(pdev) == 0)
        return pdev;

    /*
     * Setup failed: drop the bus reference and free the device.
     * NOTE(review): pci_set_of_node() is not undone on this path --
     * confirm whether a matching release is required.
     */
    pci_bus_put(pdev->bus);
    kfree(pdev);
    return NULL;
}

pci_bus_read_dev_vendor_id会去读设备的device id和vendor id,如果读不到,说明设备不存在,如果读到了,就会创建一个pci_dev。
所以能不能扫描到对端设备,就看能不能读到设备的vendor id。

/*
 * Read the dword at PCI_VENDOR_ID (vendor ID in the low 16 bits, device ID
 * in the high 16 bits) of function @devfn on @bus into *@l.
 *
 * @crs_timeout is the retry budget in milliseconds used while the device
 * keeps answering with Configuration Request Retry Status; 0 disables
 * retrying altogether.
 *
 * Returns true when *@l holds a usable ID. Returns false when a config
 * read fails, the value looks like an empty slot, or the device is still
 * signalling CRS once the retry budget is exhausted.
 */
bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *l,
                int crs_timeout)
{
    int delay = 1;

    if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
        return false;

    /* some broken boards return 0 or ~0 if a slot is empty: */
    if (*l == 0xffffffff || *l == 0x00000000 ||
        *l == 0x0000ffff || *l == 0xffff0000)
        return false;

    /*
     * Configuration Request Retry Status.  Some root ports return the
     * actual device ID instead of the synthetic ID (0xFFFF) required
     * by the PCIe spec.  Ignore the device ID and only check for
     * (vendor id == 1).
     */
    while ((*l & 0xffff) == 0x0001) {
        if (!crs_timeout)
            return false;

        /*
         * Give up only while the device is STILL reporting CRS.  The
         * check sits before the sleep so that a valid ID obtained by
         * the final re-read ends the loop normally instead of being
         * discarded as a timeout.
         */
        if (delay > crs_timeout) {
            printk(KERN_WARNING "pci %04x:%02x:%02x.%d: not responding\n",
                   pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
                   PCI_FUNC(devfn));
            return false;
        }

        /* Exponential back-off: 1 ms, 2 ms, 4 ms, ... */
        msleep(delay);
        delay *= 2;
        if (pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, l))
            return false;
    }

    return true;
}

while循环里会以逐次加倍的间隔(1ms、2ms、4ms……)重新读取vendor id,从上述代码中可以看出,扫描失败退出循环的原因有两个
1. 配置读失败了 (说明链路不通)
2. 超时没有得到响应(预留时间为60s,已经非常长了,第一点失败的可能性更大)

所以扫描不到对端设备时:
1.确认建链是否成功,建链失败,肯定扫描不到对端
2.确认对端的配置空间是否可以访问(对端的pcie模块是否处于解复位状态)
3.确认type0,type1,iatu等参数配置是否正确,如果正确了,确认配置访问的地址空间大小是否足够。

你可能感兴趣的:(PCIe学习笔记)