【PCI】pcie-switch应用——热拔插(七)

内核热插拔驱动

PCI设备在BIOS启动阶段枚举,之后传给linux内核,由linux内核将每个PCI设备注册到pci_bus_type总线上,进而匹配pci驱动,如下图所示。
【PCI】pcie-switch应用——热拔插(七)_第1张图片
PCI桥设备也会匹配pci驱动,这个驱动名称为pcieport,该驱动在portdrv_pci.c文件中注册,来看一下它的匹配规则,如下所示。

/*
 * Init routine of the PCIe port driver, run as a device_initcall.
 *
 * Returns -EACCES when pcie_ports_disabled is set; otherwise runs
 * pcie_init_services() and the DMI check, then returns the result of
 * registering pcie_portdriver with the PCI core.
 */
static int __init pcie_portdrv_init(void)
{
	if (pcie_ports_disabled)
		return -EACCES;

	pcie_init_services();
	dmi_check_system(pcie_portdrv_dmi_table);

	return pci_register_driver(&pcie_portdriver);
}
device_initcall(pcie_portdrv_init);
/*
 * Figure link that was fused onto the dmi_check_system() line in the
 * original text:
 * https://img-blog.csdnimg.cn/b60716ee1baf411e9aaf5a48b48df586.png#pic_center
 */

static struct pci_driver pcie_portdriver = {
	.name		= "pcieport",
	.id_table	= &port_pci_ids[0],

	.probe		= pcie_portdrv_probe,
	.remove		= pcie_portdrv_remove,
	.shutdown	= pcie_portdrv_remove,

	.err_handler	= &pcie_portdrv_err_handler,

	.driver.pm	= PCIE_PORTDRV_PM_OPS,
};

/*
 * Match table of the pcieport driver: selection is purely by class code
 * (full mask), with vendor/device left as wildcards by PCI_DEVICE_CLASS.
 * The empty entry terminates the table.
 */
static const struct pci_device_id port_pci_ids[] = {
	{
		/* handle any PCI-Express port */
		PCI_DEVICE_CLASS((PCI_CLASS_BRIDGE_PCI << 8) | 0x00, ~0)
	},
	{
		/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
		PCI_DEVICE_CLASS((PCI_CLASS_BRIDGE_PCI << 8) | 0x01, ~0)
	},
	{ },
};

/*
 * PCI_DEVICE_CLASS - designated-initializer fragment for a class-based match.
 * @dev_class:      value stored in .class
 * @dev_class_mask: value stored in .class_mask
 *
 * Expands to the field initializers only (no braces).  The vendor, device,
 * subvendor and subdevice fields are all set to PCI_ANY_ID, so the class
 * fields are the only ones that carry a specific value.
 */
#define PCI_DEVICE_CLASS(dev_class,dev_class_mask) \
	.class = (dev_class), .class_mask = (dev_class_mask), \
	.vendor = PCI_ANY_ID, .device = PCI_ANY_ID, \
	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID

pcieport驱动会匹配所有的桥设备,然后执行驱动的probe函数,执行过程如下。

----->probe
		pcie_portdrv_probe
			pcie_port_device_register
				get_port_device_capability //检查bridge支持的服务,热插拔检查如下
					if (dev->is_hotplug_bridge &&
							(pcie_ports_native || host->native_pcie_hotplug)) {
						services |= PCIE_PORT_SERVICE_HP; //置位服务标志——热插拔,注意这个服务标志,后续要以此匹配驱动
						
				pcie_init_service_irqs //为所有服务申请中断号,只保存起来,后续供服务驱动使用
					pcie_port_enable_irq_vec
						pci_alloc_irq_vectors(dev, 1, PCIE_PORT_MAX_MSI_ENTRIES,PCI_IRQ_MSIX | PCI_IRQ_MSI);
						
				pcie_device_init //为每个服务注册设备,挂接在pcie_port_bus_type总线上
						/* Initialize generic device interface */
						device = &pcie->device;
						device->bus = &pcie_port_bus_type;
						device->release = release_pcie_device;	/* callback to free pcie dev */
						dev_set_name(device, "%s:pcie%03x",
								 pci_name(pdev),
								 get_descriptor_id(pci_pcie_type(pdev), service));
						device->parent = &pdev->dev;
						device_enable_async_suspend(device);

						retval = device_register(device);

首先检查设备所支持的服务,对于支持的服务置上标志位,这里主要关心PCIE_PORT_SERVICE_HP服务;然后为每个服务申请中断号保存,供后续服务驱动使用;最后创建一个设备注册到pcie_port_bus_type总线上。

对于有PCIE_PORT_SERVICE_HP服务的设备,将会匹配pciehp驱动,该驱动在pciehp_core.c文件中注册,注册过程如下。

/*
 * Register the pciehp service driver (hpdriver_portdrv) with the PCIe port
 * bus.  The result is logged at debug level and returned unchanged.
 */
int __init pcie_hp_init(void)
{
	int rc = pcie_port_service_register(&hpdriver_portdrv);

	pr_debug("pcie_port_service_register = %d\n", rc);
	if (rc != 0)
		pr_debug("Failure to register service\n");

	return rc;
}

int pcie_port_service_register(struct pcie_port_service_driver *new)
{
	if (pcie_ports_disabled)
		return -ENODEV;

	new->driver.name = new->name;
	new->driver.bus = &pcie_port_bus_type;
	new->driver.probe = pcie_port_probe_service;
	new->driver.remove = pcie_port_remove_service;
	new->driver.shutdown = pcie_port_shutdown_service;

	return driver_register(&new->driver);
}

/*
 * Service driver "pciehp": accepts any port type and claims the hot-plug
 * service (PCIE_PORT_SERVICE_HP).  Power-management hooks are only present
 * when the corresponding kernel options are enabled.
 */
static struct pcie_port_service_driver hpdriver_portdrv = {
	.name = "pciehp",
	.port_type = PCIE_ANY_PORT,
	.service = PCIE_PORT_SERVICE_HP,

	.probe = pciehp_probe,
	.remove = pciehp_remove,

#ifdef CONFIG_PM
	/* runtime PM hooks */
	.runtime_suspend = pciehp_runtime_suspend,
	.runtime_resume = pciehp_runtime_resume,
#ifdef CONFIG_PM_SLEEP
	/* system sleep hooks */
	.suspend = pciehp_suspend,
	.resume_noirq = pciehp_resume_noirq,
	.resume = pciehp_resume,
#endif /* CONFIG_PM_SLEEP */
#endif /* CONFIG_PM */
};

pciehp驱动也会注册在pcie_port_bus_type总线上,那么这个总线的匹配规则是什么呢?

/*
 * Bus named "pci_express".  Devices and drivers on it are paired by
 * pcie_port_bus_match().
 */
struct bus_type pcie_port_bus_type = {
	.name		= "pci_express",
	.match		= pcie_port_bus_match,
};
/*
 * Match callback of pcie_port_bus_type.
 *
 * Returns 1 when both @dev and @drv belong to pcie_port_bus_type, offer the
 * same service, and the driver accepts the port type of the device
 * (PCIE_ANY_PORT accepts every type).  Returns 0 otherwise.
 */
static int pcie_port_bus_match(struct device *dev, struct device_driver *drv)
{
	struct pcie_port_service_driver *svc_drv;
	struct pcie_device *svc_dev;

	if (dev->bus != &pcie_port_bus_type || drv->bus != &pcie_port_bus_type)
		return 0;

	svc_dev = to_pcie_device(dev);
	svc_drv = to_service_driver(drv);

	/* The matching rule: the service must be identical */
	if (svc_drv->service != svc_dev->service)
		return 0;

	/* A wildcard port type matches any port */
	if (svc_drv->port_type == PCIE_ANY_PORT)
		return 1;

	return svc_drv->port_type == pci_pcie_type(svc_dev->port) ? 1 : 0;
}

匹配规则是服务相等。pciehp驱动与设备匹配成功后执行pciehp_probe函数,如下。

----->pciehp_probe
				pcie_init
				init_slot //设置真正的操作函数集合
					ops->enable_slot = pciehp_sysfs_enable_slot;
					ops->disable_slot = pciehp_sysfs_disable_slot;
					ops->get_power_status = get_power_status;
					ops->get_adapter_status = get_adapter_status;
					ops->reset_slot = pciehp_reset_slot;

				pcie_init_notification	//之前在pcieport驱动的probe函数中申请了中断号,在这里使用————注册中断
                    pciehp_request_irq
                    	kthread_run(&pciehp_poll, ctrl,"pciehp_poll-%s", slot_name(ctrl));
						or //两种方式选择一种,一般使用中断方式
						request_threaded_irq(irq, pciehp_isr, pciehp_ist, IRQF_SHARED, "pciehp", ctrl);

				pci_hp_add //创建sysfs属性,提供操作方法
					fs_add_slot
						if (has_power_file(pci_slot)) {
							retval = sysfs_create_file(&pci_slot->kobj,
										   &hotplug_slot_attr_power.attr);
							if (retval)
								goto exit_power;
						}

						if (has_attention_file(pci_slot)) {
							retval = sysfs_create_file(&pci_slot->kobj,
										   &hotplug_slot_attr_attention.attr);
							if (retval)
								goto exit_attention;
						}

先初始化槽位真正的操作函数,然后创建内核查询槽位状态的线程或注册中断,最后注册sysfs属性给用户使用。

综上,整个过程如下图所示。
【PCI】pcie-switch应用——热拔插(七)_第2张图片

触发方法

触发方法有两种,分别为中断方式和POLL方式。

中断方式

硬中断

当SLOT上报中断给host时,将调用中断号对应的上半部中断处理函数pciehp_isr,由该函数检查状态确认有中断事件产生,然后将唤醒pciehp_ist中断处理线程执行真正处理动作。

pciehp_isr中断的上半部处理函数如下。

/*
 * Top half of the pciehp interrupt handler.  It is also called directly by
 * pciehp_ist() (to rerun after a deferred interrupt) and by pciehp_poll().
 *
 * Reads the Slot Status register, keeps only the event bits, writes them
 * back to the register and accumulates them in ctrl->pending_events for the
 * IRQ thread.
 *
 * @irq:    interrupt number; used here only to mask the line when the parent
 *          device is not runtime-active
 * @dev_id: the struct controller of the slot
 *
 * Returns IRQ_NONE when the port is in D3cold, when hot-plug interrupts are
 * disabled and poll mode is off, when the device does not respond, or when
 * no event bit is set.  Returns IRQ_HANDLED when the event needs no further
 * processing (Command Completed only, or hot-plug ignored for the device).
 * Returns IRQ_WAKE_THREAD when pciehp_ist() has to run.
 */
static irqreturn_t pciehp_isr(int irq, void *dev_id)
{
	struct controller *ctrl = (struct controller *)dev_id;
	struct pci_dev *pdev = ctrl_dev(ctrl);
	struct device *parent = pdev->dev.parent;
	u16 status, events = 0;

	/*
	 * Interrupts only occur in D3hot or shallower and only if enabled
	 * in the Slot Control register (PCIe r4.0, sec 6.7.3.4).
	 */
	if (pdev->current_state == PCI_D3cold ||
	    (!(ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE) && !pciehp_poll_mode)) /* bail out in D3cold, or when HPIE is off and we are not polling */
		return IRQ_NONE;

	/*
	 * Keep the port accessible by holding a runtime PM ref on its parent.
	 * Defer resume of the parent to the IRQ thread if it's suspended.
	 * Mask the interrupt until then.
	 */
	if (parent) {
		pm_runtime_get_noresume(parent);
		if (!pm_runtime_active(parent)) {
			pm_runtime_put(parent);
			disable_irq_nosync(irq);
			atomic_or(RERUN_ISR, &ctrl->pending_events);
			return IRQ_WAKE_THREAD;
		}
	}

read_status:
	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status); /* read the Slot Status register */
	if (status == (u16) ~0) {
		ctrl_info(ctrl, "%s: no response from device\n", __func__);
		if (parent)
			pm_runtime_put(parent);
		return IRQ_NONE;
	}

	/*
	 * Slot Status contains plain status bits as well as event
	 * notification bits; right now we only want the event bits.
	 */
	status &= PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
		  PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
		  PCI_EXP_SLTSTA_DLLSC; /* keep only the events we care about */

	/*
	 * If we've already reported a power fault, don't report it again
	 * until we've done something to handle it.
	 */
	if (ctrl->power_fault_detected)
		status &= ~PCI_EXP_SLTSTA_PFD;

	events |= status; /* accumulate the events (across re-reads) in events */
	if (!events) {
		if (parent)
			pm_runtime_put(parent);
		return IRQ_NONE;
	}

	if (status) {
		pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); /* clear the event bits in the device */

		/*
		 * In MSI mode, all event bits must be zero before the port
		 * will send a new interrupt (PCIe Base Spec r5.0 sec 6.7.3.4).
		 * So re-read the Slot Status register in case a bit was set
		 * between read and write.
		 */
		if (pci_dev_msi_enabled(pdev) && !pciehp_poll_mode)
			goto read_status;
	}

	ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events);
	if (parent)
		pm_runtime_put(parent);

	/*
	 * Command Completed notifications are not deferred to the
	 * IRQ thread because it may be waiting for their arrival.
	 */
	if (events & PCI_EXP_SLTSTA_CC) {
		ctrl->cmd_busy = 0;
		smp_mb();
		wake_up(&ctrl->queue);

		if (events == PCI_EXP_SLTSTA_CC)
			return IRQ_HANDLED;

		events &= ~PCI_EXP_SLTSTA_CC;
	}

	if (pdev->ignore_hotplug) {
		ctrl_dbg(ctrl, "ignoring hotplug event %#06x\n", events);
		return IRQ_HANDLED;
	}

	/* Save pending events for consumption by IRQ thread. */
	atomic_or(events, &ctrl->pending_events); /* stored in pending_events; the return value wakes the IRQ thread pciehp_ist */
	return IRQ_WAKE_THREAD;
}

假设SLOT上报了中断,并且置位了SLOT状态寄存器的PCI_EXP_SLTSTA_DLLSC位(链路状态已变化),则pciehp_isr将唤醒pciehp_ist中断线程(中断下半部),代码如下。

/*
 * Threaded half of the pciehp interrupt handler.
 *
 * Consumes the events accumulated in ctrl->pending_events (by pciehp_isr()
 * or by pciehp_request()) and dispatches them: attention button, power
 * fault, disable request, presence/link change.
 *
 * @irq:    interrupt number
 * @dev_id: the struct controller of the slot
 *
 * ctrl->ist_running is set for the duration of the call and ctrl->requester
 * is woken on exit; the sysfs enable/disable paths wait on that.
 *
 * Returns IRQ_NONE when there was nothing to do, IRQ_HANDLED after events
 * were processed, or the non-IRQ_WAKE_THREAD result of a rerun pciehp_isr().
 */
static irqreturn_t pciehp_ist(int irq, void *dev_id)
{
	struct controller *ctrl = (struct controller *)dev_id;
	struct pci_dev *pdev = ctrl_dev(ctrl);
	irqreturn_t ret;
	u32 events;

	ctrl->ist_running = true;
	pci_config_pm_runtime_get(pdev);

	/* rerun pciehp_isr() if the port was inaccessible on interrupt */
	if (atomic_fetch_and(~RERUN_ISR, &ctrl->pending_events) & RERUN_ISR) {
		ret = pciehp_isr(irq, dev_id);
		enable_irq(irq);
		if (ret != IRQ_WAKE_THREAD)
			goto out;
	}

	synchronize_hardirq(irq);
	/* Atomically fetch and clear everything that is pending */
	events = atomic_xchg(&ctrl->pending_events, 0);
	if (!events) {
		ret = IRQ_NONE;
		goto out;
	}

	/* Check Attention Button Pressed */
	if (events & PCI_EXP_SLTSTA_ABP) {
		ctrl_info(ctrl, "Slot(%s): Attention button pressed\n",
			  slot_name(ctrl));
		pciehp_handle_button_press(ctrl);
	}

	/* Check Power Fault Detected */
	if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
		ctrl->power_fault_detected = 1;
		ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl));
		pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
				      PCI_EXP_SLTCTL_ATTN_IND_ON);
	}

	/*
	 * Disable requests have higher priority than Presence Detect Changed
	 * or Data Link Layer State Changed events.
	 */
	down_read(&ctrl->reset_lock);
	if (events & DISABLE_SLOT) /* the core dispatch: act according to the event flags */
		pciehp_handle_disable_request(ctrl);
	else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
		pciehp_handle_presence_or_link_change(ctrl, events);
	up_read(&ctrl->reset_lock);

	ret = IRQ_HANDLED;
out:
	pci_config_pm_runtime_put(pdev);
	ctrl->ist_running = false;
	wake_up(&ctrl->requester);
	return ret;
}

上面假设产生的是PCI_EXP_SLTSTA_DLLSC事件,因此执行pciehp_handle_presence_or_link_change函数,代码如下。

/*
 * React to a Presence Detect Changed and/or Data Link Layer State Changed
 * event.
 *
 * @ctrl:   hot-plug controller of the slot
 * @events: pending event bits; only used here to choose the log messages
 *
 * Works in two steps, each under ctrl->state_lock until the state has been
 * updated: first a slot that is on (or blinking off) is disabled as a
 * surprise removal; then, if a card is present or the link is active, a
 * slot that is off (or blinking on) is enabled and the result is stored in
 * ctrl->request_result.
 */
void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events)
{
	int present, link_active;

	/*
	 * If the slot is on and presence or link has changed, turn it off.
	 * Even if it's occupied again, we cannot assume the card is the same.
	 */
	mutex_lock(&ctrl->state_lock);
	switch (ctrl->state) {
	case BLINKINGOFF_STATE:
		cancel_delayed_work(&ctrl->button_work);
		fallthrough;
	case ON_STATE: /* slot was on: turn it off, stop the drivers of all devices below it, then remove those devices */
		ctrl->state = POWEROFF_STATE;
		mutex_unlock(&ctrl->state_lock);
		if (events & PCI_EXP_SLTSTA_DLLSC)
			ctrl_info(ctrl, "Slot(%s): Link Down\n",
				  slot_name(ctrl));
		if (events & PCI_EXP_SLTSTA_PDC)
			ctrl_info(ctrl, "Slot(%s): Card not present\n",
				  slot_name(ctrl));
		pciehp_disable_slot(ctrl, SURPRISE_REMOVAL);
		break;
	default:
		mutex_unlock(&ctrl->state_lock);
		break;
	}

	/* Turn the slot on if it's occupied or link is up */
	mutex_lock(&ctrl->state_lock);
	present = pciehp_card_present(ctrl); /* is a device present in the slot? */
	link_active = pciehp_check_link_active(ctrl); /* is the data link active? */
	if (present <= 0 && link_active <= 0) { /* neither present nor active: nothing to enable */
		mutex_unlock(&ctrl->state_lock);
		return;
	}

	switch (ctrl->state) {
	case BLINKINGON_STATE:
		cancel_delayed_work(&ctrl->button_work);
		fallthrough;
	case OFF_STATE: /* slot was off: power it on, scan the devices below it, assign resources and register them on the bus */
		ctrl->state = POWERON_STATE;
		mutex_unlock(&ctrl->state_lock);
		if (present)
			ctrl_info(ctrl, "Slot(%s): Card present\n",
				  slot_name(ctrl));
		if (link_active)
			ctrl_info(ctrl, "Slot(%s): Link Up\n",
				  slot_name(ctrl));
		ctrl->request_result = pciehp_enable_slot(ctrl);
		break;
	default:
		mutex_unlock(&ctrl->state_lock);
		break;
	}
}

//pciehp_disable_slot的流程将在下文“软中断”一节中介绍,这里先看pciehp_enable_slot
pciehp_enable_slot
    __pciehp_enable_slot
    	board_added
    		pciehp_power_on_slot(ctrl);//使能槽位
			pciehp_check_link_status(ctrl);//检查数据链路状态
			pciehp_configure_device(ctrl);//重新枚举SLOT下所有设备,分配资源并注册到总线
				pci_scan_slot(parent, PCI_DEVFN(0, 0));

若SLOT之前是打开状态,则中断函数中会关闭槽位,移除资源。若SLOT之前是关闭状态,则中断函数中会打开槽位,重新枚举设备分配资源并注册到总线。

注意:SLOT单独上报一个PCI_EXP_SLTSTA_DLLSC事件是无效的,必须要和链路状态结合使用,比如,置位PCI_EXP_LNKSTA_DLLLA和置位PCI_EXP_SLTSTA_DLLSC且槽位上一次状态为OFF_STATE,则SLOT上报中断后将热插pcie设备。

硬中断触发流程:
【PCI】pcie-switch应用——热拔插(七)_第3张图片
硬件(pcie switch或slot)改变slot status、link status 状态,比如置位Data Link Layer State Changed 位和清除Data Link Layer Link Active位。硬件(pcie switch或slot)上报中断给hot-plug system driver,由hot-plug system driver读取slot status、link status 状态确认事件,如上面两位变化(热拔),则停用slot上所插pcie设备的驱动,然后移除设备和资源。

软中断

与热插拔相关属性是power,操作函数如下。

/*
 * Slot attribute named "power": reads are served by power_read_file(),
 * writes by power_write_file().  The mode makes it a regular file that is
 * readable by everyone and writable by its owner.
 */
static struct pci_slot_attribute hotplug_slot_attr_power = {
	.attr = {.name = "power", .mode = S_IFREG | S_IRUGO | S_IWUSR},
	.show = power_read_file,
	.store = power_write_file
};

/*
 * Store handler of the slot's "power" attribute: 0 disables the slot,
 * 1 enables it.
 *
 * @pci_slot: slot whose attribute was written
 * @buf:      text written by the user, parsed as a decimal number
 * @count:    number of bytes written
 *
 * Returns @count on success (also when the slot has no callback for the
 * requested operation), -ENODEV when the owning module cannot be pinned,
 * -EINVAL when the input is not 0 or 1, or the error returned by the
 * slot's enable_slot()/disable_slot() callback.
 */
static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
				size_t count)
{
	struct hotplug_slot *slot = pci_slot->hotplug;
	unsigned long lpower;
	char *end;
	u8 power;
	int retval = 0;

	lpower = simple_strtoul(buf, &end, 10);
	/*
	 * Do not simply mask the value down to 8 bits: that would turn e.g.
	 * "256" into 0 and power the slot off.  Input without any leading
	 * digit would likewise parse as 0.  Map both cases to a value that
	 * lands in the "illegal value" branch of the switch below.
	 */
	if (end == buf || lpower > 0xff)
		power = 0xff;
	else
		power = (u8)lpower;
	dbg("power = %d\n", power);

	/* Pin the module that owns the slot while its callbacks run */
	if (!try_module_get(slot->owner)) {
		retval = -ENODEV;
		goto exit;
	}
	switch (power) {
	case 0:
		if (slot->ops->disable_slot)
			retval = slot->ops->disable_slot(slot);
		break;

	case 1:
		if (slot->ops->enable_slot)
			retval = slot->ops->enable_slot(slot);
		break;

	default:
		err("Illegal value specified for power\n");
		retval = -EINVAL;
	}
	module_put(slot->owner);

exit:
	if (retval)
		return retval;
	return count;
}

/*
 * enable_slot operation of pciehp, reached when 1 is written to the slot's
 * "power" attribute.
 *
 * In BLINKINGON_STATE or OFF_STATE it posts a PCI_EXP_SLTSTA_PDC event via
 * pciehp_request(), waits until no events are pending and the IRQ thread is
 * no longer running, and returns ctrl->request_result.  In every other
 * state it only logs a message and returns -ENODEV.
 */
int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
{
	struct controller *ctrl = to_ctrl(hotplug_slot);

	mutex_lock(&ctrl->state_lock);
	switch (ctrl->state) {
	case BLINKINGON_STATE:
	case OFF_STATE:
		/* Drop the lock before waiting for the IRQ thread */
		mutex_unlock(&ctrl->state_lock);
		/*
		 * The IRQ thread becomes a no-op if the user pulls out the
		 * card before the thread wakes up, so initialize to -ENODEV.
		 */
		ctrl->request_result = -ENODEV;
		pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
		wait_event(ctrl->requester,
			   !atomic_read(&ctrl->pending_events) &&
			   !ctrl->ist_running);
		return ctrl->request_result;
	case POWERON_STATE:
		ctrl_info(ctrl, "Slot(%s): Already in powering on state\n",
			  slot_name(ctrl));
		break;
	case BLINKINGOFF_STATE:
	case ON_STATE:
	case POWEROFF_STATE:
		ctrl_info(ctrl, "Slot(%s): Already enabled\n",
			  slot_name(ctrl));
		break;
	default:
		ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
			 slot_name(ctrl), ctrl->state);
		break;
	}
	mutex_unlock(&ctrl->state_lock);

	return -ENODEV;
}

/*
 * disable_slot operation of pciehp, reached when 0 is written to the slot's
 * "power" attribute.
 *
 * In BLINKINGOFF_STATE or ON_STATE it posts a DISABLE_SLOT request via
 * pciehp_request(), waits until no events are pending and the IRQ thread is
 * no longer running, and returns ctrl->request_result.  In every other
 * state it only logs a message and returns -ENODEV.
 */
int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
{
	struct controller *ctrl = to_ctrl(hotplug_slot);

	mutex_lock(&ctrl->state_lock);
	switch (ctrl->state) {
	case BLINKINGOFF_STATE:
	case ON_STATE:
		/* Drop the lock before waiting for the IRQ thread */
		mutex_unlock(&ctrl->state_lock);
		pciehp_request(ctrl, DISABLE_SLOT);
		wait_event(ctrl->requester,
			   !atomic_read(&ctrl->pending_events) &&
			   !ctrl->ist_running);
		return ctrl->request_result;
	case POWEROFF_STATE:
		ctrl_info(ctrl, "Slot(%s): Already in powering off state\n",
			  slot_name(ctrl));
		break;
	case BLINKINGON_STATE:
	case OFF_STATE:
	case POWERON_STATE:
		ctrl_info(ctrl, "Slot(%s): Already disabled\n",
			  slot_name(ctrl));
		break;
	default:
		ctrl_err(ctrl, "Slot(%s): Invalid state %#x\n",
			 slot_name(ctrl), ctrl->state);
		break;
	}
	mutex_unlock(&ctrl->state_lock);

	return -ENODEV;
}

/*
 * Post a software-generated event for the IRQ thread.
 * @ctrl:   hot-plug controller of the slot
 * @action: event bit(s) to OR into ctrl->pending_events
 *
 * Outside poll mode the IRQ thread is woken explicitly.  In poll mode
 * nothing is woken here; pciehp_poll() checks pending_events itself.
 */
void pciehp_request(struct controller *ctrl, int action)
{
	atomic_or(action, &ctrl->pending_events);

	if (pciehp_poll_mode)
		return;

	irq_wake_thread(ctrl->pcie->irq, ctrl);
}

即用户可以在host主机使用以下方法

echo  0  >  /sys/bus/pci/slots/<slot编号>/power //通知linux内核需要热移除SLOT槽位下的pcie设备
echo  1  >  /sys/bus/pci/slots/<slot编号>/power //通知linux内核需要热添加SLOT槽位下的pcie设备

用户操作后将唤醒pciehp驱动的中断线程(下半部中断),即pciehp_ist。

在pciehp_ist中断线程中查询ctrl->pending_events标志(上面置位了DISABLE_SLOT或PCI_EXP_SLTSTA_PDC),根据标志执行处理函数。

if (events & DISABLE_SLOT)
		pciehp_handle_disable_request(ctrl);
	else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
		pciehp_handle_presence_or_link_change(ctrl, events);

对于DISABLE_SLOT标志(echo 0 > power),执行pciehp_handle_disable_request函数,过程如下。

pciehp_handle_disable_request(struct controller *ctrl)
	pciehp_disable_slot(ctrl, SAFE_REMOVAL); //关闭SLOT
		__pciehp_disable_slot
			remove_board
			
/*
 * Take the card in the slot out of service.
 * @ctrl:         hot-plug controller of the slot
 * @safe_removal: forwarded unchanged to pciehp_unconfigure_device()
 *
 * Unconfigures the devices below the port; if the slot has power control,
 * powers it off, waits one second and drops the link/presence events caused
 * by the power-off; finally turns the power indicator off.
 */
static void remove_board(struct controller *ctrl, bool safe_removal)
{
	pciehp_unconfigure_device(ctrl, safe_removal); /* remove all devices below the bridge */

	if (POWER_CTRL(ctrl)) {
		pciehp_power_off_slot(ctrl); /* switch off slot power */

		/*
		 * After turning power off, we must wait for at least 1 second
		 * before taking any action that relies on power having been
		 * removed from the slot/adapter.
		 */
		msleep(1000);

		/* Ignore link or presence changes caused by power off */
		atomic_and(~(PCI_EXP_SLTSTA_DLLSC | PCI_EXP_SLTSTA_PDC),
			   &ctrl->pending_events);
	}

	pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
			      INDICATOR_NOOP);
}

/*
 * Stop and remove every PCI device on the bus below the hot-plug port.
 * @ctrl:     hot-plug controller of the slot
 * @presence: when false, all devices on the bus are first marked
 *            disconnected; when true, each device's command register is
 *            rewritten (bus mastering and SERR off, INTx disabled)
 *
 * The device list is walked under the PCI rescan/remove lock.
 */
void pciehp_unconfigure_device(struct controller *ctrl, bool presence)
{
	struct pci_dev *dev, *temp;
	struct pci_bus *parent = ctrl->pcie->port->subordinate;
	u16 command;

	ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
		 __func__, pci_domain_nr(parent), parent->number);

	if (!presence)
		pci_walk_bus(parent, pci_dev_set_disconnected, NULL);

	pci_lock_rescan_remove();

	/*
	 * Stopping an SR-IOV PF device removes all the associated VFs,
	 * which will update the bus->devices list and confuse the
	 * iterator.  Therefore, iterate in reverse so we remove the VFs
	 * first, then the PF.  We do the same in pci_stop_bus_device().
	 */
	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
					 bus_list) { /* walk every device on the bus below the port */
		/* Hold a reference so dev stays valid after it has been removed */
		pci_dev_get(dev);
		pci_stop_and_remove_bus_device(dev); /* stop the driver of the device, then remove the device */
		/*
		 * Ensure that no new Requests will be generated from
		 * the device.
		 */
		if (presence) {
			pci_read_config_word(dev, PCI_COMMAND, &command);
			command &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_SERR);
			command |= PCI_COMMAND_INTX_DISABLE;
			pci_write_config_word(dev, PCI_COMMAND, command);
		}
		pci_dev_put(dev);
	}

	pci_unlock_rescan_remove();
}

/*
 * Convenience wrapper: stop @dev with pci_stop_bus_device(), then remove it
 * with pci_remove_bus_device(), in that order.
 */
void pci_stop_and_remove_bus_device(struct pci_dev *dev)
{
	pci_stop_bus_device(dev);
	pci_remove_bus_device(dev);
}

这样就将SLOT下的所有设备都移除了。

对于PCI_EXP_SLTSTA_PDC标志(echo 1 > power),执行pciehp_handle_presence_or_link_change函数,若SLOT是关闭状态,则重新打开SLOT,给SLOT上电使能,然后枚举SLOT下所有pci设备,给设备分配资源并注册到总线上。

触发流程:
【PCI】pcie-switch应用——热拔插(七)_第4张图片
host用户层写指定slot的power属性,将触发软中断,比如echo 0 > power。触发软中断将强制唤醒hot-plug system driver下半部中断(pciehp_ist),并传入关闭slot事件码(DISABLE_SLOT)。pciehp_ist函数中根据事件码执行相应动作,比如DISABLE_SLOT事件码将停用slot上所插pcie设备的驱动,然后移除设备资源,最后关闭slot电源。

POLL方式

内核也提供POLL方式不断检查SLOT的状态,但这个功能需要将模块参数pciehp_poll_mode置位,POLL函数为pciehp_poll,代码如下。

/*
 * Thread function used in poll mode instead of a hardware interrupt.
 * @data: the struct controller of the slot
 *
 * After an initial 10 second delay it loops until the thread is asked to
 * stop: each round calls pciehp_isr() and, as long as that requests the IRQ
 * thread or events are pending, runs pciehp_ist() directly; then it sleeps
 * for pciehp_poll_time seconds.
 *
 * Always returns 0.
 */
static int pciehp_poll(void *data)
{
	struct controller *ctrl = data;

	schedule_timeout_idle(10 * HZ); /* start with 10 sec delay */

	while (!kthread_should_stop()) {
		/* poll for interrupt events or user requests */
		while (pciehp_isr(IRQ_NOTCONNECTED, ctrl) == IRQ_WAKE_THREAD ||
		       atomic_read(&ctrl->pending_events))
			pciehp_ist(IRQ_NOTCONNECTED, ctrl);

		/* Values outside 1..60 seconds are replaced by 2 seconds */
		if (pciehp_poll_time <= 0 || pciehp_poll_time > 60)
			pciehp_poll_time = 2; /* clamp to sane value */

		schedule_timeout_idle(pciehp_poll_time * HZ);
	}

	return 0;
}

默认情况,每2s调用一下pciehp驱动的中断上半部函数pciehp_isr检查SLOT状态变化,若有事件产生,则调用pciehp_ist函数执行对应动作,例如slot置位Data Link Layer State Changed 位和清除Data Link Layer Link Active位,将热移除slot上所插pcie设备。

注意:热插拔技术实现需要给slot槽位预留出足够的资源(bus号、memory空间、I/O空间)!!!

可选方案:

  • 对于主板上slot槽位,由BIOS提前预留足够资源,或开启热插拔能力并通过内核启动参数修改资源预留大小(可查看linux内核pci_setup函数获取对应的参数名称,例如pci=hpbussize、hpmemsize、hpiosize等)。
  • 对于PCIe switch的port口,由switch预留足够资源(留个谜,请结合上一篇文章内容思考方案)。
  • 如果自己测试热插拔,请将pcie设备先插入slot上带电启动,由BIOS分配足够资源后进行热插拔测试。

你可能感兴趣的:(#,PCI,&,PCIE,linux,pcie,switch,pcie热插拔,热插拔)