pcie的dpc

DPC的全称是downstream port containment,是针对root port和pcie switch检测到不可恢复的错误时,就会通知下游端口的业务,以防止数据损坏的扩散.
其代码在drivers/pci/pcie/dpc.c
static struct pcie_port_service_driver dpcdriver = {
	.name		= "dpc",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_DPC,
	.probe		= dpc_probe,
	.remove		= dpc_remove,
	.reset_link	= dpc_reset_link,
};
我们重点看看其probe函数
static int dpc_probe(struct pcie_device *dev)
{
	struct dpc_dev *dpc;
	struct pci_dev *pdev = dev->port;
	struct device *device = &dev->device;
	int status;
	u16 ctl, cap;

	if (pcie_aer_get_firmware_first(pdev))
		return -ENOTSUPP;

	dpc = devm_kzalloc(device, sizeof(*dpc), GFP_KERNEL);
	if (!dpc)
		return -ENOMEM;
#检查是否支持这个feature
	dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
	dpc->dev = dev;
	set_service_data(dev, dpc);
#申请一个中断,这里用到了中断线程化,

	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
					   dpc_handler, IRQF_SHARED,
					   "pcie-dpc", dpc);
	if (status) {
		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
			 status);
		return status;
	}

}
我们看看中断处理函数的处理
中断处理函数只是清掉中断,然后返回IRQ_HANDLED,就会直接调用dpc_handler来处理
static irqreturn_t dpc_irq(int irq, void *context)
{
	struct dpc_dev *dpc = (struct dpc_dev *)context;
	struct pci_dev *pdev = dpc->dev->port;
	u16 cap = dpc->cap_pos, status;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);

	if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || status == (u16)(~0))
		return IRQ_NONE;

	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_INTERRUPT);
	if (status & PCI_EXP_DPC_STATUS_TRIGGER)
		return IRQ_WAKE_THREAD;
	return IRQ_HANDLED;
}

static irqreturn_t dpc_handler(int irq, void *context)
{
	struct aer_err_info info;
	struct dpc_dev *dpc = context;
	struct pci_dev *pdev = dpc->dev->port;
	u16 cap = dpc->cap_pos, status, source, reason, ext_reason;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);

	pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
		 status, source);

	reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;
#打印log 来表示当前检测到错误类型
	ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;
	pci_warn(pdev, "%s detected\n",
		 (reason == 0) ? "unmasked uncorrectable error" :
		 (reason == 1) ? "ERR_NONFATAL" :
		 (reason == 2) ? "ERR_FATAL" :
		 (ext_reason == 0) ? "RP PIO error" :
		 (ext_reason == 1) ? "software trigger" :
				     "reserved error");

	/* We configure DPC so it only triggers on ERR_FATAL */
#广播检测到的错误
	pcie_do_recovery(pdev, pci_channel_io_frozen, PCIE_PORT_SERVICE_DPC);

	return IRQ_HANDLED;
}


void pcie_do_recovery(struct pci_dev *dev, enum pci_channel_state state,
		      u32 service)
{
	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
	struct pci_bus *bus;

	#广播接收到的错误
	pci_dbg(dev, "broadcast error_detected message\n");
	if (state == pci_channel_io_frozen)
		pci_walk_bus(bus, report_frozen_detected, &status);
	else
		pci_walk_bus(bus, report_normal_detected, &status);

#如果只这这两种错误,则不用广播,也无法尝试aer修复
	if (state == pci_channel_io_frozen &&
	    reset_link(dev, service) != PCI_ERS_RESULT_RECOVERED)
		goto failed;
 #广播错误 
	if (status == PCI_ERS_RESULT_CAN_RECOVER) {
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast mmio_enabled message\n");
		pci_walk_bus(bus, report_mmio_enabled, &status);
	}
#广播错误
	if (status == PCI_ERS_RESULT_NEED_RESET) {
		/*
		 * TODO: Should call platform-specific
		 * functions to reset slot before calling
		 * drivers' slot_reset callbacks?
		 */
		status = PCI_ERS_RESULT_RECOVERED;
		pci_dbg(dev, "broadcast slot_reset message\n");
		pci_walk_bus(bus, report_slot_reset, &status);
	}

	if (status != PCI_ERS_RESULT_RECOVERED)
		goto failed;

	pci_dbg(dev, "broadcast resume message\n");
	pci_walk_bus(bus, report_resume, &status);

	pci_aer_clear_device_status(dev);
	pci_cleanup_aer_uncorrect_error_status(dev);
	pci_info(dev, "AER: Device recovery successful\n");
	return;

failed:
	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);

	/* TODO: Should kernel panic here? */
	pci_info(dev, "AER: Device recovery failed\n");
}

 

你可能感兴趣的:(Linux,源码分析)