内核中断笔记(MSIx和GSI)。

/* Excerpt of struct device: only the MSI-related member discussed in these notes is shown. */
struct device 
{
	struct irq_domain	*msi_domain; // MSI irq_domain associated with the device; per these notes it is the one created in irq-gic-v3-its-pci-msi.c
}

/* Excerpt of struct irq_domain: only the two members discussed in these notes are shown. */
struct irq_domain
{
	/* Private data supplied by whoever creates the domain (see the host_data argument of __irq_domain_add). */
	void *host_data;
	/* Callback table supplied by whoever creates the domain (see the ops argument of __irq_domain_add). */
	struct irq_domain_ops *ops;
}


irq_domain的创建最终通过:
 __irq_domain_add函数:
/* Prototype only: every irq_domain in these notes is ultimately created through this function. */
struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
				    irq_hw_number_t hwirq_max, int direct_max,
				    const struct irq_domain_ops *ops,  // callback table supplied by the creator
				    void *host_data)   // extra creator-private data; its meaning differs from domain to domain

层次结构:具体是irq_domain_alloc_irq_data函数建立:

内核中断笔记(MSIx和GSI)。_第1张图片

系统创建的irq_domain:总共4个。通过parent成员组成 层次结构。根为ffff8020c0030400。

[    0.000000] ===__irq_domain_add domain ffff8020c0030400 name irqchip@(____ptrval____) ===    irq-gic-v3.c
[    0.000000] ===__irq_domain_add domain ffff8020c0030500 name irqchip@(____ptrval____) ===    irq-gic-v3-its.c
[    0.002937] ===__irq_domain_add domain ffff8020c0032100 name irqchip@(____ptrval____) ===    irq-gic-v3-its-pci-msi

具体创建过程:

第一个:boot核init_IRQ 函数最终调用到irq-gic-v3.c中

host_data: 是struct gic_chip_data gic_data

irq_domain_ops:gic_irq_domain_ops

参考:irq-gic-v3.c :gic_init_bases函数

[    0.000000] ===__irq_domain_add domain ffff8020c0030400 name irqchip@(____ptrval____) ===
[    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #100
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x180
[    0.000000]  show_stack+0x24/0x30
[    0.000000]  dump_stack+0x90/0xb4
[    0.000000]  __irq_domain_add+0x1e4/0x398
[    0.000000]  gic_init_bases+0xd4/0x2dc
[    0.000000]  gic_acpi_init+0x158/0x270
[    0.000000]  acpi_match_madt+0x4c/0x84
[    0.000000]  acpi_table_parse_entries_array+0x140/0x218
[    0.000000]  acpi_table_parse_entries+0x70/0x98
[    0.000000]  acpi_table_parse_madt+0x40/0x50
[    0.000000]  __acpi_probe_device_table+0x88/0xe0
[    0.000000]  irqchip_init+0x38/0x40
[    0.000000]  init_IRQ+0xfc/0x130
[    0.000000]  start_kernel+0x330/0x4b8

  

irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);

第二个:irq-gic-v3.c调用了irq-gic-v3-its.c中的函数创建,并把第一个domain作为parent传递。

host_data是msi_domain_info(ops:its_msi_domain_ops)

irq_domain_ops是its_domain_ops .

参考:its_probe_one->its_init_domain函数

[    0.000000] ===__irq_domain_add domain ffff8020c0030500 name irqchip@(____ptrval____) ===
[    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #100
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x180
[    0.000000]  show_stack+0x24/0x30
[    0.000000]  dump_stack+0x90/0xb4
[    0.000000]  __irq_domain_add+0x1e4/0x398
[    0.000000]  its_probe_one+0x8cc/0x9bc
[    0.000000]  gic_acpi_parse_madt_its+0x104/0x144
[    0.000000]  acpi_table_parse_entries_array+0x140/0x218
[    0.000000]  acpi_table_parse_entries+0x70/0x98
[    0.000000]  acpi_table_parse_madt+0x40/0x50
[    0.000000]  its_init+0x1bc/0x490
[    0.000000]  gic_init_bases+0x1d4/0x2dc
[    0.000000]  gic_acpi_init+0x158/0x270
[    0.000000]  acpi_match_madt+0x4c/0x84
[    0.000000]  acpi_table_parse_entries_array+0x140/0x218
[    0.000000]  acpi_table_parse_entries+0x70/0x98
[    0.000000]  acpi_table_parse_madt+0x40/0x50
[    0.000000]  __acpi_probe_device_table+0x88/0xe0
[    0.000000]  irqchip_init+0x38/0x40



irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);

第三个 最后的do_one_initcall 调用its_pci_msi_init中函数创建,在msi_create_irq_domain函数下创建。

host_data:  msi_domain_info ( ops:its_pci_msi_ops)

irq_domain_ops是 kernel/irq/msi.c提供的msi_domain_ops。

        irq_domain_ops在kernel/irq/msi.c和drivers/pci/msi.c也有,在不同的每个地方都提供了部分回调。

参考:its_pci_msi_init_one->pci_msi_create_irq_domain->msi_create_irq_domain .....

[    0.002937] ===__irq_domain_add domain ffff8020c0032100 name irqchip@(____ptrval____) ===
[    0.002939] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0 #100
[    0.002941] Call trace:
[    0.002944]  dump_backtrace+0x0/0x180
[    0.002946]  show_stack+0x24/0x30
[    0.002948]  dump_stack+0x90/0xb4
[    0.002950]  __irq_domain_add+0x1e4/0x398
[    0.002953]  irq_domain_create_hierarchy+0x88/0x90
[    0.002955]  msi_create_irq_domain+0x50/0x160
[    0.002959]  pci_msi_create_irq_domain+0x80/0x160
[    0.002961]  its_pci_msi_init_one+0x94/0xd8
[    0.002963]  its_pci_msi_parse_madt+0x60/0x90
[    0.002965]  acpi_table_parse_entries_array+0x140/0x218
[    0.002967]  acpi_table_parse_entries+0x70/0x98
[    0.002969]  acpi_table_parse_madt+0x40/0x50
[    0.002971]  its_pci_msi_init+0xb8/0xcc
[    0.002973]  do_one_initcall+0x54/0x1d8
[    0.002976]  kernel_init_freeable+0x160/0x328
[    0.002978]  kernel_init+0x18/0x110
[    0.002980]  ret_from_fork+0x10/0x18



irq_domain_update_bus_token(domain, DOMAIN_BUS_PCI_MSI);
/*
 * Allocate nr_irqs virqs (virq .. virq + nr_irqs - 1) in a domain and run the
 * domain's alloc callbacks for them.
 *
 * A NULL domain falls back to irq_default_domain. When realloc is set and
 * irq_base is non-negative, irq_base is reused as the virq instead of
 * allocating new descriptors.
 *
 * Returns the first virq on success, or a negative errno:
 * -EINVAL (no domain), -ENOSYS (domain has no alloc callback), -ENOMEM
 * (irq_data allocation failed), or the error from descriptor allocation or
 * from the hierarchy alloc call.
 */
int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
                            unsigned int nr_irqs, int node, void *arg,
                            bool realloc, const struct cpumask *affinity)
{
        int i, ret, virq;

        if (domain == NULL) {
                domain = irq_default_domain;
                if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
                        return -EINVAL;
        }

        if (!domain->ops->alloc) {
                pr_debug("domain->ops->alloc() is NULL\n");
                return -ENOSYS;
        }

        if (realloc && irq_base >= 0) {
                virq = irq_base;
        } else {
                virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
                                              affinity); // create the irq_desc entries
                if (virq < 0) {
                        pr_debug("cannot allocate IRQ(base %d, count %d)\n",
                                 irq_base, nr_irqs);
                        return virq;
                }
        }

        if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {  // the irq_data is also built as a hierarchy, following the domain's parent chain
                pr_debug("cannot allocate memory for IRQ%d\n", virq);
                ret = -ENOMEM;
                goto out_free_desc;
        }

        mutex_lock(&irq_domain_mutex);
        ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); // hierarchical call; the logs shown in these notes are printed from irq_domain_set_hwirq_and_chip
        if (ret < 0) {
                mutex_unlock(&irq_domain_mutex);
                goto out_free_irq_data;
        }
        for (i = 0; i < nr_irqs; i++)
                irq_domain_insert_irq(virq + i);
        mutex_unlock(&irq_domain_mutex);

        return virq;

        /* Unwind in reverse order of setup. */
out_free_irq_data:
        irq_domain_free_irq_data(virq, nr_irqs);
out_free_desc:
        irq_free_descs(virq, nr_irqs);
        return ret;
}

irq_domain_set_hwirq_and_chip 函数kernel/irq/irqdomain.c

从父到子创建。

pci设备的MSI中断hwirq和virq的创建:

drivers/pci/msi.c函数 pci_msi_setup_msi_irqs中通过dev_get_msi_domain获取的是irq-gic-v3-its-pci-msi.c下创建的domain。然后根据其中的parent,调用irq_domain_alloc_irqs_parent从父节点的domain创建hwirq和virq的映射。

这个是irq-gic-v3.c中的:
[   13.130169] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030400 irq_data ffff8020d6c8f600  virq 32 hwirq 8197 ===
[   13.140852] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0 #100
[   13.146929] Hardware name: Phytium FT2004/FT2004, BIOS V1006 Apr  7 2020
[   13.153613] Call trace:
[   13.156047]  dump_backtrace+0x0/0x180
[   13.159695]  show_stack+0x24/0x30
[   13.162996]  dump_stack+0x90/0xb4
[   13.166297]  irq_domain_set_hwirq_and_chip+0x78/0xd8
[   13.171246]  irq_domain_set_info+0x58/0x90
[   13.175327]  gic_irq_domain_alloc+0xdc/0x268
[   13.179582]  irq_domain_alloc_irqs_parent+0x48/0x60
[   13.184445]  its_irq_gic_domain_alloc+0xd4/0xf8
[   13.188960]  its_irq_domain_alloc+0xd0/0x168
[   13.193215]  irq_domain_alloc_irqs_parent+0x48/0x60
[   13.198078]  msi_domain_alloc+0x7c/0x160
[   13.201985]  __irq_domain_alloc_irqs+0x150/0x338
[   13.206587]  msi_domain_alloc_irqs+0xa8/0x308
[   13.210930]  pci_msi_setup_msi_irqs+0x64/0x78
[   13.215271]  __pci_enable_msix+0x320/0x540
[   13.219353]  pci_alloc_irq_vectors_affinity+0x120/0x160

下面是irq-gic-v3-its.c中的alloc函数

[   13.447145] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030500 irq_data ffff8020d6c8cb00  virq 32 hwirq 8197 ===
[   13.457825] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0 #100
[   13.463902] Hardware name: Phytium FT2004/FT2004, BIOS V1006 Apr  7 2020
[   13.470586] Call trace:
[   13.473019]  dump_backtrace+0x0/0x180
[   13.476667]  show_stack+0x24/0x30
[   13.479967]  dump_stack+0x90/0xb4
[   13.483267]  irq_domain_set_hwirq_and_chip+0x78/0xd8
[   13.488216]  its_irq_domain_alloc+0x58/0x168
[   13.492471]  irq_domain_alloc_irqs_parent+0x48/0x60
[   13.497334]  msi_domain_alloc+0x7c/0x160
[   13.501242]  __irq_domain_alloc_irqs+0x150/0x338
[   13.505844]  msi_domain_alloc_irqs+0xa8/0x308
[   13.510186]  pci_msi_setup_msi_irqs+0x64/0x78
[   13.514527]  __pci_enable_msix+0x320/0x540
[   13.518609]  pci_alloc_irq_vectors_affinity+0x120/0x160

最终这个是irq-gic-v3-its-pci-msi.c中的接口调用:

这个3145732 hwirq在函数drivers/pci/msi.c:pci_msi_domain_calc_hwirq中计算。__irq_domain_alloc_irqs下通过ops->set_desc。调到pci_msi_domain_set_desc。到pci_msi_domain_calc_hwirq

[   13.595814] ===irq_domain_set_hwirq_and_chip domain ffff8020c0032100 irq_data ffff8020c1b70c28  virq 32 hwirq 3145732 ===
[   13.606754] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0 #100
[   13.612830] Hardware name: Phytium FT2004/FT2004, BIOS V1006 Apr  7 2020
[   13.619514] Call trace:
[   13.621947]  dump_backtrace+0x0/0x180
[   13.625595]  show_stack+0x24/0x30
[   13.628896]  dump_stack+0x90/0xb4
[   13.632196]  irq_domain_set_hwirq_and_chip+0x78/0xd8
[   13.637145]  msi_domain_ops_init+0x3c/0x80
[   13.641227]  msi_domain_alloc+0xb8/0x160
[   13.645135]  __irq_domain_alloc_irqs+0x150/0x338
[   13.649737]  msi_domain_alloc_irqs+0xa8/0x308
[   13.654079]  pci_msi_setup_msi_irqs+0x64/0x78
[   13.658421]  __pci_enable_msix+0x320/0x540
[   13.662502]  pci_alloc_irq_vectors_affinity+0x120/0x160

pci驱动初始化irq中断:pci_alloc_irq_vectors_affinity 跟踪:

pci_alloc_irq_vectors_affinity (drivers/pci/msi.c)
	__pci_enable_msi_range
		msi_capability_init              暂分析msi部分
			pci_msi_setup_msi_irqs 			(drivers/pci/msi.c)
				msi_domain_alloc_irqs 		(kernel/irq/msi.c)
					ops->domain_alloc_irqs  (kernel/irq/msi.c) //msi_domain_ops->__msi_domain_alloc_irqs


函数:	
__msi_domain_alloc_irqs (kernel/irq/msi.c)
	msi_domain_prepare_irqs    (一个pci/x 设备分配一个its_device :调用its_create_device 分配its_device)
 for_each_msi_entry循环:
	ops->set_desc   (kernel/irq/msi.c) (pci_msi_domain_calc_hwirq)计算出的全局唯一ID ,作为最上面的irq_data的hwirq                
	__irq_domain_alloc_irqs  //kernel/irq/irqdomain.c
			irq_domain_alloc_descs (kernel/irq/irqdomain.c) ---分配多个irq_desc
			irq_domain_alloc_irq_data (kernel/irq/irqdomain.c) ---建立irq_domain、irq_data的层次结构
			irq_domain_alloc_irqs_hierarchy (kernel/irq/irqdomain.c)
	irq_set_msi_desc_off 		//设置msi_entry->irq 。该msi_desc 的irq_base
	irq_domain_insert_irq
	irq_domain_activate_irq   //激活msi/x中断 ,往相关的寄存器写入gic的地址。
		
	
函数:	
irq_domain_alloc_irqs_hierarchy
	domain->ops->alloc(domain, irq_base, nr_irqs, arg)  //kernel/irq/msi.c :irq_domain_ops->alloc(msi_domain_alloc)
		msi_domain_alloc
			irq_domain_alloc_irqs_parent
				irq_domain_alloc_irqs_hierarchy(domain->parent,irq_base,nr_irqs,arg) ......层级调用 irq-gic-v3-its
					domain->ops->alloc                 //drivers/irqchip/irq-gic-v3-its.c 提供 its_irq_domain_alloc
						its_irq_domain_alloc			//drivers/irqchip/irq-gic-v3-its.c
							its_irq_gic_domain_alloc
								irq_domain_alloc_irqs_parent  //kernel/irq/irqdomain.c
									irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base, nr_irqs, arg);  ----------------层级调用到irq-gic-v3
										domain->ops->alloc     //drivers/irqchip/irq-gic-v3.c  提供gic_irq_domain_alloc
											gic_irq_domain_alloc  //drivers/irqchip/irq-gic-v3.c
												gic_irq_domain_map
													irq_domain_set_info          //kernel/irq/irqdomain.c
														irq_domain_set_hwirq_and_chip  设置最底层的hwirq和irq,irq_chip
														
							irq_domain_set_hwirq_and_chip       //设置irq_data 的hwirq和irq ,irq_chip //drivers/irqchip/irq-gic-v3-its.c
			msi_domain_ops->msi_init                      kernel/irq/msi.c 提供msi_domain_ops_init
				irq_domain_set_hwirq_and_chip		设置最上层的hwirq和irq ,irq_chip
				
				
				
dmesg日志:
不同irq_domain_ops->alloc:先后关系
[   11.265652] ===gic_irq_domain_alloc domain ffff8022c0038f00 virq 47 hwirq 8205 nr_irq 1     //首先是最底层的irq-gic-v3.c
[   11.273733] ===its_irq_domain_alloc domain ffff8022c0039000 virq 47 hwirq 8205 nr_irqs 1    //其次是中级的irq-gic-v3-its.c
[   11.281899] ===msi_domain_alloc doamin ffff8022c003ad00 virq 47  hwirq 7864320 nr_irqs 1		//最后是irq-gic-v3-its-pci-msi.c 使用 kernel/irq/msi.c 的alloc

可以看出:
    每个层级的irq_data的virq是一致的,是同一个虚拟中断号。而hwirq 除了最上层的是pci_msi_domain_calc_hwirq 计算出的全局唯一ID,其它是irq-gic-v3-its.c 中根据gic驱动得出
	

设置irq_data 中    的hwirq和irq ,irq_chip
irq_domain_set_hwirq_and_chip:

其中irq_chip代表的中断芯片的配置,分别定义了三个
drivers/irqchip/irq-gic-v3.c:  gic_chip /"GICv3"
drivers/irqchip/irq-gic-v3-its.c: its_irq_chip /"ITS"
drivers/irqchip/irq-gic-v3-its-pci-msi.c: its_msi_irq_chip /"ITS-MSI"

激活msi/msix中断:

irq_domain_activate_irq
    __irq_domain_activate_irq
        domain->ops->activate //kernel/irq/msi.c 中irq_domain_ops->activate (msi_domain_activate)
            msi_domain_activate
                irq_chip_write_msi_msg




/*
 * irq_domain_ops->activate callback for MSI domains: compose the MSI message
 * for irq_data and write it out through the interrupt's irq_chip.
 *
 * Always returns 0; a failure to compose the message trips BUG_ON.
 */
static int msi_domain_activate(struct irq_domain *domain,
                               struct irq_data *irq_data, bool early)
{
        /* Two-entry message array; the initializer leaves both entries zeroed. */
        struct msi_msg msg[2] = { [1] = { }, };

        /* The compose call runs inside BUG_ON, so a non-zero return is fatal. */
        BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
        msi_check_level(irq_data->domain, msg);
        irq_chip_write_msi_msg(irq_data, msg);
        return 0;
}


/*
 * Forward the MSI message to the irq_write_msi_msg callback of the irq_chip
 * attached to this irq_data. The callback pointer is called unconditionally.
 */
static inline void irq_chip_write_msi_msg(struct irq_data *data,
					  struct msi_msg *msg)
{
	data->chip->irq_write_msi_msg(data, msg);
}


__pci_write_msi_msg 函数是具体的写操作。

激活msi/msix中断: 


irq_domain_activate_irq
    __irq_domain_activate_irq
        domain->ops->activate //kernel/irq/msi.c 中irq_domain_ops->activate (msi_domain_activate)
            msi_domain_activate  //kernel/irq/msi.c
                irq_chip_compose_msi_msg
                irq_chip_write_msi_msg  //写msi/x 相关寄存器。写入gic地址。
                    irq_data->chip->irq_write_msi_msg(data, msg);  //此时irq_domain是its_pci_msi创建的,irq_data的 chip数据是 irq-gic-v3-its-pci-msi.c  --- irq_chip "ITS-MSI"



激活需要写入msi/x消息:
/*
 * Debug-instrumented copy of the ITS compose callback: build the MSI message
 * (target address + data) for the interrupt described by d.
 *
 * The address comes from the ITS node's get_msi_base callback and is split
 * into 32-bit halves; the data word is the event ID from its_get_event_id.
 * The message is then handed to iommu_dma_map_msi_msg.
 */
static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
        struct its_node *its;
        u64 addr;

        its = its_dev->its;
        addr = its->get_msi_base(its_dev); // per these notes this resolves to its_irq_get_msi_base

        msg->address_lo         = lower_32_bits(addr); // low half of the GIC ITS address
        msg->address_hi         = upper_32_bits(addr);
        msg->data               = its_get_event_id(d);  // MSI data word (event ID)
        /*
         * Debug trace. The format string must consume exactly the arguments
         * passed: one pointer and three 32-bit values. The earlier version had
         * four conversions for three arguments and used %llx on 32-bit fields.
         */
        printk("===its_irq_compose_msi_msg its_device %p  address %08x_%08x data %u\n",
               its_dev, msg->address_hi, msg->address_lo, msg->data);
        iommu_dma_map_msi_msg(d->irq, msg);
}

/*
 * Event ID used as the MSI data word: the hwirq minus the first LPI of the
 * owning device (event_map.lpi_base). A device that owns a single interrupt
 * therefore always gets event ID 0.
 */
static inline u32 its_get_event_id(struct irq_data *d)
{
	struct its_device *owner = irq_data_get_irq_chip_data(d);
	u32 event = d->hwirq - owner->event_map.lpi_base;

	return event;
}

/*
 * Address a device must write to in order to raise an MSI through this ITS:
 * the ITS physical base plus the GITS_TRANSLATER offset.
 */
static u64 its_irq_get_msi_base(struct its_device *its_dev)
{
        return its_dev->its->phys_base + GITS_TRANSLATER;
}

its_irq_compose_msi_msg:合成msi_msg。

PS:关于补丁中修改的its驱动“msi中断控制器XXXXXXXXXX”的理解:msi_base 没有转换成iova,而是直接使用;正常情况下会通过smmu进行转换。
its_irq_compose_msi_msg 中注释掉了对iommu_dma_compose_msi_msg的调用。
原函数的意思是需要准备msi_msg,其中有msi_base的地址(its_base + GITS_TRANSLATER)。


/*
 * Build the MSI message for the interrupt described by d: the target address
 * comes from the ITS node's get_msi_base callback, the data word from
 * its_get_event_id.
 */
static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
{
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
        struct its_node *its = its_dev->its;
        u64 doorbell = its->get_msi_base(its_dev);

        msg->data               = its_get_event_id(d);
        msg->address_hi         = upper_32_bits(doorbell);
        msg->address_lo         = lower_32_bits(doorbell);

        /* Gives the IOMMU layer the chance to replace the msi_base address with its IOVA. */
        iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
}

/*
 * Rewrite the address in msg to the IOVA recorded in the msi_desc's IOMMU
 * cookie. The message is left untouched when the device has no IOMMU domain,
 * the domain has no iova_cookie, or no MSI page was recorded (which also
 * triggers a warning).
 *
 * Per these notes the MSI page is recorded earlier, from the ITS alloc
 * callback: its_irq_domain_alloc -> iommu_dma_prepare_msi.
 */
void iommu_dma_compose_msi_msg(struct msi_desc *desc,
                               struct msi_msg *msg)
{
        struct device *dev = msi_desc_to_dev(desc);
        const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
        const struct iommu_dma_msi_page *msi_page = msi_desc_get_iommu_cookie(desc);

        if (!domain || !domain->iova_cookie)
                return;
        if (WARN_ON(!msi_page))
                return;

        /* High word comes straight from the IOVA. */
        msg->address_hi = upper_32_bits(msi_page->iova);
        /* Low word keeps the offset inside the MSI granule and adds the IOVA's low half. */
        msg->address_lo = (msg->address_lo &
                           (cookie_msi_granule(domain->iova_cookie) - 1)) +
                          lower_32_bits(msi_page->iova);
}

iommu_dma_prepare_msi函数将	分配iommu_dma_msi_page结构体,记录msi_addr和iova。
/*
 * Look up (or create) the iommu_dma_msi_page for msi_addr and record it in the
 * msi_desc's IOMMU cookie, so that the compose step can later use its IOVA.
 *
 * Returns 0 on success, including the case where the device has no IOMMU
 * domain or the domain has no iova_cookie (the cookie is then set to NULL).
 * Returns -ENOMEM when no MSI page could be obtained.
 */
int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
{
        struct device *dev = msi_desc_to_dev(desc);
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
        struct iommu_dma_msi_page *msi_page;
        static DEFINE_MUTEX(msi_prepare_lock); /* see below */

        if (!domain || !domain->iova_cookie) {
                desc->iommu_cookie = NULL;
                return 0;
        }

        /*
         * In fact the whole prepare operation should already be serialised by
         * irq_domain_mutex further up the callchain, but that's pretty subtle
         * on its own, so consider this locking as failsafe documentation...
         */
        mutex_lock(&msi_prepare_lock);
        msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
        mutex_unlock(&msi_prepare_lock);

        msi_desc_set_iommu_cookie(desc, msi_page); // store msi_page as the msi_desc's IOMMU cookie (stored even when it is NULL)

        if (!msi_page)
                return -ENOMEM;
        return 0;
}
			


                
                
                
                

分配的8192开始的号,,怎么开始?its_alloc_device_irq 确定msi的硬件中断号
its_device->event_map.lpi_base->lpi_base号怎么来的 ?

分配的8192开始的号,,怎么开始?its_alloc_device_irq 确定msi的硬件中断号
its_device->event_map.lpi_base->lpi_base号怎么来的 ?

gic_its初始化
its_init
	its_alloc_lpi_tables
		its_lpi_init
			free_lpi_range(8192, lpis);
				mk_lpi_range  //创建lpi_range
				list_add(&new->entry, &lpi_range_list);
				

设备初始化irq	
its_create_device	
	its_lpi_alloc
		alloc_lpi_range //为每个its_device 分配dev->event_map.lpi_base。每次在前面已经分配的后面。

初始化msi中断号:一个its硬件

/*
 * Compute the number of LPI ID bits, allocate the LPI property table and
 * initialise the LPI allocator.
 *
 * Returns -ENOMEM if the property table cannot be allocated, otherwise the
 * result of its_lpi_init().
 */
static int __init its_alloc_lpi_tables(void)
{
        phys_addr_t paddr;

        /* ID bits reported in GICD_TYPER, capped at ITS_MAX_LPI_NRBITS. */
        lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
                                ITS_MAX_LPI_NRBITS);
        gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
        if (!gic_rdists->prop_page) {
                pr_err("Failed to allocate PROPBASE\n");
                return -ENOMEM;
        }

        paddr = page_to_phys(gic_rdists->prop_page);
        pr_info("===GIC: using LPI property table @%pa lpi_id_bits %u \n", &paddr,lpi_id_bits); // measured value on the author's board was 16, i.e. presumably 16 ID bits
        return its_lpi_init(lpi_id_bits);
}


/*
 * Initialise the LPI allocator by "freeing" the whole usable LPI range,
 * which starts at ID 8192.
 *
 * id_bits: number of interrupt ID bits; the default range size is
 *          (1 << id_bits) - 8192.
 * Returns the result of free_lpi_range().
 */
static int __init its_lpi_init(u32 id_bits)
{
        u32 lpis = (1UL << id_bits) - 8192; // with id_bits == 16 this is 65536 - 8192, i.e. presumably that many LPIs in total
        u32 numlpis;
        int err;

        numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer);

        /* A value above 2 that does not exceed the default range replaces it. */
        if (numlpis > 2 && !WARN_ON(numlpis > lpis)) {
                lpis = numlpis;
                pr_info("ITS: Using hypervisor restricted LPI range [%u]\n",
                        lpis);
        }

        /*
         * Initializing the allocator is just the same as freeing the
         * full range of LPIs.
         */
        err = free_lpi_range(8192, lpis);  // creates a new lpi_range starting at 8192 and adds it to lpi_range_list; 65536 - 8192 entries when id_bits == 16 and the range was not restricted above
        pr_info("===ITS: Allocator initialized for %u LPIs\n", lpis);
        return err;
}


/*
 * Return the range [base, base + nr_lpis) to the free list: a new lpi_range
 * is created, added to lpi_range_list, the list is re-sorted and then passed
 * to merge_lpi_ranges().
 *
 * Returns 0 on success or -ENOMEM if the range descriptor cannot be created.
 * The whole operation runs under lpi_range_lock.
 */
static int free_lpi_range(u32 base, u32 nr_lpis)
{
        struct lpi_range *new;
        int err = 0;

        mutex_lock(&lpi_range_lock);

        new = mk_lpi_range(base, nr_lpis);
        if (!new) {
                err = -ENOMEM;
                goto out;
        }

        list_add(&new->entry, &lpi_range_list);
        list_sort(NULL, &lpi_range_list, lpi_range_cmp);
        merge_lpi_ranges();
out:
        /* Single exit point so the lock is released on every path. */
        mutex_unlock(&lpi_range_lock);
        return err;
}

分配中断号:

/*
 * Carve nr_lpis IDs off the first free range that is large enough.
 *
 * On success *base receives the first ID of the allocation and 0 is
 * returned; the chosen range shrinks from its front and is removed from the
 * list once empty. Returns -ENOSPC when no range can hold nr_lpis IDs.
 * The list is walked and modified under lpi_range_lock.
 */
static int alloc_lpi_range(u32 nr_lpis, u32 *base)
{
        struct lpi_range *range, *tmp;
        int err = -ENOSPC;

        mutex_lock(&lpi_range_lock);

        /* _safe iteration because a fully consumed range is deleted inside the loop. */
        list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) {
                if (range->span >= nr_lpis) {
                        *base = range->base_id;
                        range->base_id += nr_lpis;
                        range->span -= nr_lpis;

                        if (range->span == 0) {
                                list_del(&range->entry);
                                kfree(range);
                        }

                        err = 0;
                        break;
                }
        }

        mutex_unlock(&lpi_range_lock);

        /*
         * Debug instrumentation producing the "==ITS: alloc" log lines.
         * NOTE(review): *base is printed even on the -ENOSPC path, where this
         * function has not written it — confirm callers initialise it.
         */
        pr_info("==ITS: alloc %u:%u\n", *base, nr_lpis);
        dump_stack();
        return err;
}

dmesg日志:
[    6.293629] ==ITS: alloc 8192:1
[    6.615759] ==ITS: alloc 8193:1
[    6.905502] ==ITS: alloc 8194:1
[    7.280835] ==ITS: alloc 8195:1
[    7.582356] ==ITS: alloc 8196:1
[    7.902356] ==ITS: alloc 8197:1
[    8.198276] ==ITS: alloc 8198:1
[    8.489553] ==ITS: alloc 8199:1
[    8.787372] ==ITS: alloc 8200:1
[   11.019360] ==ITS: alloc 8201:4
[   12.709670] ==ITS: alloc 8205:1
[   16.354248] ==ITS: alloc 8206:1

pci设备注册irq相关的handle:

前面初始化时会返回多个连续的virq。

/*
 * Register pci_endpoint_test_irqhandler for each of the device's
 * test->num_irqs interrupt vectors.
 *
 * Returns true when every request succeeds.
 * NOTE(review): this is an abridged excerpt — the "fail" label targeted by
 * the goto (and whatever it returns) is not shown here.
 */
static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test)
{
        int i;
        int err;
        struct pci_dev *pdev = test->pdev;
        struct device *dev = &pdev->dev;

        for (i = 0; i < test->num_irqs; i++) {
                err = devm_request_irq(dev, pci_irq_vector(pdev, i),  // virq of the i-th vector
                                       pci_endpoint_test_irqhandler,
                                       IRQF_SHARED, DRV_MODULE_NAME, test);
                if (err)
                        goto fail;
        }

        return true;

}

获取第nr个virq

/*
 * Translate a device-relative vector index into a Linux IRQ number.
 *
 * MSI-X: each vector has its own msi_desc, so the nr-th entry of the device's
 *        MSI list is looked up and its irq returned.
 * MSI:   a single msi_desc covers all vectors; nr must be below nvec_used and
 *        the result is dev->irq + nr.
 * Otherwise: only nr == 0 is valid and dev->irq is returned.
 *
 * Returns -EINVAL (after a one-time warning) when nr is out of range.
 */
int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
{
	struct msi_desc *desc;

	if (dev->msix_enabled) {
		unsigned int idx = 0;

		/* Walk the per-vector descriptors until the requested index. */
		for_each_pci_msi_entry(desc, dev) {
			if (idx++ == nr)
				return desc->irq;
		}
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	if (!dev->msi_enabled) {
		/* Neither MSI-X nor MSI: a single interrupt only. */
		if (WARN_ON_ONCE(nr > 0))
			return -EINVAL;
		return dev->irq + nr;
	}

	desc = first_pci_msi_entry(dev);
	if (WARN_ON_ONCE(nr >= desc->nvec_used))
		return -EINVAL;

	/* Vectors are numbered upwards from the base irq. */
	return dev->irq + nr;
}

pci_irq_vector 返回第n个irq号,用于传给request_irq(如上面的devm_request_irq)申请中断。

1:根据irq找到irq_data
    根据irq找到irq_desc->irq_data

两种方式:
irq_get_irq_data:             获取virq关联的第一层irq_data。
irq_domain_get_irq_data:根据irq和domain 找到irq_data。   首先获取virq关联的第一层irq_data ,然后遍历irq_data的parent,比较domain是否相同。

中断触发后调用gic_handle_irq函数:drivers/irqchip/irq-gic-v3.c

/*
 * Top-level GICv3 interrupt entry (abridged excerpt: the rest of the loop
 * body and the loop's closing condition are elided below).
 *
 * Reads the pending interrupt ID and, for IDs 16..1019 or >= 8192, dispatches
 * it through gic_data.domain.
 */
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
        u32 irqnr;

        do {
                irqnr = gic_read_iar();

                if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
                        int err;

                        if (static_branch_likely(&supports_deactivate_key))
                                gic_write_eoir(irqnr);
                        else
                                isb();

                        err = handle_domain_irq(gic_data.domain, irqnr, regs);  // uses gic_data.domain, i.e. the domain created in irq-gic-v3.c; it maps the hwirq to a virq and runs the handler
                        if (err) {
                                WARN_ONCE(true, "Unexpected interrupt received!\n");
                                if (static_branch_likely(&supports_deactivate_key)) {
                                        if (irqnr < 8192)
                                                gic_write_dir(irqnr);
                                } else {
                                        gic_write_eoir(irqnr);
                                }
                        }
                        continue;
                }

。。。。

}

GSI:Global System Interrupt,是ACPI spec规定的全局中断表。它为多IOAPIC情况下确定了系统唯一的一个中断号。例如IOAPIC1有24个IRQ,IOAPIC2也有24个IRQ,则IOAPIC2的GSI是从24开始,GSI = 24 + IRQ(IOAPIC2)。SCI:System Control Interrupt,系统控制中断,是ACPI定义的,专用于ACPI电源管理的一个IRQ。它在Intel平台上常常与南桥的电源管理模块一起,当外部EC等发生Event后会引发SCI。Windows的SCI ISR程序就是著名的acpi.sys。

非PCI的 中断hwirq和virq

[    0.000000] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030400 irq_data ffff8020c0042828  virq 3 hwirq 27 ===
[    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0 #100
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x180
[    0.000000]  show_stack+0x24/0x30
[    0.000000]  dump_stack+0x90/0xb4
[    0.000000]  irq_domain_set_hwirq_and_chip+0x78/0xd8
[    0.000000]  irq_domain_set_info+0x58/0x90
[    0.000000]  gic_irq_domain_alloc+0x170/0x268
[    0.000000]  __irq_domain_alloc_irqs+0x150/0x338
[    0.000000]  irq_create_fwspec_mapping+0x118/0x318
[    0.000000]  acpi_register_gsi+0x6c/0xa8
[    0.000000]  map_gt_gsi+0x30/0x3c
[    0.000000]  acpi_gtdt_map_ppi+0x4c/0x78
[    0.000000]  arch_timer_acpi_init+0xa4/0x280
[    0.000000]  acpi_table_parse+0xa4/0xdc
 

=================================================================

[    0.000000] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030400 irq_data ffff8020c0042428  virq 1 hwirq 25 ===
[    0.000000] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030400 irq_data ffff8020c0042628  virq 2 hwirq 30 ===
[    0.000000] ===irq_domain_set_hwirq_and_chip domain ffff8020c0030400 irq_data ffff8020c0042828  virq 3 hwirq 27 ===
.........

irq_create_fwspec_mapping :

​

/*
 * Map a firmware interrupt specifier to a Linux virq (abridged excerpt: the
 * tail of the function, including the final return, is not shown).
 *
 * Every failure path visible here returns 0: no matching domain, translation
 * failure, or failure to allocate/create the mapping.
 */
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
{
	struct irq_domain *domain;
	struct irq_data *irq_data;
	irq_hw_number_t hwirq;
	unsigned int type = IRQ_TYPE_NONE;
	int virq;

	if (fwspec->fwnode) {
		domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); // per these notes this finds the irq-gic-v3.c irq_domain
		if (!domain)
			domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_ANY);
	} else {
		domain = irq_default_domain;
	}

	if (!domain) {
		pr_warn("no irq domain found for %s !\n",
			of_node_full_name(to_of_node(fwspec->fwnode)));
		return 0;
	}

	/* Extract hwirq and trigger type from the specifier. */
	if (irq_domain_translate(domain, fwspec, &hwirq, &type))
		return 0;


	if (irq_domain_is_hierarchy(domain)) {
		virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); // __irq_domain_alloc_irqs allocates the virq, then irq_domain_alloc_irqs_hierarchy calls the irq-gic-v3 domain's ops->alloc
		if (virq <= 0)
			return 0;
	} else {
		/* Create mapping */
		virq = irq_create_mapping(domain, hwirq);
		if (!virq)
			return virq;
	}

}



/*
 * Translate a firmware interrupt specifier into a hwirq and trigger type.
 *
 * Order of preference: the domain's translate callback (only when
 * CONFIG_IRQ_DOMAIN_HIERARCHY is enabled), then its xlate callback, and
 * finally the fallback of using param[0] as the hwirq.
 * Returns the callback's result, or 0 for the fallback; the fallback leaves
 * *type untouched.
 */
static int irq_domain_translate(struct irq_domain *d,
				struct irq_fwspec *fwspec,
				irq_hw_number_t *hwirq, unsigned int *type)
{
#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
	if (d->ops->translate)
		return d->ops->translate(d, fwspec, hwirq, type); // gic_irq_domain_translate for the GICv3 domain
#endif
	if (d->ops->xlate)
		return d->ops->xlate(d, to_of_node(fwspec->fwnode),
				     fwspec->param, fwspec->param_count,
				     hwirq, type);

	/* If domain has no translation, then we assume interrupt line */
	*hwirq = fwspec->param[0];
	return 0;
}



/*
 * GICv3 translate callback (abridged excerpt: code before and after the
 * branch shown here is elided).
 *
 * For an irqchip fwnode the specifier must carry exactly two parameters:
 * param[0] is the hwirq and param[1] the trigger type; anything else yields
 * -EINVAL.
 */
static int gic_irq_domain_translate(struct irq_domain *d,
                                    struct irq_fwspec *fwspec,
                                    unsigned long *hwirq,
                                    unsigned int *type)
{
。。。
        if (is_fwnode_irqchip(fwspec->fwnode)) {
                if(fwspec->param_count != 2)
                        return -EINVAL;

                *hwirq = fwspec->param[0];  // the fwspec carries the hwirq directly
                *type = fwspec->param[1];

                /* Warns on IRQ_TYPE_NONE but still reports success. */
                WARN_ON(*type == IRQ_TYPE_NONE);
                return 0;
        }

。。。。


}

​

irq_create_fwspec_mapping后续调用: 

irq_create_fwspec_mapping
	irq_domain_alloc_irqs
		__irq_domain_alloc_irqs  //后续流程参考msi

比如acpi创建platform过程中

[    5.094866]  irq_create_fwspec_mapping+0x60/0x310
[    5.099555]  acpi_register_gsi+0x64/0xa4
[    5.103464]  acpi_dev_resource_interrupt+0x170/0x1c0
[    5.108414]  acpi_dev_process_resource+0xe0/0x160
[    5.113104]  acpi_walk_resource_buffer+0x68/0xc8
[    5.117706]  acpi_walk_resources+0xb0/0xf8
[    5.121788]  acpi_dev_get_resources+0xcc/0x130
[    5.126217]  acpi_create_platform_device+0xac/0x2b0
[    5.131080]  acpi_bus_attach+0x274/0x284
[    5.134989]  acpi_bus_attach+0x80/0x284
[    5.138810]  acpi_bus_attach+0x80/0x284
[    5.142632]  acpi_bus_scan+0x50/0xb0
[    5.146193]  acpi_scan_init+0x120/0x284
[    5.150015]  acpi_init+0x2c8/0x33c
[    5.153402]  do_one_initcall+0x50/0x2f0
[    5.157224]  kernel_init_freeable+0x340/0x3cc
[    5.161567]  kernel_init+0x20/0x140
[    5.165042]  ret_from_fork+0x10/0x18

platform_get_irq 函数:

drivers/base/platform.c

/*
 * Return the Linux IRQ number of the num-th interrupt of a platform device.
 *
 * On SPARC the number comes from dev->archdata. Otherwise the lookup order is:
 * device tree (of_irq_get), then the device's IORESOURCE_IRQ resource, with
 * an ACPI lookup when that resource is flagged IORESOURCE_DISABLED.
 *
 * Returns the IRQ number on success, -ENXIO when no such interrupt exists,
 * -EPROBE_DEFER from the device-tree lookup, or the non-zero result of
 * acpi_irq_get().
 */
int platform_get_irq(struct platform_device *dev, unsigned int num)
{
#ifdef CONFIG_SPARC
	/* sparc does not have irqs represented as IORESOURCE_IRQ resources */
	if (!dev || num >= dev->archdata.num_irqs)
		return -ENXIO;
	return dev->archdata.irqs[num];
#else
	struct resource *r;
	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
		int ret;

		ret = of_irq_get(dev->dev.of_node, num);
		if (ret > 0 || ret == -EPROBE_DEFER)
			return ret;
	}

	r = platform_get_resource(dev, IORESOURCE_IRQ, num); // an interrupt is a resource of type IORESOURCE_IRQ; see the article on how ACPI creates platform_devices
	if (has_acpi_companion(&dev->dev)) {
		if (r && r->flags & IORESOURCE_DISABLED) {
			int ret;

			ret = acpi_irq_get(ACPI_HANDLE(&dev->dev), num, r);
			if (ret)
				return ret;
		}
	}

	/*
	 * The resources may pass trigger flags to the irqs that need
	 * to be set up. It so happens that the trigger flags for
	 * IORESOURCE_BITS correspond 1-to-1 to the IRQF_TRIGGER*
	 * settings.
	 */
	if (r && r->flags & IORESOURCE_BITS) {
		struct irq_data *irqd;

		irqd = irq_get_irq_data(r->start);
		if (!irqd)
			return -ENXIO;
		irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS);
	}

	return r ? r->start : -ENXIO;
#endif
}

你可能感兴趣的:(linux,运维,服务器)