★★★ 个人博客导读首页—点击此处 ★★★
.
说明:
在默认情况下,本文讲述的都是ARMV8-aarch64架构,linux kernel 5.14
我们可以看出,实际上有四组表,每组表有四个异常入口,分别对应同步异常,IRQ,FIQ和serror。
另外我们还可以看到的一点是,每一个异常入口不再仅仅占用4bytes的空间,而是占用0x80 bytes空间,也就是说,每一个异常入口可以放置多条指令,而不仅仅是一条跳转指令
armv8定义了VBAR_EL1、VBAR_EL2、VBAR_EL3三个基地址寄存器
思考:
1、VBAR_EL1、VBAR_EL2、VBAR_EL3写入的基地址,是物理地址还是虚拟地址?
2、基地址不再放0x00000000的位置吗?
3、异常向量表中,没有reset offset了?
4、异常向量表中的每一个offset为啥是0x80(128)地址空间? 以前是多少?
5、VBAR_ELx中,为啥末尾11个bit是reserved?
(linux/arch/arm64/kernel/entry.S)
/*
* Exception vectors.
*/
.pushsection ".entry.text", "ax"
.align 11
SYM_CODE_START(vectors)
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
kernel_ventry 1, error_invalid // Error EL1t
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
kernel_ventry 1, fiq // FIQ EL1h
kernel_ventry 1, error // Error EL1h
kernel_ventry 0, sync // Synchronous 64-bit EL0
kernel_ventry 0, irq // IRQ 64-bit EL0
kernel_ventry 0, fiq // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
SYM_CODE_END(vectors)
思考:
1、这里有没有按照armv8定义的异常向量表排列?不是每一个offset只有128bytes地址空间吗,如何做到的?
2、Linux Kernel arm64体系中不是没有实现FIQ吗,这里为何实现了?
3、第一组异常向量为何没有实现?
(linux/arch/arm64/kernel/head.S)
SYM_FUNC_START_LOCAL(__primary_switched)
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
adr_l x5, init_task
msr sp_el0, x5 // Save thread_info
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
......
b start_kernel
SYM_FUNC_END(__primary_switched)
思考:
1、设置VBAR_EL1,如果系统系统里有8个ARM Core,那么8个Core都需要设置吗,分别如何设置的?
(linux/arch/arm64/kernel/entry.S)
.macro kernel_ventry, el, label, regsize = 64
.align 7
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
.if \el == 0
alternative_if ARM64_UNMAP_KERNEL_AT_EL0
.if \regsize == 64
mrs x30, tpidrro_el0
msr tpidrro_el0, xzr
.else
mov x30, xzr
.endif
alternative_else_nop_endif
.endif
#endif
sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
* Task and IRQ stacks are aligned so that SP & (1 << THREAD_SHIFT)
* should always be zero.
*/
add sp, sp, x0 // sp' = sp + x0
sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
b el\()\el\()_\label
0:
/*
* Either we've just detected an overflow, or we've taken an exception
* while on the overflow stack. Either way, we won't return to
* userspace, and can clobber EL0 registers to free up GPRs.
*/
/* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
sub x0, sp, x0
msr tpidrro_el0, x0
/* Switch to the overflow stack */
adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0
/*
* Check whether we were already on the overflow stack. This may happen
* after panic() re-enables interrupts.
*/
mrs x0, tpidr_el0 // sp of interrupted context
sub x0, sp, x0 // delta with top of overflow stack
tst x0, #~(OVERFLOW_STACK_SIZE - 1) // within range?
b.ne __bad_stack // no? -> bad stack pointer
/* We were already on the overflow stack. Restore sp/x0 and carry on. */
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
b el\()\el\()_\label
.endm
注意.align=7,说明该段代码是以2^7=128字节对其的,这和向量表中每一个offset的大小是一致的
代码看似非常复杂,其实最终跳转到了b el\()\el\()_\label
, 翻译一下,其实就是跳转到了如下这样的函数中
el1_sync_invalid
el1_irq_invalid
el1_fiq_invalid
el1_error_invalid
el1_sync
el1_irq
el1_fiq
el1_error
el0_sync
el0_irq
el0_fiq
el0_error
未实现的向量定义为了elx_yyy_invalid函数, 该invalid函数其实也是一种实现,它最终调用了panic函数
例如el1_irq_invalid的Flow : el1_irq_invalid --> bl bad_mode
--> panic(“bad mode”)
SYM_CODE_START_LOCAL(el1_irq_invalid)
inv_entry 1, BAD_IRQ
SYM_CODE_END(el1_irq_invalid)
/*
* Bad Abort numbers
*-----------------
*/
#define BAD_SYNC 0
#define BAD_IRQ 1
#define BAD_FIQ 2
#define BAD_ERROR 3
/*
* Invalid mode handlers
*/
.macro inv_entry, el, reason, regsize = 64
kernel_entry \el, \regsize
mov x0, sp
mov x1, #\reason
mrs x2, esr_el1
bl bad_mode
ASM_BUG()
.endm
/*
* bad_mode handles the impossible case in the exception vector. This is always
* fatal.
*/
asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
{
arm64_enter_nmi(regs);
console_verbose();
pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
handler[reason], smp_processor_id(), esr,
esr_get_class_string(esr));
__show_regs(regs);
local_daif_mask();
panic("bad mode");
}
抛开事务看本质,el1_interrupt_handler handle_arch_irq
其实就是调用handle_arch_irq, 而handle_arch_irq指向irq-gic-v3.c中定义的handler函数
.align 6
SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
kernel_entry 1
el1_interrupt_handler handle_arch_irq
kernel_exit 1
SYM_CODE_END(el1_irq)
这里我们就不再深究kernel_entry和kernel_exit,它俩里面干得事情非常多。当前我们需要了解,一个是保存general purpose寄存器,一个是恢复就可以了。
.macro kernel_entry, el, regsize = 64
.if \regsize == 32
mov w0, w0 // zero upper 32 bits of x0
.endif
stp x0, x1, [sp, #16 * 0]
stp x2, x3, [sp, #16 * 1]
stp x4, x5, [sp, #16 * 2]
stp x6, x7, [sp, #16 * 3]
stp x8, x9, [sp, #16 * 4]
stp x10, x11, [sp, #16 * 5]
stp x12, x13, [sp, #16 * 6]
stp x14, x15, [sp, #16 * 7]
stp x16, x17, [sp, #16 * 8]
stp x18, x19, [sp, #16 * 9]
stp x20, x21, [sp, #16 * 10]
stp x22, x23, [sp, #16 * 11]
stp x24, x25, [sp, #16 * 12]
stp x26, x27, [sp, #16 * 13]
stp x28, x29, [sp, #16 * 14]
......
.macro kernel_exit, el
......
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
ldp x2, x3, [sp, #16 * 1]
ldp x4, x5, [sp, #16 * 2]
ldp x6, x7, [sp, #16 * 3]
ldp x8, x9, [sp, #16 * 4]
ldp x10, x11, [sp, #16 * 5]
ldp x12, x13, [sp, #16 * 6]
ldp x14, x15, [sp, #16 * 7]
ldp x16, x17, [sp, #16 * 8]
ldp x18, x19, [sp, #16 * 9]
ldp x20, x21, [sp, #16 * 10]
ldp x22, x23, [sp, #16 * 11]
ldp x24, x25, [sp, #16 * 12]
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
......
我们再来剖析gic_handle_irq()函数,其实就是涉及gic的读写了,从gic中读取硬件中断号,然后调用handle_domain_irq函数,找到相匹配的中断hander函数,然后回调。
(linux/drivers/irqchip/irq-gic-v3.c)
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
u32 irqnr;
irqnr = do_read_iar(regs);
/* Check for special IDs first */
if ((irqnr >= 1020 && irqnr <= 1023))
return;
if (gic_supports_nmi() &&
unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) {
gic_handle_nmi(irqnr, regs);
return;
}
if (gic_prio_masking_enabled()) {
gic_pmr_mask_irqs();
gic_arch_enable_irqs();
}
if (static_branch_likely(&supports_deactivate_key))
gic_write_eoir(irqnr);
else
isb();
if (handle_domain_irq(gic_data.domain, irqnr, regs)) {
WARN_ONCE(true, "Unexpected interrupt received!\n");
gic_deactivate_unhandled(irqnr);
}
}
另外注意一点,在Linux Kernel5.0之后,gic中的handler处理函数,发生了一些细微的变化,如下所示:
补充IRQ Domain介绍
在linux kernel中,我们使用下面两个ID来标识一个来自外设的中断:
1、IRQ number。CPU需要为每一个外设中断编号,我们称之IRQ Number。这个IRQ number是一个虚拟的interrupt ID,和硬件无关,仅仅是被CPU用来标识一个外设中断。
2、HW interrupt ID。对于interrupt controller而言,它收集了多个外设的interrupt request line并向上传递,因此,interrupt controller需要对外设中断进行编码。Interrupt controller用HW interrupt ID来标识外设的中断。在interrupt controller级联的情况下,仅仅用HW interrupt ID已经不能唯一标识一个外设中断,还需要知道该HW interrupt ID所属的interrupt controller(HW interrupt ID在不同的Interrupt controller上是会重复编码的)。
这样,CPU和interrupt controller在标识中断上就有了一些不同的概念,但是,对于驱动工程师而言,我们和CPU视角是一样的,我们只希望得到一个IRQ number,而不关系具体是那个interrupt controller上的那个HW interrupt ID。这样一个好处是在中断相关的硬件发生变化的时候,驱动软件不需要修改。因此,linux kernel中的中断子系统需要提供一个将HW interrupt ID映射到IRQ number上来的机制…
(本段转载自:http://www.wowotech.net/linux_kenrel/irq-domain.html)
思考:
1、上文提到"在interrupt controller级联的情况下", 为什么会有中断级联,一个gic控制器可以连接好几千个中断难道还不够吗?
handle_domain_irq的处理流程如下所示,最终是调用到了我们request_irq注册的中断处理函数.
这也是我想不通的地方,一个gic控制器可以连接好几千个中断难道还不够吗? 也许是为了SOC方便设计。例如某平台(mt6785)就使用到了级联的方式
/ {
model = "MT6785";
compatible = "mediatek,MT6785";
interrupt-parent = <&sysirq>;
#address-cells = <2>;
#size-cells = <2>;
gic: interrupt-controller {
compatible = "arm,gic-v3";
#interrupt-cells = <3>;
#address-cells = <2>;
#size-cells = <2>;
#redistributor-regions = <1>;
interrupt-parent = <&gic>;
interrupt-controller;
reg = <0 0x0c000000 0 0x40000>, // distributor
<0 0x0c040000 0 0x200000>,// redistributor
<0 0x0c53a650 0 0x50>; // INT_POL
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
};
sysirq: intpol-controller@0 {
compatible = "mediatek,mt6577-sysirq";
interrupt-controller;
#interrupt-cells = <3>;
interrupt-parent = <&gic>;
reg = <0 0x0c53a650 0 0x50>;
};
pio: pinctrl {
compatible = "mediatek,mt6785-pinctrl";
reg_bases = <&gpio>,
<&iocfg_rm>,
<&iocfg_br>,
<&iocfg_bl>,
<&iocfg_lb>,
<&iocfg_rt>,
<&iocfg_lt>,
<&iocfg_tl>;
reg_base_eint = <&eint>;
pins-are-numbered;
gpio-controller;
gpio-ranges = <&pio 0 0 210>;
#gpio-cells = <2>;
interrupt-controller;
#interrupt-cells = <4>;
interrupts = <GIC_SPI 204 IRQ_TYPE_LEVEL_HIGH>;
};
/* Trustonic Mobicore SW IRQ number 121 = 32 + 89 */
mobicore {
compatible = "trustonic,mobicore";
interrupts = <GIC_SPI 89 IRQ_TYPE_EDGE_RISING>;
};
/* Microtrust SW IRQ number 91(123) ~ 95(127) & 331(363) */
utos {
compatible = "microtrust,utos";
interrupts = <GIC_SPI 91 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 92 IRQ_TYPE_EDGE_RISING>;
};
另外例如我们再看mobicore和utos node时,在该node没有interrupt-parent属性,那么认为其父节点/
就是其父节点,如果在父节点下依然没有interrupt-parent属性,那么还是继续再往上一级去寻找父节点。在父节点下(本示例为/
)找到interrupt-parent属性。该属性引用的标签为sysirq。 所以mobicore、utos中的interrupt连接的sysirq,而不是直接连的gic。
欢迎添加微信、微信群,多多交流