Before executing each instruction, the CPU checks whether an interrupt has arrived, i.e. whether the interrupt controller has raised an interrupt signal.
Using the interrupt vector, the CPU reads the corresponding descriptor entry from the IDT and determines the address of the interrupt service routine from the entry's segment selector and offset (see Appendix 2).
In Part 1 we saw that the IDT service-routine entries are filled from the interrupt[] array, so step 2 jumps to the corresponding element of the interrupt array; the entry's contents were also analyzed there.
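Recapping Part 1, each interrupt[] entry is a tiny stub, shown here simplified from irq_entries_start in entry_32.S; the +0x80 keeps the pushed constant within signed-byte range so the push instruction encodes short:

	pushl $(~vector + 0x80)	/* encoded vector, always in signed byte range */
	jmp common_interrupt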
778 /*
779 * the CPU automatically disables interrupts when executing an IRQ vector,
780 * so IRQ-flags tracing has to follow that:
781 */
782 .p2align CONFIG_X86_L1_CACHE_SHIFT
783 common_interrupt:
784 ASM_CLAC
785 addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
786 SAVE_ALL
787 TRACE_IRQS_OFF
788 movl %esp,%eax
789 call do_IRQ
790 jmp ret_from_intr
791 ENDPROC(common_interrupt)
792 CFI_ENDPROC
As shown in Part 1, the top of the stack at this point holds (~vector + 0x80); subtracting 0x80 here leaves the bitwise NOT of the vector number, which lies in [-256, -1]. This distinguishes interrupts from system calls: a positive value is a system call number, a negative value is an interrupt vector.
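To sanity-check the encoding, here is a small user-space C program (not kernel code; the vector 0x31 is an arbitrary example):

#include <stdio.h>

int main(void)
{
	unsigned vector = 0x31;          /* arbitrary example vector */
	int pushed = ~vector + 0x80;     /* value pushed by the stub: 0x4e, fits in a signed byte */
	int orig_ax = pushed - 0x80;     /* after addl $-0x80: -0x32, i.e. ~0x31 */
	unsigned recovered = ~orig_ax;   /* do_IRQ's ~regs->orig_ax gives back 0x31 */

	printf("vector=%#x pushed=%#x orig_ax=%d recovered=%#x\n",
	       vector, pushed, orig_ax, recovered);
	return 0;
}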
SAVE_ALL then saves the context by pushing the remaining registers onto the stack (eflags, cs, and eip, plus ss and esp on a privilege change, are pushed automatically by the CPU):
186 .macro SAVE_ALL
187 cld
188 PUSH_GS
189 pushl_cfi %fs
190 /*CFI_REL_OFFSET fs, 0;*/
191 pushl_cfi %es
192 /*CFI_REL_OFFSET es, 0;*/
193 pushl_cfi %ds
194 /*CFI_REL_OFFSET ds, 0;*/
195 pushl_cfi %eax
196 CFI_REL_OFFSET eax, 0
197 pushl_cfi %ebp
198 CFI_REL_OFFSET ebp, 0
199 pushl_cfi %edi
200 CFI_REL_OFFSET edi, 0
201 pushl_cfi %esi
202 CFI_REL_OFFSET esi, 0
203 pushl_cfi %edx
204 CFI_REL_OFFSET edx, 0
205 pushl_cfi %ecx
206 CFI_REL_OFFSET ecx, 0
207 pushl_cfi %ebx
208 CFI_REL_OFFSET ebx, 0
209 movl $(__USER_DS), %edx
210 movl %edx, %ds
211 movl %edx, %es
212 movl $(__KERNEL_PERCPU), %edx
213 movl %edx, %fs
214 SET_KERNEL_GS %edx
215 .endm
esp is then copied into eax, which serves as the first argument of do_IRQ; esp points at the register values pushed above, so they are handed over as a struct pt_regs.
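The push order in SAVE_ALL mirrors the x86-32 struct pt_regs layout, shown here for reference (from arch/x86/include/asm/ptrace.h in kernels of this vintage):

struct pt_regs {
	unsigned long bx;
	unsigned long cx;
	unsigned long dx;
	unsigned long si;
	unsigned long di;
	unsigned long bp;
	unsigned long ax;
	unsigned long ds;
	unsigned long es;
	unsigned long fs;
	unsigned long gs;
	unsigned long orig_ax;	/* the encoded vector pushed by the stub */
	unsigned long ip;
	unsigned long cs;
	unsigned long flags;
	unsigned long sp;	/* pushed by hardware only on a privilege change */
	unsigned long ss;
};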
175 /*
176 * do_IRQ handles all normal device IRQ's (the special
177 * SMP cross-CPU interrupts have their own specific
178 * handlers).
179 */
180 __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 {
182 struct pt_regs *old_regs = set_irq_regs(regs);
183
184 /* high bit used in ret_from_ code */
185 unsigned vector = ~regs->orig_ax; //recover the vector number: this NOT undoes the earlier one, giving back the positive vector
186 unsigned irq;
187
188 irq_enter();
189 exit_idle();
190
191 irq = __this_cpu_read(vector_irq[vector]); //translate the vector number into an irq number
192
193 if (!handle_irq(irq, regs)) {
194 ack_APIC_irq();
195
196 if (irq != VECTOR_RETRIGGERED) {
197 pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
198 __func__, smp_processor_id(),
199 vector, irq);
200 } else {
201 __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
202 }
203 }
204
205 irq_exit();
206
207 set_irq_regs(old_regs);
208 return 1;
209 }
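vector_irq is the per-CPU table that backs this translation; for reference, its declaration in arch/x86/include/asm/hw_irq.h:

typedef int vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);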
319 /*
320 * Enter an interrupt context. //since the hardirq part runs first, irq_enter can be regarded as the start of hardirq processing
321 */
322 void irq_enter(void)
323 {
324 rcu_irq_enter(); //inform RCU that current CPU is entering irq away from idle
325 if (is_idle_task(current) && !in_interrupt()) { //if current is the idle task (pid 0) and we are not already in interrupt context
326 /*
327 * Prevent raise_softirq from needlessly waking up ksoftirqd
328 * here, as softirq will be serviced on return from interrupt.
329 */
330 local_bh_disable();
331 tick_irq_enter(); //the idle task may be interrupted or preempted; on an interrupt, irq_enter->tick_irq_enter() restores the periodic tick so that jiffies stays correct (note adapted from http://blog.chinaunix.net/uid-29675110-id-4365095.html)
332 _local_bh_enable();
333 }
334
335 __irq_enter();
336 }
28 /*
29 * It is safe to do non-atomic ops on ->hardirq_context,
30 * because NMI handlers may not preempt and the ops are
31 * always balanced, so the interrupted value of ->hardirq_context
32 * will always be restored.
33 */
34 #define __irq_enter() \
35 do { \
36 account_irq_enter_time(current); \
37 preempt_count_add(HARDIRQ_OFFSET); \ //HARDIRQ_OFFSET is 1 << 16, i.e. this increments the hardirq field starting at bit 16 of preempt_count; see the appendix for the preempt_count layout
38 trace_hardirq_enter(); \
39 } while (0)
If the system is currently idle, exit the idle state:
258 /* Called from interrupts to signify idle end */
259 void exit_idle(void)
260 {
261 /* idle loop has pid 0 */ //if current's pid is non-zero this is not the idle task, so return immediately; there is no idle state to leave
262 if (current->pid)
263 return;
264 __exit_idle(); //for the idle task, __exit_idle fires a series of idle-exit notifications
265 }
165 bool handle_irq(unsigned irq, struct pt_regs *regs)
166 {
167 struct irq_desc *desc;
168 int overflow;
169
170 overflow = check_stack_overflow(); //on x86, if the stack pointer is within 1KB of the bottom of the stack, a stack overflow is considered imminent
171
172 desc = irq_to_desc(irq); //get the descriptor, completing the chain vector num --> irq num --> desc
173 if (unlikely(!desc))
174 return false;
175 //if the CPU was executing user-space code when the interrupt arrived, the interrupt entry has already switched to the (then empty) kernel stack, so there is no need to switch to the irq stack as well
176 if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { //run on the per-CPU irq stack when possible; otherwise call desc->handle_irq below on the current task's stack
177 if (unlikely(overflow))
178 print_stack_overflow();
179 desc->handle_irq(irq, desc);
180 }
181
182 return true;
183 }
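For reference, with CONFIG_SPARSE_IRQ the lookup in irq_to_desc is just a radix-tree access (from kernel/irq/irqdesc.c in this kernel generation):

struct irq_desc *irq_to_desc(unsigned int irq)
{
	/* descriptors are kept in the irq_desc_tree, keyed by irq number */
	return radix_tree_lookup(&irq_desc_tree, irq);
}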
In the current kernel design, interrupts have their own stack on which interrupt service routines execute; this keeps nested or deep interrupt handling from overflowing the process kernel stack it would otherwise share.
The irq stack is defined with the same layout as a process context: a thread_info plus the stack itself.
58 /*
59 * per-CPU IRQ handling contexts (thread information and stack)
60 */
61 union irq_ctx {
62 struct thread_info tinfo;
63 u32 stack[THREAD_SIZE/sizeof(u32)];
64 } __attribute__((aligned(THREAD_SIZE)));
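Because each context is THREAD_SIZE-aligned (note the aligned attribute above) with the thread_info at its base, the kernel can recover the current thread_info by masking the stack pointer; this is how x86-32 of this era implements current_thread_info():

register unsigned long current_stack_pointer asm("esp") __used;

static inline struct thread_info *current_thread_info(void)
{
	/* round the stack pointer down to the THREAD_SIZE boundary */
	return (struct thread_info *)
		(current_stack_pointer & ~(THREAD_SIZE - 1));
}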
Initialization of the irq stacks:
Per-CPU variables hardirq_ctx and softirq_ctx of type union irq_ctx * are defined, so hard and soft interrupts get their own stack on every CPU.
66 static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
67 static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
native_init_IRQ->irq_ctx_init
hardirq_ctx and softirq_ctx are initialized the same way, as follows:
116 /*
117 * allocate per-cpu stacks for hardirq and for softirq processing
118 */
119 void irq_ctx_init(int cpu)
120 {
121 union irq_ctx *irqctx;
122
123 if (per_cpu(hardirq_ctx, cpu))
124 return;
125
126 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), //allocate THREAD_SIZE of stack (two pages on x86-32)
127 THREADINFO_GFP,
128 THREAD_SIZE_ORDER));
129 memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); //initialize part of the thread_info members
130 irqctx->tinfo.cpu = cpu;
131 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
132
133 per_cpu(hardirq_ctx, cpu) = irqctx; //record it as this CPU's hardirq_ctx
134
135 irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
136 THREADINFO_GFP,
137 THREAD_SIZE_ORDER));
138 memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
139 irqctx->tinfo.cpu = cpu;
140 irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
141
142 per_cpu(softirq_ctx, cpu) = irqctx;
143
144 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
145 cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
146 }
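The softirq stack set up above is used analogously; a condensed sketch of the stack switch done by do_softirq() in arch/x86/kernel/irq_32.c (run_softirq_on_stack is an illustrative name, not the kernel's):

static void run_softirq_on_stack(void)
{
	struct thread_info *curctx = current_thread_info();
	union irq_ctx *irqctx = __this_cpu_read(softirq_ctx);
	u32 *isp;

	/* hand the task identity over and remember the old stack pointer */
	irqctx->tinfo.task = curctx->task;
	irqctx->tinfo.previous_esp = current_stack_pointer;

	/* build the frame at the top of the softirq stack and switch */
	isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));
	call_on_stack(__do_softirq, isp);
}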
When an interrupt arrives, execution must switch from the current task's stack to the irq stack:
80 static inline int
81 execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
82 {
83 union irq_ctx *curctx, *irqctx;
84 u32 *isp, arg1, arg2;
85
86 curctx = (union irq_ctx *) current_thread_info(); //the current task's context, i.e. the base of its stack
87 irqctx = __this_cpu_read(hardirq_ctx); //the hardirq context, i.e. the base of the irq stack
88
89 /*
90 * this is where we switch to the IRQ stack. However, if we are
91 * already using the IRQ stack (because we interrupted a hardirq
92 * handler) we can't do that and just have to keep using the
93 * current stack (which is the irq stack already after all)
94 */
95 if (unlikely(curctx == irqctx)) //if the current stack is already the irq stack, this is a nested interrupt: the code we interrupted was itself an interrupt handler
96 return 0; //in that case we must not switch stacks; return 0 and keep running on the current stack
97
98 /* build the stack frame on the IRQ stack */
99 isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); //isp is the top of the irq stack
100 irqctx->tinfo.task = curctx->tinfo.task; //copy the current task pointer and remember the current stack pointer
101 irqctx->tinfo.previous_esp = current_stack_pointer;
102
103 if (unlikely(overflow))
104 call_on_stack(print_stack_overflow, isp);
105
106 asm volatile("xchgl %%ebx,%%esp \n" //the actual stack switch: xchgl swaps esp with the irq stack pointer held in ebx, call *%edi invokes the handler, and the final movl restores the task stack
107 "call *%%edi \n"
108 "movl %%ebx,%%esp \n"
109 : "=a" (arg1), "=d" (arg2), "=b" (isp)
110 : "0" (irq), "1" (desc), "2" (isp),
111 "D" (desc->handle_irq) //shared stack or dedicated stack, either way we end up calling the desc's handle_irq
112 : "memory", "cc", "ecx");
113 return 1;
114 }
The kernel provides a set of interrupt flow handlers:
handle_simple_irq for simple flow control;
handle_level_irq for level-triggered interrupts;
handle_edge_irq for edge-triggered interrupts;
handle_fasteoi_irq for interrupt controllers that require an EOI;
handle_percpu_irq for interrupts serviced on a single CPU only;
handle_nested_irq for nested interrupts handled in irq threads;
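A flow handler gets attached to an irq descriptor through helpers such as irq_set_chip_and_handler(); a minimal sketch of the registration (the chip pointer is whatever irqchip driver owns the line):

	/* bind the line's irqchip and its flow handler to the descriptor */
	irq_set_chip_and_handler(irq, chip, handle_level_irq);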
In Part 2 we saw that init_ISA_irqs sets the flow handler of every legacy irq to handle_level_irq in exactly this way, so we take it as our example:
//for a level-triggered interrupt, the request stays asserted as long as the hardware line is at the active level; the interrupt must therefore be masked while it is handled and unmasked once the device has been acknowledged
387 /**
388 * handle_level_irq - Level type irq handler
389 * @irq: the interrupt number
390 * @desc: the interrupt description structure for this irq
391 *
392 * Level type interrupts are active as long as the hardware line has
393 * the active level. This may require to mask the interrupt and unmask
394 * it after the associated handler has acknowledged the device, so the
395 * interrupt line is back to inactive.
396 */
397 void
398 handle_level_irq(unsigned int irq, struct irq_desc *desc)
399 {
400 raw_spin_lock(&desc->lock); //take the descriptor lock
401 mask_ack_irq(desc); //mask and ack the irq; otherwise the line would keep delivering interrupt signals
402
403 if (unlikely(irqd_irq_inprogress(&desc->irq_data))) //if this irq is currently being handled on another CPU
404 if (!irq_check_poll(desc)) //not entirely clear to me what IRQS_POLL_INPROGRESS (polling in progress) means; revisit when the macro shows up again. In that state the CPU relaxes and waits for completion
405 goto out_unlock; //unlock and bail out
406 //clear the IRQS_REPLAY and IRQS_WAITING flags
407 desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
408 kstat_incr_irqs_this_cpu(irq, desc); //bump this irq's count on this CPU and the overall interrupt count
409
410 /*
411 * If its disabled or no action available
412 * keep it masked and get out of here
413 */
414 if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
415 desc->istate |= IRQS_PENDING; //mark it pending
416 goto out_unlock;
417 }
418
419 handle_irq_event(desc); //the core of the processing
420
421 cond_unmask_irq(desc); //unmask the interrupt line again
422
423 out_unlock:
424 raw_spin_unlock(&desc->lock);
425 }
426 EXPORT_SYMBOL_GPL(handle_level_irq);
182 irqreturn_t handle_irq_event(struct irq_desc *desc)
183 {
184 struct irqaction *action = desc->action; //the list of irqactions attached to this descriptor
185 irqreturn_t ret;
186
187 desc->istate &= ~IRQS_PENDING; //handling starts for real: clear the descriptor's pending flag
188 irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); //set IRQD_IRQ_INPROGRESS before handling the interrupt
189 raw_spin_unlock(&desc->lock);
190
191 ret = handle_irq_event_percpu(desc, action);
192
193 raw_spin_lock(&desc->lock);
194 irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); //clear IRQD_IRQ_INPROGRESS once handling is done
195 return ret;
196 }
132 irqreturn_t
133 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
134 {
135 irqreturn_t retval = IRQ_NONE;
136 unsigned int flags = 0, irq = desc->irq_data.irq;
137
138 do {
139 irqreturn_t res;
140
141 trace_irq_handler_entry(irq, action);
142 res = action->handler(irq, action->dev_id); //invoke the hardirq handler
143 trace_irq_handler_exit(irq, action, res);
144
145 if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
146 irq, action->handler))
147 local_irq_disable();
148
149 switch (res) {
150 case IRQ_WAKE_THREAD: //for a threaded interrupt, the hardirq part usually just acks the hardware and returns IRQ_WAKE_THREAD to wake the irq handler thread
151 /*
152 * Catch drivers which return WAKE_THREAD but
153 * did not set up a thread function
154 */
155 if (unlikely(!action->thread_fn)) {
156 warn_no_thread(irq, action);
157 break;
158 }
159
160 irq_wake_thread(desc, action); //wake the irq handler thread
161
162 /* Fall through to add to randomness */
163 case IRQ_HANDLED: //the interrupt was fully handled in hardirq context
164 flags |= action->flags;
165 break;
166
167 default:
168 break;
169 }
170
171 retval |= res;
172 action = action->next; //for shared interrupts, all irqactions hang off the same desc
173 } while (action);
174
175 add_interrupt_randomness(irq, flags); //not really part of the interrupt flow: device interrupt timing is used as a noise source to feed the kernel's entropy pool (http://jingpin.jikexueyuan.com/article/23923.html)
176
177 if (!noirqdebug)
178 note_interrupt(irq, desc, retval);
179 return retval;
180 }
That concludes this brief walk through the interrupt handling flow. One question remains: where do the action handlers and the threaded handlers come from? See the next part.
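As a preview, they are registered by drivers; a minimal sketch using the standard request_threaded_irq() API (the mydev_* names are invented for illustration):

#include <linux/interrupt.h>

static irqreturn_t mydev_hardirq(int irq, void *dev_id)
{
	/* ack the hardware, then defer the heavy work to the irq thread */
	return IRQ_WAKE_THREAD;
}

static irqreturn_t mydev_thread_fn(int irq, void *dev_id)
{
	/* runs in the handler thread woken by irq_wake_thread() above */
	return IRQ_HANDLED;
}

static int mydev_setup_irq(unsigned int irq, void *dev)
{
	/* this hangs an irqaction with both handlers off the irq's desc */
	return request_threaded_irq(irq, mydev_hardirq, mydev_thread_fn,
				    IRQF_SHARED, "mydev", dev);
}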
Appendix 2: using the segment selector found in the IDT entry, the CPU fetches the corresponding segment descriptor from the GDT; that descriptor holds the base address and attributes of the segment containing the interrupt service routine, so the CPU now has the routine's start address. The CPU compares the CPL in the current cs register against the DPL of the GDT segment descriptor, ensuring the service routine runs at a privilege no lower than the current program's; if the interrupt is a programmed exception (such as the int 0x80 system call), it also checks the CPL against the DPL of the gate descriptor in the IDT, so that the current program is entitled to use the gate. This prevents user applications from invoking privileged trap and interrupt gates [3].
This is how an interrupt vector is resolved, through the IDT and the GDT, to the start address of the corresponding interrupt service routine.
Appendix: the preempt_count bit fields referenced by __irq_enter:
44 #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) // 1UL shifted left by 16
32 #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) // 8 + 8 = 16
31 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) // 0 + 8 = 8
30 #define PREEMPT_SHIFT 0
25 #define PREEMPT_BITS 8
26 #define SOFTIRQ_BITS 8
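Putting these shifts together gives the preempt_count layout; a sketch, assuming HARDIRQ_BITS = 10 and NMI_BITS = 1 as in kernels of this era:

/*
 * preempt_count layout, low bits to high:
 *   bits  0-7   PREEMPT - preemption-disable nesting count
 *   bits  8-15  SOFTIRQ - softirq nesting count
 *   bits 16-25  HARDIRQ - hardirq nesting count (HARDIRQ_OFFSET = 1 << 16)
 *   bit  26     NMI     - set while in NMI context
 */
#define PREEMPT_MASK (((1UL << PREEMPT_BITS) - 1) << PREEMPT_SHIFT) /* 0x000000ff */
#define SOFTIRQ_MASK (((1UL << SOFTIRQ_BITS) - 1) << SOFTIRQ_SHIFT) /* 0x0000ff00 */
#define HARDIRQ_MASK (((1UL << 10) - 1) << HARDIRQ_SHIFT)           /* 0x03ff0000 */
#define NMI_MASK     (1UL << 26)                                    /* 0x04000000 */

/* which is why in_interrupt() is a simple mask test: */
#define in_interrupt() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK))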
2500 void __kprobes preempt_count_add(int val)
2501 {
2502 #ifdef CONFIG_DEBUG_PREEMPT
2503 /*
2504 * Underflow?
2505 */
2506 if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
2507 return;
2508 #endif
2509 __preempt_count_add(val); //apart from the debug checks this is the only essential line: it adds val to preempt_count, here incrementing the hardirq field at bit 16
2510 #ifdef CONFIG_DEBUG_PREEMPT
2511 /*
2512 * Spinlock count overflowing soon?
2513 */
2514 DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
2515 PREEMPT_MASK - 10);
2516 #endif
2517 if (preempt_count() == val)
2518 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
2519 }
2520 EXPORT_SYMBOL(preempt_count_add);