x86 kernel 中断分析三——中断处理流程

CPU检测中断

CPU在执行每条指令之前会检测是否有中断到达,即中断控制器是否发来了中断信号

查找IDT

CPU根据中断向量到IDT中读取对应的中断描述符表项,根据其中的段选择符和偏移确定中断服务程序的地址,见附录1

interrupt数组

在分析一中,我们看到,填充IDT中断服务程序的是interrupt数组的内容,所以第2步跳转到interrupt数组对应的表项,表项的内容之前也已分析过

push vector num and jmp to common_interrupt

 778 /*
 779  * the CPU automatically disables interrupts when executing an IRQ vector,
 780  * so IRQ-flags tracing has to follow that:
 781  */
 782     .p2align CONFIG_X86_L1_CACHE_SHIFT
 783 common_interrupt:
 784     ASM_CLAC
 785     addl $-0x80,(%esp)  /* Adjust vector into the [-256,-1] range */ 
 786     SAVE_ALL
 787     TRACE_IRQS_OFF
 788     movl %esp,%eax
 789     call do_IRQ
 790     jmp ret_from_intr
 791 ENDPROC(common_interrupt)
 792     CFI_ENDPROC

addl $-0x80,(%esp)

根据第一篇分析,此时栈顶是(~vector + 0x80),这里减去0x80,所以值为vector num取反,范围在[-256, -1]。这么做是为了和系统调用区分,正值为系统调用号,负值为中断向量。

SAVE_ALL

保存现场,将其余寄存器的值压栈(eflags cs eip由CPU自动保存,发生特权级切换时CPU还会自动保存ss esp)

186 .macro SAVE_ALL
 187     cld
 188     PUSH_GS
 189     pushl_cfi %fs
 190     /*CFI_REL_OFFSET fs, 0;*/
 191     pushl_cfi %es
 192     /*CFI_REL_OFFSET es, 0;*/
 193     pushl_cfi %ds
 194     /*CFI_REL_OFFSET ds, 0;*/
 195     pushl_cfi %eax
 196     CFI_REL_OFFSET eax, 0
 197     pushl_cfi %ebp
 198     CFI_REL_OFFSET ebp, 0
 199     pushl_cfi %edi
 200     CFI_REL_OFFSET edi, 0
 201     pushl_cfi %esi
 202     CFI_REL_OFFSET esi, 0
 203     pushl_cfi %edx
 204     CFI_REL_OFFSET edx, 0
 205     pushl_cfi %ecx
 206     CFI_REL_OFFSET ecx, 0
 207     pushl_cfi %ebx
 208     CFI_REL_OFFSET ebx, 0
 209     movl $(__USER_DS), %edx
 210     movl %edx, %ds
 211     movl %edx, %es
 212     movl $(__KERNEL_PERCPU), %edx
 213     movl %edx, %fs
 214     SET_KERNEL_GS %edx
 215 .endm

movl %esp,%eax

将esp的值赋值给eax,eax作为do_IRQ的第一个参数。此时esp指向上面压栈保存的寄存器内容,即以struct pt_regs指针的形式传过去。

call do_IRQ

175 /*
176  * do_IRQ handles all normal device IRQ's (the special
177  * SMP cross-CPU interrupts have their own specific
178  * handlers).
179  */
180 __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
181 {
182     struct pt_regs *old_regs = set_irq_regs(regs);
183
184     /* high bit used in ret_from_ code  */
185     unsigned vector = ~regs->orig_ax;       //获取向量号,这里有一个取反的操作,与之前的取反相对应得到正的向量号
186     unsigned irq;
187
188     irq_enter();
189     exit_idle();
190
191     irq = __this_cpu_read(vector_irq[vector]);       //通过向量号得到中断号
192
193     if (!handle_irq(irq, regs)) {
194         ack_APIC_irq();
195
196         if (irq != VECTOR_RETRIGGERED) {
197             pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n",
198                          __func__, smp_processor_id(),
199                          vector, irq);
200         } else {
201             __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
202         }
203     }
204
205     irq_exit();
206
207     set_irq_regs(old_regs);
208     return 1;
209 }

irq_enter

319 /*
  320  * Enter an interrupt context.  //进入中断上下文,因为首先处理的是硬中断,所以我们可以把irq_enter认为是硬中断的开始
  321  */
  322 void irq_enter(void)
  323 {
  324     rcu_irq_enter();                                    //inform RCU that current CPU is entering irq away from idle
  325     if (is_idle_task(current) && !in_interrupt()) {   //如果当前是pid==0的idle task并且不处于中断上下文中
  326         /*
  327          * Prevent raise_softirq from needlessly waking up ksoftirqd
  328          * here, as softirq will be serviced on return from interrupt.
  329          */
  330         local_bh_disable();
  331         tick_irq_enter();     //idle进程会被中断或者其他进程抢占,在系统中断过程中用irq_enter->tick_irq_enter()恢复周期性tick以得到正确的jiffies值(这段注释摘录自http://blog.chinaunix.net/uid-29675110-id-4365095.html)
  332         _local_bh_enable();
  333     }
  334
  335     __irq_enter();
  336 }

__irq_enter

28 /*
 29  * It is safe to do non-atomic ops on ->hardirq_context,
 30  * because NMI handlers may not preempt and the ops are
 31  * always balanced, so the interrupted value of ->hardirq_context
 32  * will always be restored.
 33  */
 34 #define __irq_enter()                   \
 35     do {                        \
 36         account_irq_enter_time(current);    \
 37         preempt_count_add(HARDIRQ_OFFSET);  \             //HARDIRQ_OFFSET等于1左移16位,即将preempt_count第16 bit加1,preempt_count的格式见附录
 38         trace_hardirq_enter();          \
 39     } while (0)

exit_idle

如果系统正处在idle状态,那么退出IDLE

258 /* Called from interrupts to signify idle end */
259 void exit_idle(void)
260 {
261     /* idle loop has pid 0 */          //如果当前进程不为0,直接退出,不需要退出 idle
262     if (current->pid)
263         return;
264     __exit_idle();            //如果是idle进程,那么通过__exit_idle调用一系列notification
265 }

handle_irq

165 bool handle_irq(unsigned irq, struct pt_regs *regs)
166 {
167     struct irq_desc *desc;
168     int overflow;
169
170     overflow = check_stack_overflow();  //x86架构下如果sp指针距离栈底的位置小于1KB,则认为有stack overflow的风险
171
172     desc = irq_to_desc(irq);                        //获取desc,从刚开始的vector num-->irq num--> desc
173     if (unlikely(!desc))
174         return false;
175  //如果发生中断时,CPU正在执行用户空间的代码,处理中断需切换到内核栈,但此时内核栈是空的,所以无需再切换到中断栈
176     if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {     // 在CPU的irq stack执行,否则在当前进程的栈执行,调用下面的desc->handle_irq  
177         if (unlikely(overflow))
178             print_stack_overflow();
179         desc->handle_irq(irq, desc);
180     }
181
182     return true;
183 }

中断栈的定义及初始化

按照目前的内核设计,中断有自己的栈,用来执行中断服务程序,这样是为了防止中断嵌套破坏与之共享的进程内核栈。
中断栈的定义如下,可以看到与进程上下文的布局相同:thread info + stack

 58 /*
 59  * per-CPU IRQ handling contexts (thread information and stack)
 60  */
 61 union irq_ctx {
 62     struct thread_info      tinfo;
 63     u32                     stack[THREAD_SIZE/sizeof(u32)];
 64 } __attribute__((aligned(THREAD_SIZE)));

中断栈的初始化:

创建percpu变量hardirq_ctx和softirq_ctx,类型为指向union irq_ctx的指针,所以每个cpu的软硬中断有各自的stack

 66 static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
 67 static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);

native_init_IRQ->irq_ctx_init
hardirq_ctx和softirq_ctx的初始化方式相同,如下

116 /*
117  * allocate per-cpu stacks for hardirq and for softirq processing
118  */
119 void irq_ctx_init(int cpu)
120 {
121     union irq_ctx *irqctx;
122
123     if (per_cpu(hardirq_ctx, cpu))
124         return;
125
126     irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),      //分配2个page      
127                            THREADINFO_GFP,
128                            THREAD_SIZE_ORDER));
129     memset(&irqctx->tinfo, 0, sizeof(struct thread_info));       //初始化其中的部分成员
130     irqctx->tinfo.cpu       = cpu;
131     irqctx->tinfo.addr_limit    = MAKE_MM_SEG(0);
132
133     per_cpu(hardirq_ctx, cpu) = irqctx;                             //赋值给hardirq_ctx
134
135     irqctx = page_address(alloc_pages_node(cpu_to_node(cpu),
136                            THREADINFO_GFP,
137                            THREAD_SIZE_ORDER));
138     memset(&irqctx->tinfo, 0, sizeof(struct thread_info));
139     irqctx->tinfo.cpu       = cpu;
140     irqctx->tinfo.addr_limit    = MAKE_MM_SEG(0);
141
142     per_cpu(softirq_ctx, cpu) = irqctx;
143
144     printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
145            cpu, per_cpu(hardirq_ctx, cpu),  per_cpu(softirq_ctx, cpu));
146 }

网上找的一张图,如下
x86 kernel 中断分析三——中断处理流程_第1张图片

中断栈的切换

发生中断时需要从当前进程栈切换到中断栈

 80 static inline int
 81 execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
 82 {
 83     union irq_ctx *curctx, *irqctx;
 84     u32 *isp, arg1, arg2;
 85
 86     curctx = (union irq_ctx *) current_thread_info();       //获取当前进程的process context,即栈的起始地址
 87     irqctx = __this_cpu_read(hardirq_ctx);                  //获取硬中断的hardirq context,即栈的起始地址
 88
 89     /*
 90      * this is where we switch to the IRQ stack. However, if we are
 91      * already using the IRQ stack (because we interrupted a hardirq
 92      * handler) we can't do that and just have to keep using the
 93      * current stack (which is the irq stack already after all)
 94      */
 95     if (unlikely(curctx == irqctx))                //如果当前进程的栈和中断栈相同,说明发生了中断嵌套,此时当前进程就是一个中断的服务例程
 96         return 0;                                   //这种情况下不能进行栈的切换,还是在当前栈中运行,只要返回0即可
 97
 98     /* build the stack frame on the IRQ stack */
 99     isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));           //获取中断栈的isp
100     irqctx->tinfo.task = curctx->tinfo.task;                    //获取当前进程的task和stack point
101     irqctx->tinfo.previous_esp = current_stack_pointer;
102
103     if (unlikely(overflow))
104         call_on_stack(print_stack_overflow, isp);
105
106     asm volatile("xchgl %%ebx,%%esp \n"                    //具体的栈切换发生在以下汇编中,基本上就是保存现场,进行切换,不深入研究汇编了...
107              "call  *%%edi      \n"
108              "movl  %%ebx,%%esp \n"
109              : "=a" (arg1), "=d" (arg2), "=b" (isp)
110              :  "0" (irq),   "1" (desc),  "2" (isp),
111             "D" (desc->handle_irq)                                    //不管是共享栈还是独立栈,最后都会调用到irq desc对应的handle_irq
112              : "memory", "cc", "ecx");
113     return 1;
114 }

handle_level_irq

kernel中对于中断有一系列的中断流处理函数

handle_simple_irq  用于简易流控处理;
handle_level_irq   用于电平触发中断的流控处理;
handle_edge_irq    用于边沿触发中断的流控处理;
handle_fasteoi_irq  用于需要响应eoi的中断控制器;
handle_percpu_irq   用于只在单一cpu响应的中断;
handle_nested_irq   用于处理使用线程的嵌套中断;

我们在第二篇分析中,init_ISA_irqs把legacy irq的中断流处理函数都设置为handle_level_irq,以此为例做分析:

//level type中断,当硬件中断line的电平处于active level时就一直保持有中断请求,这就要求处理中断过程中屏蔽中断,响应硬件后打开中断
387 /**
388  *  handle_level_irq - Level type irq handler          //电平触发的中断处理函数
389  *  @irq:   the interrupt number
390  *  @desc:  the interrupt description structure for this irq
391  *
392  *  Level type interrupts are active as long as the hardware line has          
393  *  the active level. This may require to mask the interrupt and unmask        
394  *  it after the associated handler has acknowledged the device, so the
395  *  interrupt line is back to inactive.
396  */
397 void
398 handle_level_irq(unsigned int irq, struct irq_desc *desc)
399 {
400     raw_spin_lock(&desc->lock);                         //上锁
401     mask_ack_irq(desc);       //mask对应的中断,否则一直接收来自interrupt line的中断信号
402
403     if (unlikely(irqd_irq_inprogress(&desc->irq_data)))  //如果该中断正在其他cpu上被处理
404         if (!irq_check_poll(desc))  //这边不是很理解,irq的IRQS_POLL_INPROGRESS(polling in a progress)是什么意思?只能等后续代码遇到这个宏的时候再说。如果是在该状态,cpu relax,等待完成
405             goto out_unlock;                        //直接解锁退出
406     //清除IRQS_REPLAY和IRQS_WAITING标志位
407     desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
408     kstat_incr_irqs_this_cpu(irq, desc);   //该CPU上该irq触发次数加1,总的中断触发次数加1
409
410     /*
411      * If its disabled or no action available
412      * keep it masked and get out of here
413      */
414     if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
415         desc->istate |= IRQS_PENDING;                   //设置为pending
416         goto out_unlock;
417     }
418
419     handle_irq_event(desc);                        //核心函数
420
421     cond_unmask_irq(desc);                           //使能中断线
422
423 out_unlock:
424     raw_spin_unlock(&desc->lock);
425 }
426 EXPORT_SYMBOL_GPL(handle_level_irq);

handle irq event

182 irqreturn_t handle_irq_event(struct irq_desc *desc)
183 {
184     struct irqaction *action = desc->action;         //获取irqaction链表
185     irqreturn_t ret;
186
187     desc->istate &= ~IRQS_PENDING;        //正式进入处理流程,清除irq desc的pending标志位
188     irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);        //处理中断前设置IRQD_IRQ_INPROGRESS标志
189     raw_spin_unlock(&desc->lock);
190
191     ret = handle_irq_event_percpu(desc, action);            
192
193     raw_spin_lock(&desc->lock);
194     irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);   //处理中断后清除IRQD_IRQ_INPROGRESS标志
195     return ret;
196 }

handle_irq_event_percpu

132 irqreturn_t
133 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
134 {
135     irqreturn_t retval = IRQ_NONE;
136     unsigned int flags = 0, irq = desc->irq_data.irq;
137
138     do {
139         irqreturn_t res;
140
141         trace_irq_handler_entry(irq, action);
142         res = action->handler(irq, action->dev_id); //调用硬中断处理函数
143         trace_irq_handler_exit(irq, action, res);
144
145         if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
146                   irq, action->handler))
147             local_irq_disable();
148
149         switch (res) {
150         case IRQ_WAKE_THREAD:      //线程化中断的硬中断,通常只是响应一下硬件ack,就返会IRQ_WAKE_THREAD,唤醒软中断线程
151             /*
152              * Catch drivers which return WAKE_THREAD but
153              * did not set up a thread function
154              */
155             if (unlikely(!action->thread_fn)) {
156                 warn_no_thread(irq, action);
157                 break;
158             }
159
160             irq_wake_thread(desc, action);            //唤醒软中断线程
161
162             /* Fall through to add to randomness */
163         case IRQ_HANDLED:                        //表示已经在硬中断中处理完毕
164             flags |= action->flags;
165             break;
166
167         default:
168             break;
169         }
170
171         retval |= res;
172         action = action->next;            //对于共享中断,所有irqaction挂在同一desc下
173     } while (action);
174
175     add_interrupt_randomness(irq, flags);    //这块代码其实和中断流程的关系不大,利用用户和外设作为噪声源,为内核随机熵池做贡献....(http://jingpin.jikexueyuan.com/article/23923.html)
176
177     if (!noirqdebug)
178         note_interrupt(irq, desc, retval);
179     return retval;
180 }

以上就是中断处理流程的简要分析。还有个问题:其中irqaction的handler及线程化中断的处理线程从何而来?下篇分析见。

附录1:

CPU使用IDT查到的中断服务程序的段选择符从GDT中取得相应的段描述符,段描述符里保存了中断服务程序的段基址和属性信息,此时CPU就得到了中断服务程序的起始地址。这里,CPU会比较当前cs寄存器里的CPL和GDT中该段描述符的DPL,以确保中断服务程序的特权级不低于当前程序;如果这次中断是编程异常(如:int 80h系统调用),那么还要检查CPL和IDT表中中断描述符的DPL,以保证当前程序有权限使用中断服务程序,这可以避免用户应用程序访问特殊的陷阱门和中断门[3]。
下图显示了从中断向量到GDT中相应中断服务程序起始位置的定位方式:

x86 kernel 中断分析三——中断处理流程_第2张图片

附录2. preempt_count:

 44 #define HARDIRQ_OFFSET  (1UL << HARDIRQ_SHIFT)         // 1左移16位
 32 #define HARDIRQ_SHIFT   (SOFTIRQ_SHIFT + SOFTIRQ_BITS)  // 8 + 8 = 16
 31 #define SOFTIRQ_SHIFT   (PREEMPT_SHIFT + PREEMPT_BITS)  // 0 + 8 = 8
 30 #define PREEMPT_SHIFT   0
 25 #define PREEMPT_BITS    8
 26 #define SOFTIRQ_BITS    8

2500 void __kprobes preempt_count_add(int val)
2501 {
2502 #ifdef CONFIG_DEBUG_PREEMPT
2503     /*
2504      * Underflow?
2505      */
2506     if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
2507         return;
2508 #endif
2509     __preempt_count_add(val);    //除去debug相关的内容,只有这一行关键代码,将preempt_count中第16 bit加1
2510 #ifdef CONFIG_DEBUG_PREEMPT
2511     /*
2512      * Spinlock count overflowing soon?
2513      */
2514     DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
2515                 PREEMPT_MASK - 10);
2516 #endif
2517     if (preempt_count() == val)
2518         trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
2519 }
2520 EXPORT_SYMBOL(preempt_count_add);

preempt_count的布局如下:
这里写图片描述

你可能感兴趣的:(interrupt)