以下主要阅读了 Linux 3.2 中从 start_kernel() 开始的、与中断有关的一些初始化代码,并做了一点简单的分析。start_kernel() 定义在 init/main.c 中,其中与中断有关的大致有下面这些函数调用:
/* filename: init/main.c */ 467 asmlinkage void __init start_kernel(void) 468 { ... ... 488 local_irq_disable(); ... ... 499 setup_arch(&command_line); ... ... 526 trap_init(); ... ... 550 early_irq_init(); 551 init_IRQ(); ... ... 564 local_irq_enable();
/* filename: include/linux/irqflags.h */ 59 #define raw_local_irq_disable() arch_local_irq_disable() 137 #define local_irq_disable() do { raw_local_irq_disable(); } while (0)
/* filename: arch/x86/include/asm/irqflags.h */ 37 static inline void native_irq_disable(void) 38 { 39 asm volatile("cli": : :"memory"); 40 } 75 static inline notrace void arch_local_irq_disable(void) 76 { 77 native_irq_disable(); 78 }
Linux 内核在初始化阶段完成了对页式虚拟内存管理的初始化后,便调用 trap_init() 和 init_IRQ() 两个函数进行中断机制的初始化。trap_init() 主要是对一些系统保留的中断向量进行初始化。而 init_IRQ() 则主要用于外设的中断。函数 trap_init() 是在 arch/x86/kernel/traps.c 中定义的。
/* filename: arch/x86/kernel/traps.c */ 669 void __init trap_init(void) 670 { 671 int i; 672 673 #ifdef CONFIG_EISA 674 void __iomem *p = early_ioremap(0x0FFFD9, 4); 675 676 if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) 677 EISA_bus = 1; 678 early_iounmap(p, 4); 679 #endif 680 681 set_intr_gate(0, &divide_error); 682 set_intr_gate_ist(2, &nmi, NMI_STACK); 683 /* int4 can be called from all */ 684 set_system_intr_gate(4, &overflow); 685 set_intr_gate(5, &bounds); 686 set_intr_gate(6, &invalid_op); 687 set_intr_gate(7, &device_not_available); 688 #ifdef CONFIG_X86_32 689 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); 690 #else 691 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); 692 #endif 693 set_intr_gate(9, &coprocessor_segment_overrun); 694 set_intr_gate(10, &invalid_TSS); 695 set_intr_gate(11, &segment_not_present); 696 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); 697 set_intr_gate(13, &general_protection); 698 set_intr_gate(15, &spurious_interrupt_bug); 699 set_intr_gate(16, &coprocessor_error); 700 set_intr_gate(17, &alignment_check); 701 #ifdef CONFIG_X86_MCE 702 set_intr_gate_ist(18, &machine_check, MCE_STACK); 703 #endif 704 set_intr_gate(19, &simd_coprocessor_error); 705 706 /* Reserve all the builtin and the syscall vector: */ 707 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 708 set_bit(i, used_vectors); 709 710 #ifdef CONFIG_IA32_EMULATION 711 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 712 set_bit(IA32_SYSCALL_VECTOR, used_vectors); 713 #endif 714 715 #ifdef CONFIG_X86_32 716 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 717 set_bit(SYSCALL_VECTOR, used_vectors); 718 #endif 719 720 /* 721 * Should be a barrier for any external CPU state: 722 */ 723 cpu_init(); 724 725 x86_init.irqs.trap_init(); 726 }
/* filename: arch/x86/include/asm/x86_init.h */ 132 struct x86_init_ops { 133 struct x86_init_resources resources; 134 struct x86_init_mpparse mpparse; 135 struct x86_init_irqs irqs; 136 struct x86_init_oem oem; 137 struct x86_init_mapping mapping; 138 struct x86_init_paging paging; 139 struct x86_init_timers timers; 140 struct x86_init_iommu iommu; 141 struct x86_init_pci pci; 142 };
/* filename: arch/x86/include/asm/x86_init.h */ 47 /** 48 * struct x86_init_irqs - platform specific interrupt setup 49 * @pre_vector_init: init code to run before interrupt vectors 50 * are set up. 51 * @intr_init: interrupt init code 52 * @trap_init: platform specific trap setup 53 */ 54 struct x86_init_irqs { 55 void (*pre_vector_init)(void); 56 void (*intr_init)(void); 57 void (*trap_init)(void); 58 };
/* filename: arch/x86/kernel/x86_init.c */ 37 struct x86_init_ops x86_init __initdata = { ... ... 55 .irqs = { 56 .pre_vector_init = init_ISA_irqs, 57 .intr_init = native_init_IRQ, 58 .trap_init = x86_init_noop, 59 }, ... ... 91 };
/* filename: arch/x86/include/asm/desc.h */ 310 static inline void _set_gate(int gate, unsigned type, void *addr, 311 unsigned dpl, unsigned ist, unsigned seg) 312 { 313 gate_desc s; 314 315 pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); 316 /* 317 * does not need to be atomic because it is only done once at 318 * setup time 319 */ 320 write_idt_entry(idt_table, gate, &s); 321 } 329 static inline void set_intr_gate(unsigned int n, void *addr) 330 { 331 BUG_ON((unsigned)n > 0xFF); 332 _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); 333 } 359 static inline void set_system_intr_gate(unsigned int n, void *addr) 360 { 361 BUG_ON((unsigned)n > 0xFF); 362 _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); 363 } 364 365 static inline void set_system_trap_gate(unsigned int n, void *addr) 366 { 367 BUG_ON((unsigned)n > 0xFF); 368 _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); 369 } 383 static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) 384 { 385 BUG_ON((unsigned)n > 0xFF); 386 _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); 387 }
/* filename: arch/x86/include/asm/desc_defs.h */ 22 struct desc_struct { 23 union { 24 struct { 25 unsigned int a; 26 unsigned int b; 27 }; 28 struct { 29 u16 limit0; 30 u16 base0; 31 unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; 32 unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; 33 }; 34 }; 35 } __attribute__((packed));
/* filename: arch/x86/include/asm/desc.h */ 68 static inline void pack_gate(gate_desc *gate, unsigned char type, 69 unsigned long base, unsigned dpl, unsigned flags, 70 unsigned short seg) 71 { 72 gate->a = (seg << 16) | (base & 0xffff); 73 gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); 74 }
以 set_system_intr_gate() 为例,这是对系统调用的门的设置。因为系统调用是由用户程序发起的,发生在用户空间,进而需要切换进入内核空间,所以系统调用门的 dpl 值为 0x3,这样在系统调用发生、检查权限等级的时候,用户程序的权限等级不会比系统调用门的 dpl 等级低。另外,从之前的代码中可以看到,seg 的值是 __KERNEL_CS。准确地说,在保护模式下,__KERNEL_CS 并不是内核代码段的地址,而是内核代码段在 GDT 中的选择子,这里以其值为 0x10 为例(具体数值取决于体系结构的配置)。(base & 0xffff) 可以得到函数地址的低 16 位,假设为 0xllll,那么 72 行计算后得到的 gate->a 的值转换为二进制应该是 1 0000 llll llll llll llll。(base & 0xffff0000) 自然就得到函数地址的高 16 位,假设为 0xhhhh;同时,type 为 GATE_INTERRUPT(0xE)、dpl 为 0x3,所以 0x80|type|(dpl<<5) = 0x80|0x0E|0x60 = 0xEE,(((0x80|type|(dpl<<5))&0xff)<<8) 的二进制结果为 1110 1110 0000 0000。则 73 行计算后得到的 gate->b 的值转换为二进制应该是 hhhh hhhh hhhh hhhh 1110 1110 0000 0000。gate->b 和 gate->a 分别是这个门描述符的高 32 位和低 32 位,二者合起来就是一个 64 位的门描述符。中断门 (64 位) 和陷阱门 (64 位) 的描述符格式如下:低 32 位中,位 31–16 是段选择子,位 15–0 是处理函数偏移地址的低 16 位;高 32 位中,位 31–16 是偏移地址的高 16 位,位 15 是存在位 P,位 14–13 是 DPL,位 12 为 0,位 11–8 是门类型 type,位 7–0 保留(此处为 0)。
因此 pack_gate() 函数就是通过所提供的这些参数,构造出一个中断门描述符,并把这个中断门描述符保存到临时变量 s 中。接着 320 行调用 write_idt_entry() 把这个新产生的中断门描述符写入 idt_table 中的相应位置。
/* filename: arch/x86/include/asm/desc.h */ 103 #define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) 116 static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) 117 { 118 memcpy(&idt[entry], gate, sizeof(*gate)); 119 }
early_irq_init() 函数的实现形式取决于是否配置了 CONFIG_SPARSE_IRQ。CONFIG_SPARSE_IRQ 配置项用于支持稀疏的 irq 号,对发行版的内核很有用:发行版内核往往希望定义一个较大的 CONFIG_NR_CPUS 值,但又不希望因此消耗太多内存。如果配置了这个选项,irq_desc 就采用稀疏的布局,其大小可以用更通用的方式来确定,一般根据 CPU 的个数、IO-APIC 的个数线性扩展。而不配置该选项时,irq_desc 是一个静态数组,就必须小心地选择数组的大小,避免创建过大的静态数组。下面按照不配置 CONFIG_SPARSE_IRQ 的情况接着往下看:
/* filename: kernel/irq/irqdesc.c */ 240 #else /* !CONFIG_SPARSE_IRQ */ 241 242 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { 243 [0 ... NR_IRQS-1] = { 244 .handle_irq = handle_bad_irq, 245 .depth = 1, 246 .lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), 247 } 248 }; 249 250 int __init early_irq_init(void) 251 { 252 int count, i, node = first_online_node; 253 struct irq_desc *desc; 254 255 init_irq_default_affinity(); 256 257 printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); 258 259 desc = irq_desc; 260 count = ARRAY_SIZE(irq_desc); 261 262 for (i = 0; i < count; i++) { 263 desc[i].kstat_irqs = alloc_percpu(unsigned int); 264 alloc_masks(&desc[i], GFP_KERNEL, node); 265 raw_spin_lock_init(&desc[i].lock); 266 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 267 desc_set_defaults(i, &desc[i], node, NULL); 268 } 269 return arch_early_irq_init(); 270 }
/* filename: include/linux/irqdesc.h */ 15 /** 16 * struct irq_desc - interrupt descriptor 17 * @irq_data: per irq and chip data passed down to chip functions 18 * @timer_rand_state: pointer to timer rand state struct 19 * @kstat_irqs: irq stats per cpu 20 * @handle_irq: highlevel irq-events handler 21 * @preflow_handler: handler called before the flow handler (currently used by sparc) 22 * @action: the irq action chain 23 * @status: status information 24 * @core_internal_state__do_not_mess_with_it: core internal status information 25 * @depth: disable-depth, for nested irq_disable() calls 26 * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers 27 * @irq_count: stats field to detect stalled irqs 28 * @last_unhandled: aging timer for unhandled count 29 * @irqs_unhandled: stats field for spurious unhandled interrupts 30 * @lock: locking for SMP 31 * @affinity_hint: hint to user space for preferred irq affinity 32 * @affinity_notify: context for notification of affinity changes 33 * @pending_mask: pending rebalanced interrupts 34 * @threads_oneshot: bitfield to handle shared oneshot threads 35 * @threads_active: number of irqaction threads currently running 36 * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers 37 * @dir: /proc/irq/ procfs entry 38 * @name: flow handler name for /proc/interrupts output 39 */ 40 struct irq_desc { 41 struct irq_data irq_data; 42 struct timer_rand_state *timer_rand_state; 43 unsigned int __percpu *kstat_irqs; 44 irq_flow_handler_t handle_irq; 45 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI 46 irq_preflow_handler_t preflow_handler; 47 #endif 48 struct irqaction *action; /* IRQ action list */ 49 unsigned int status_use_accessors; 50 unsigned int core_internal_state__do_not_mess_with_it; 51 unsigned int depth; /* nested irq disables */ 52 unsigned int wake_depth; /* nested wake enables */ 53 unsigned int irq_count; /* For detecting broken IRQs */ 54 unsigned long last_unhandled; /* Aging timer for unhandled count 
*/ 55 unsigned int irqs_unhandled; 56 raw_spinlock_t lock; 57 struct cpumask *percpu_enabled; 58 #ifdef CONFIG_SMP 59 const struct cpumask *affinity_hint; 60 struct irq_affinity_notify *affinity_notify; 61 #ifdef CONFIG_GENERIC_PENDING_IRQ 62 cpumask_var_t pending_mask; 63 #endif 64 #endif 65 unsigned long threads_oneshot; 66 atomic_t threads_active; 67 wait_queue_head_t wait_for_threads; 68 #ifdef CONFIG_PROC_FS 69 struct proc_dir_entry *dir; 70 #endif 71 struct module *owner; 72 const char *name; 73 } ____cacheline_internodealigned_in_smp;
/* filename: arch/x86/kernel/irqinit.c */ 119 void __init init_IRQ(void) 120 { 121 int i; 122 123 /* 124 * We probably need a better place for this, but it works for 125 * now ... 126 */ 127 x86_add_irq_domains(); 128 129 /* 130 * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. 131 * If these IRQ's are handled by legacy interrupt-controllers like PIC, 132 * then this configuration will likely be static after the boot. If 133 * these IRQ's are handled by more mordern controllers like IO-APIC, 134 * then this vector space can be freed and re-used dynamically as the 135 * irq's migrate etc. 136 */ 137 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 138 per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; 139 140 x86_init.irqs.intr_init(); 141 }
/* filename: arch/x86/include/asm/i8259.h */ 55 struct legacy_pic { 56 int nr_legacy_irqs; 57 struct irq_chip *chip; 58 void (*mask)(unsigned int irq); 59 void (*unmask)(unsigned int irq); 60 void (*mask_all)(void); 61 void (*restore_mask)(void); 62 void (*init)(int auto_eoi); 63 int (*irq_pending)(unsigned int irq); 64 void (*make_irq)(unsigned int irq); 65 };
/* filename: include/asm-generic/percpu.h */ 77 #define VERIFY_PERCPU_PTR(__p) ({ \ 78 __verify_pcpu_ptr((__p)); \ 79 (typeof(*(__p)) __kernel __force *)(__p); \ 80 }) 81 82 #define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var))))
/* filename: arch/x86/include/asm/irq_vectors.h */ 54 /* 55 * Vectors 0x30-0x3f are used for ISA interrupts. 56 * round up to the next 16-vector boundary 57 */ 58 #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) 59 60 #define IRQ1_VECTOR (IRQ0_VECTOR + 1) 61 #define IRQ2_VECTOR (IRQ0_VECTOR + 2) 62 #define IRQ3_VECTOR (IRQ0_VECTOR + 3) 63 #define IRQ4_VECTOR (IRQ0_VECTOR + 4) 64 #define IRQ5_VECTOR (IRQ0_VECTOR + 5) 65 #define IRQ6_VECTOR (IRQ0_VECTOR + 6) 66 #define IRQ7_VECTOR (IRQ0_VECTOR + 7) 67 #define IRQ8_VECTOR (IRQ0_VECTOR + 8) 68 #define IRQ9_VECTOR (IRQ0_VECTOR + 9) 69 #define IRQ10_VECTOR (IRQ0_VECTOR + 10) 70 #define IRQ11_VECTOR (IRQ0_VECTOR + 11) 71 #define IRQ12_VECTOR (IRQ0_VECTOR + 12) 72 #define IRQ13_VECTOR (IRQ0_VECTOR + 13) 73 #define IRQ14_VECTOR (IRQ0_VECTOR + 14) 74 #define IRQ15_VECTOR (IRQ0_VECTOR + 15)
/* filename: arch/x86/kernel/irqinit.c */ 295 void __init native_init_IRQ(void) 296 { 297 int i; 298 299 /* Execute any quirks before the call gates are initialised: */ 300 x86_init.irqs.pre_vector_init(); 301 302 apic_intr_init(); 303 304 /* 305 * Cover the whole vector space, no vector can escape 306 * us. (some of these will be overridden and become 307 * 'special' SMP interrupts) 308 */ 309 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { 310 /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ 311 if (!test_bit(i, used_vectors)) 312 set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); 313 } 314 315 if (!acpi_ioapic && !of_ioapic) 316 setup_irq(2, &irq2); 317 318 #ifdef CONFIG_X86_32 319 /* 320 * External FPU? Set up irq13 if so, for 321 * original braindamaged IBM FERR coupling. 322 */ 323 if (boot_cpu_data.hard_math && !cpu_has_fpu) 324 setup_irq(FPU_IRQ, &fpu_irq); 325 326 irq_ctx_init(smp_processor_id()); 327 #endif 328 }
/* filename: arch/x86/kernel/irqinit.c */ 104 void __init init_ISA_irqs(void) 105 { 106 struct irq_chip *chip = legacy_pic->chip; 107 const char *name = chip->name; 108 int i; 109 110 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 111 init_bsp_APIC(); 112 #endif 113 legacy_pic->init(0); 114 115 for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) 116 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); 117 }
/* filename: arch/x86/kernel/i8259.c */ 223 struct irq_chip i8259A_chip = { 224 .name = "XT-PIC", 225 .irq_mask = disable_8259A_irq, 226 .irq_disable = disable_8259A_irq, 227 .irq_unmask = enable_8259A_irq, 228 .irq_mask_ack = mask_and_ack_8259A, 229 }; 380 struct legacy_pic default_legacy_pic = { 381 .nr_legacy_irqs = NR_IRQS_LEGACY, 382 .chip = &i8259A_chip, 383 .mask = mask_8259A_irq, 384 .unmask = unmask_8259A_irq, 385 .mask_all = mask_8259A, 386 .restore_mask = unmask_8259A, 387 .init = init_8259A, 388 .irq_pending = i8259A_irq_pending, 389 .make_irq = make_8259A_irq, 390 }; 391 392 struct legacy_pic *legacy_pic = &default_legacy_pic;
/* filename: arch/x86/kernel/i8259.c */ 300 static void init_8259A(int auto_eoi) 301 { 302 unsigned long flags; 303 304 i8259A_auto_eoi = auto_eoi; 305 306 raw_spin_lock_irqsave(&i8259A_lock, flags); 307 308 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 309 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ 310 311 /* 312 * outb_pic - this has to work on a wide range of PC hardware. 313 */ 314 outb_pic(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ 315 316 /* ICW2: 8259A-1 IR0-7 mapped to 0x30-0x37 on x86-64, 317 to 0x20-0x27 on i386 */ 318 outb_pic(IRQ0_VECTOR, PIC_MASTER_IMR); 319 320 /* 8259A-1 (the master) has a slave on IR2 */ 321 outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); 322 323 if (auto_eoi) /* master does Auto EOI */ 324 outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); 325 else /* master expects normal EOI */ 326 outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); 327 328 outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ 329 330 /* ICW2: 8259A-2 IR0-7 mapped to IRQ8_VECTOR */ 331 outb_pic(IRQ8_VECTOR, PIC_SLAVE_IMR); 332 /* 8259A-2 is a slave on master's IR2 */ 333 outb_pic(PIC_CASCADE_IR, PIC_SLAVE_IMR); 334 /* (slave's support for AEOI in flat mode is to be investigated) */ 335 outb_pic(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); 336 337 if (auto_eoi) 338 /* 339 * In AEOI mode we just have to mask the interrupt 340 * when acking. 341 */ 342 i8259A_chip.irq_mask_ack = disable_8259A_irq; 343 else 344 i8259A_chip.irq_mask_ack = mask_and_ack_8259A; 345 346 udelay(100); /* wait for 8259A to initialize */ 347 348 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ 349 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ 350 351 raw_spin_unlock_irqrestore(&i8259A_lock, flags); 352 }
/* filename: arch/x86/include/asm/i8259.h */ 12 /* i8259A PIC registers */ 13 #define PIC_MASTER_CMD 0x20 14 #define PIC_MASTER_IMR 0x21 15 #define PIC_MASTER_ISR PIC_MASTER_CMD 16 #define PIC_MASTER_POLL PIC_MASTER_ISR 17 #define PIC_MASTER_OCW3 PIC_MASTER_ISR 18 #define PIC_SLAVE_CMD 0xa0 19 #define PIC_SLAVE_IMR 0xa1 20 21 /* i8259A PIC related value */ 22 #define PIC_CASCADE_IR 2 23 #define MASTER_ICW4_DEFAULT 0x01 24 #define SLAVE_ICW4_DEFAULT 0x01 25 #define PIC_ICW4_AEOI 2
/* filename: arch/x86/include/asm/i8259.h */ 8 #define __byte(x, y) (((unsigned char *)&(y))[x]) 9 #define cached_master_mask (__byte(0, cached_irq_mask)) 10 #define cached_slave_mask (__byte(1, cached_irq_mask))