E500 TLB miss 及 DSI处理分析(一)

 based on linux-3.0 kernel
---------------------------
1. kernel使用swapper_pg_dir 作为PGD表基址,应用程序使用mm->pgd保存PGD表基址。
2. 应用程序基址创建路径:copy_mm->dup_mm->mm_init->mm_alloc_pgd->pgd_alloc.
pgd_alloc.
arch/powerpc/mm/pgtable_32.c:
  1. pgd_t *pgd_alloc(struct mm_struct *mm)
  2. {
  3.     pgd_t *ret;

  4.     /* pgdir take page or two with 4K pages and a page fraction otherwise */
  5. #ifndef CONFIG_PPC_4K_PAGES
  6.     ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
  7. #else
  8.     ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
  9.             PGDIR_ORDER - PAGE_SHIFT);
  10. #endif
  11.     return ret; 
  12. }
3. 进程创建PGD表后,并没有完成PTE表的映射: 一来是为了加快创建速度,二来是因为创建时还不知道需要初始化哪些PTE表项.

4. 当进程使用的虚地址没有在TLB0中命中时, 将产生ITLB(instruction TLB)或者DTLB(Data TLB) Error Interrupt, 中断处理程序会搜索PTE表,查找相应的ENTRY, 如果没有找到则创建该PTE表项。

5. 当第一次访问使用页式映射的地址时,会进入DSI(data storage interrupt)异常或者ISI(instruction storage interrupt),在异常处理程序中创建PTE表的相应entry, 因为此时在TLB0中不会有该页面对应的信息。
arch/powerpc/kernel/head_fsl_booke.S:
handle_page_fault->do_page_fault->handle_mm_fault->handle_pte_fault

6. Data TLB Error Interrupt处理(instruction TLB error interrupt处理类似):
中断时,硬件自动保存下列寄存器:
SRR0:保存被中断指令的有效地址,以便异常处理结束后返回
SRR1: 中断时的MSR值,用来恢复现场
MSR: CE,ME,DE位不变,其它清0
DEAR: 保存引发此异常的数据有效地址
ESR: 异常的原因
MASn: TLB相关信息(MMU Assist寄存器)

入口地址设置:
arch/powerpc/kernel/head_fsl_booke.S:
  -> SET_IVOR(13, DataTLBError);
DataTLBError:

  1. /* Data TLB Error Interrupt */
  2. START_EXCEPTION(DataTLBError)  //定义DataTLBError 标记
    1. /*
    2. * Exception vectors.
    3. */
    4. #define START_EXCEPTION(label) \
    5. .align 5; \  //2^5=32, 即32字节对齐
    6. label:
  3. mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
  4. mfspr r10, SPRN_SPRG_THREAD
  5. stw r11, THREAD_NORMSAVE(0)(r10)
  6. stw r12, THREAD_NORMSAVE(1)(r10)
  7. stw r13, THREAD_NORMSAVE(2)(r10)
  8. mfcr r13
  9. stw r13, THREAD_NORMSAVE(3)(r10)  //保存r11-r13寄存器到struct thread_struct->normsave
  10. DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 ??不是很明白
  11. mfspr r10, SPRN_DEAR /* Get faulting address */

  12. /* If we are faulting a kernel address, we have to use the
  13. * kernel page tables.
  14. */
  15. lis r11, PAGE_OFFSET@h
  16. cmplw 5, r10, r11  //r11=0xc000 0000, r10为引起中断的有效地址
  17. blt 5, 3f //如果小于,则表明是用户进程地址,跳到3f处执行
  18. lis r11, swapper_pg_dir@h //加载内核空间的PGD表基址
  19. ori r11, r11, swapper_pg_dir@l

  20. mfspr r12,SPRN_MAS1 /* Set TID to 0 */
  21. rlwinm r12,r12,0,16,1
  22. mtspr SPRN_MAS1,r12

  23. b 4f
  24. /* Get the PGD for the current thread */
  25. 3:
  26. mfspr r11,SPRN_SPRG_THREAD //struct thread_struct基址
  27. lwz r11,PGDIR(r11)

  28. 4:
  29. /* Mask of required permission bits. Note that while we
  30. * do copy ESR:ST to _PAGE_RW position as trying to write
  31. * to an RO page is pretty common, we don't do it with
  32. * _PAGE_DIRTY. We could do it, but it's a fairly rare
  33. * event so I'd rather take the overhead when it happens
  34. * rather than adding an instruction here. We should measure
  35. * whether the whole thing is worth it in the first place
  36. * as we could avoid loading SPRN_ESR completely in the first
  37. * place...
  38. *
  39. * TODO: Is it worth doing that mfspr & rlwimi in the first
  40. * place or can we save a couple of instructions here ?
  41. */
  42. mfspr r12,SPRN_ESR
  43. #ifdef CONFIG_PTE_64BIT
  44. li r13,_PAGE_PRESENT
  45. oris r13,r13,_PAGE_ACCESSED@h
  46. #else
  47. li r13,_PAGE_PRESENT|_PAGE_ACCESSED
  48. #endif
  49. rlwimi r13,r12,11,29,29

  50. FIND_PTE
    1. #define FIND_PTE \
    2. rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
    3. lwz r11, 0(r11); /* Get L1 entry */ \
    4. rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \
    5. beq 2f; /* Bail if no table */ \  //如果没有找到PTE表,则到2f处,在2f处建立PTE表
    6. rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \
    7. lwz r11, 0(r12); /* Get Linux PTE */ //r11保存PTE表的entry
    8. #endif
  51. andc. r13,r13,r11 /* Check permission */

  52. #ifdef CONFIG_PTE_64BIT
  53. #ifdef CONFIG_SMP
  54. subf r13,r11,r12 /* create false data dep */
  55. lwzx r13,r11,r13 /* Get upper pte bits */
  56. #else
  57. lwz r13,0(r12) /* Get upper pte bits */
  58. #endif
  59. #endif

  60. //页面不在物理内存,跳到2f处,在2f处会调用handle_page_fault去处理这种情况
  61. bne 2f /* Bail if permission/valid mismach */ 

  62. //由于有了PTE表,此时可以更新TLB0的ENTRY,完成TLB MISS 处理
  63. //处理完成后,中断返回
  64. /* Jump to common tlb load */
  65. b finish_tlb_load


  1. 2:  //恢复寄存器,并调用DataStorage继续处理,因为tlb miss中断并没有完成TLB的更新
  2. /* The bailout. Restore registers to pre-exception conditions
  3. * and call the heavyweights to help us out.
  4. */
  5. /* The bailout. Restore registers to pre-exception conditions
  6. * and call the heavyweights to help us out.
  7. */
  8. mfspr r10, SPRN_SPRG_THREAD
  9. lwz r11, THREAD_NORMSAVE(3)(r10)
  10. mtcr r11
  11. lwz r13, THREAD_NORMSAVE(2)(r10)
  12. lwz r12, THREAD_NORMSAVE(1)(r10)
  13. lwz r11, THREAD_NORMSAVE(0)(r10)
  14. mfspr r10, SPRN_SPRG_RSCRATCH0
  15. b DataStorage
7. DSI/ISI(Data Storage Interrupt, Instruction Storage Interrupt)中断的处理:
两种异常处理非常相似,以DSI为例.
产生原因:
a: 读写MMU产生的异常,重点处理,因为此类异常是操作系统故意设置的,以便用此异常来设置MMU表项
b: 地址空间跨越页边界,因为powerpc使用大端模式,所以一般不会发生这类情况
c: 试图改变已经被锁定的cache行,此类错误无法处理
d: lwarx, stwcx指令对cache-inhibited空间进行访问,此类错误无法处理

自动保存的寄存器:
SRR0:中断返回的地址
SRR1: MSR内容
ESR: 引发异常的条件及状态
MSR:CE,ME,DE位保留,其余位清0
DEAR: 保存引发DSI异常的数据有效地址,即对哪一个数据进行访问引发的异常

入口:
arch/powerpc/kernel/head_fsl_booke.S 
->SET_IVOR(2,  DataStorage);
DataStorage:
  1. /* Data Storage Interrupt */
  2. START_EXCEPTION(DataStorage)
  3. NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_DATA_STORAGE)
    1. //此宏作用有两个:1. 确定中断使用的堆栈空间 2. 将中断处理程序使用的通用寄存器和状态寄存器压入中断堆栈保存,为do_IRQ函数提供运行空间

    2. #define NORMAL_EXCEPTION_PROLOG(ivor_nr) \ 
    3. mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
    4. mfspr r10, SPRN_SPRG_THREAD; \
    5. stw r11, THREAD_NORMSAVE(0)(r10); \
    6. stw r13, THREAD_NORMSAVE(2)(r10); \
    7. mfcr r13; /* save CR in r13 for now */\
    8. DO_KVM ivor_nr SPRN_SRR1; \   //保存r10-r13寄存器
    9. mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ //SPRN_SRR1保存了中断前的MSR
    10. andi. r11,r11,MSR_PR; \ //检查中断前的MSR的MSR_PR位,如果为0表示在核心空间被中断了
    11. mr r11, r1; \ //r1为核心态堆栈指针,将其赋给r11
    12. beq 1f; \  //如果在内核空间,则到1f处, 此时r11已经指向栈顶

    13. /*用户空间栈顶指针计算:
    14. * r10==r3: 保存的是被中断进程的struct thread_struct thread地址
    15. * -THREAD(r10): 获得了task_struct结构体地址,该地址值为:thread结构体地址-thread元素的偏移地址
    16. * r11为struct thread_info地址:r11= task_struct地址+4,即+THREAD_INFO
    17. * ALLOC_STACK_FRAME(r11, THREAD_SIZE); 得到栈顶指针:thread_info地址+8K,即+THREAD_SIZE
    18. * ------------ --->high address
    19. *|           |   |
    20. *|  stack    |   |                       task_struct
    21. *|-----------|   8K大小    task          -------
    22. *|thread_info|   |       ------------->|     |---->stack = task_struct + 4
    23. *------------- --->low    <-------- --- |     |--->struct thread_struct = r3 = r10
    24. *                            stack       -------      = task_struct + THREAD
    25. * union thread_union {
      *   struct thread_info thread_info;
      *   unsigned long stack[THREAD_SIZE/sizeof(long)];
      * };
      *
    26. */
    1. /* if from user, start at top of this thread's kernel stack */ \
    2. lwz r11, THREAD_INFO-THREAD(r10); \
      1. #define THREAD_INFO 4 /* offsetof(struct task_struct, stack) # */
      1. #define THREAD 480 /* offsetof(struct task_struct, thread) # */
    3. ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
      1. #if (THREAD_SHIFT < 15)
      2. #define ALLOC_STACK_FRAME(reg, val) \
      3. addi reg,reg,val  // use this
      4. #else
      5. #define ALLOC_STACK_FRAME(reg, val) \
      6. addis reg,reg,val@ha ; \
      7. addi reg,reg,val@l
      8. #endif
    4. //开辟堆栈空间,大小为INT_FRAME_SIZE = STACK_INT_FRAME_SIZE = 
    5. //(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
    6. 1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
    7. stw r13, _CCR(r11); /* save various registers */ \ //保存r13
    8. stw r12,GPR12(r11); \  //保存r12, r9
    9. stw r9,GPR9(r11); \
    10. mfspr r13, SPRN_SPRG_RSCRATCH0; \ //保存r10
    11. stw r13, GPR10(r11); \
    12. lwz r12, THREAD_NORMSAVE(0)(r10); \ //保存r11
    13. stw r12,GPR11(r11); \
    14. lwz r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */ \
    15. mflr r10; \
    16. stw r10,_LINK(r11); \ //保存lr
    17. mfspr r12,SPRN_SRR0; \ //读取中断返回地址(SRR0)到r12
    18. stw r1, GPR1(r11); \ //保存中断前的r1(栈指针)
    19. mfspr r9,SPRN_SRR1; \ //读取中断前的MSR(SRR1)到r9
    20. stw r1, 0(r11); \  //将中断前的r1写入新栈帧起始处(回链)
    21. mr r1, r11; \
    22. rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
    23. stw r0,GPR0(r11); \
    24. lis r10, STACK_FRAME_REGS_MARKER@ha ;/* exception frame marker */ \
    25. addi r10, r10, STACK_FRAME_REGS_MARKER@l ; \
    26. stw r10, 8(r11); \
    27. SAVE_4GPRS(3, r11); \ //保存gpr3-gpr8寄存器
    28. SAVE_2GPRS(7, r11)
    29. //总结:保存好r3-r12寄存器,建立好堆栈结构,把中断处理程序之前的MSR及中断返回地址存入r9及r12
  4. mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
  5. stw r5,_ESR(r11)
  6. mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */  //保存ESR及DEAR寄存器
  7. andis. r10,r5,(ESR_ILK|ESR_DLK)@h //如果发生了ILK或DLK异常,则到1f处执行CacheLockingException
  8. bne 1f
  9. EXC_XFER_EE_LITE(0x0300, handle_page_fault)
    1. #define EXC_XFER_EE_LITE(n, hdlr) \
    2. EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
    3. ret_from_except)
      1. #define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
      2. li r10,trap; \
      3. stw r10,_TRAP(r11); \ //保存trap值,即0x0300+1,可能是为了调试
      4. lis r10,msr@h ; \
      5. ori r10,r10,msr@l ; \
      6. copyee(r10, r9); \ //如果copyee=NOCOPY,则MSR的EE位保持不变,如果为COPY_EE则MSR的EE位更改为中断前的MSR的EE位,r9为中断前的MSR内容
        1. #define COPY_EE(d, s) rlwimi d,s,0,16,16
        2. #define NOCOPY(d, s)
      7. //依次调用函数tfer, hdlr, ret
      8. bl tfer; \  // tfer=transfer_to_handler
      9. .long hdlr; \ //hdlr=handle_page_fault
      10. .long ret //ret = ret_from_except

      11.     .globl  transfer_to_handler
        transfer_to_handler:
            stw r2,GPR2(r11) //保存GPR2,即current指针
            stw r12,_NIP(r11) //保存r12,即中断返回地址
            stw r9,_MSR(r11) //保存进入中断前的msr
            andi.   r2,r9,MSR_PR //判断当前中断在用户态还是核心态
            mfctr   r12  //ctr->r12
            mfspr   r2,SPRN_XER //xer->r2
            stw r12,_CTR(r11) //保存这两个寄存器的值
            stw r2,_XER(r11)
            mfspr   r12,SPRN_SPRG_THREAD //thread_struct->r12
            addi    r2,r12,-THREAD //获得task_struct地址值,并存入r2中
            tovirt(r2,r2)           /* set r2 to current */

        1. #define tovirt(rd,rs) \
        2. addis rd,rs,0
            beq 2f          /* if from user, fix up THREAD.regs */
            addi    r11,r1,STACK_FRAME_OVERHEAD  //中断发生在用户态情况, r11为栈顶指针
            stw r11,PT_REGS(r12) //用r11值更新thread_struct的regs字段(THREAD.regs)
      12. //如果调试或被跟踪,则清除相关事件,并跳到3f处继续
        #if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
            /* Check to see if the dbcr0 register is set up to debug.  Use the
               internal debug mode bit to do this. */
            lwz r12,THREAD_DBCR0(r12)
            andis.  r12,r12,DBCR0_IDM@h
            beq+    3f
            /* From user and task is ptraced - load up global dbcr0 */
            li  r12,-1          /* clear all pending debug events */
            mtspr   SPRN_DBSR,r12
            lis r11,global_dbcr0@ha
            tophys(r11,r11)
            addi    r11,r11,global_dbcr0@l
        #ifdef CONFIG_SMP
            rlwinm  r9,r1,0,0,(31-THREAD_SHIFT)
            lwz r9,TI_CPU(r9)
            slwi    r9,r9,3
            add r11,r11,r9
        #endif
            lwz r12,0(r11)
            mtspr   SPRN_DBCR0,r12
            lwz r12,4(r11)
            addi    r12,r12,-1
            stw r12,4(r11)
        #endif
            b   3f

        2:  /* if from kernel, check interrupted DOZE/NAP mode and
                 * check for stack overflow
                 */

            lwz r9,KSP_LIMIT(r12)
            cmplw   r1,r9           /* if r1 <= ksp_limit */
            ble-    stack_ovf       /* then the kernel stack overflowed */
        5:
        #if defined(CONFIG_6xx) || defined(CONFIG_E500)
            rlwinm  r9,r1,0,0,31-THREAD_SHIFT
            tophys(r9,r9)           /* check local flags */
            lwz r12,TI_LOCAL_FLAGS(r9)
            mtcrf   0x01,r12
            bt- 31-TLF_NAPPING,4f
            bt- 31-TLF_SLEEPING,7f
        #endif /* CONFIG_6xx || CONFIG_E500 */

      13.     .globl transfer_to_handler_cont
        transfer_to_handler_cont:
        3:
            mflr    r9  //将返回地址赋于r9,即
        .long hdlr; \ //hdlr=handle_page_fault语句地址
            lwz r11,0(r9)       /* virtual address of handler */
            lwz r9,4(r9)        /* where to go when done */ //.long ret_from_except地址
        #ifdef CONFIG_TRACE_IRQFLAGS 
        #ifdef CONFIG_BOOKE
            mtmsr   r10
        #else
            lis r12,reenable_mmu@h
            ori r12,r12,reenable_mmu@l
            mtspr   SPRN_SRR0,r12
            mtspr   SPRN_SRR1,r10
            SYNC
            RFI
        reenable_mmu:               /* re-enable mmu so we can */
            mfmsr   r10
        #endif /* !CONFIG_BOOKE */
            lwz r12,_MSR(r1)
            xor r10,r10,r12
            andi.   r10,r10,MSR_EE      /* Did EE change? */
            beq 1f
            /* Save handler and return address into the 2 unused words
             * of the STACK_FRAME_OVERHEAD (sneak sneak sneak). Everything
             * else can be recovered from the pt_regs except r3 which for
             * normal interrupts has been set to pt_regs and for syscalls
             * is an argument, so we temporarily use ORIG_GPR3 to save it
             */
            stw r9,8(r1)
            stw r11,12(r1)
            stw r3,ORIG_GPR3(r1)
            bl  trace_hardirqs_off
            lwz r0,GPR0(r1)
            lwz r3,ORIG_GPR3(r1)
            lwz r4,GPR4(r1)
            lwz r5,GPR5(r1)
            lwz r6,GPR6(r1)
            lwz r7,GPR7(r1)
            lwz r8,GPR8(r1)
            lwz r9,8(r1)
            lwz r11,12(r1)
        1:  mtctr   r11
            mtlr    r9
            bctr                /* jump to handler */
        #else /* CONFIG_TRACE_IRQFLAGS */
        #ifdef CONFIG_BOOKE
            /*
             * We're not changing address space on Book E, and the
             * extra rfi can hurt when virtualized -- whereas
             * mtmsr can be paravirtualized.
             */
            mtmsr   r10
            mtctr   r11
            mtlr    r9 //把调用返回地址设为r9
            bctr //此处会调用ctr内的值,即r11函数,即中断处理函数
        handle_page_fault执行,执行完中断处理函数后,会接着执行r9处函数,因为此时r9被设为了返回地址,即执行:ret_from_except函数
        #else
            mtspr   SPRN_SRR0,r11
            mtspr   SPRN_SRR1,r10
            mtlr    r9
            SYNC
            RFI             /* jump to handler, enable MMU */
        #endif /* !CONFIG_BOOKE */
        #endif /* CONFIG_TRACE_IRQFLAGS */
        #if defined (CONFIG_6xx) || defined(CONFIG_E500)
        4:  rlwinm  r12,r12,0,~_TLF_NAPPING
            stw r12,TI_LOCAL_FLAGS(r9)
            b   power_save_ppc32_restore

        7:  rlwinm  r12,r12,0,~_TLF_SLEEPING
            stw r12,TI_LOCAL_FLAGS(r9)
            lwz r9,_MSR(r11)        /* if sleeping, clear MSR.EE */
            rlwinm  r9,r9,0,~MSR_EE
            lwz r12,_LINK(r11)      /* and return to address in LR */
            b   fast_exception_return
        #endif

        /*
         * On kernel stack overflow, load up an initial stack pointer
         * and call StackOverflow(regs), which should not return.
         */
        stack_ovf:

            /* sometimes we use a statically-allocated stack, which is OK. */
            lis r12,_end@h
            ori r12,r12,_end@l
            cmplw   r1,r12
            ble 5b          /* r1 <= &_end is OK */
            SAVE_NVGPRS(r11)
            addi    r3,r1,STACK_FRAME_OVERHEAD
            lis r1,init_thread_union@ha
            addi    r1,r1,init_thread_union@l
            addi    r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
            lis r9,StackOverflow@ha
            addi    r9,r9,StackOverflow@l //
        StackOverflow函数会调用panic,使系统结束
            LOAD_MSR_KERNEL(r10,MSR_KERNEL)
            FIX_SRR1(r10,r12)
            mtspr   SPRN_SRR0,r9
            mtspr   SPRN_SRR1,r10
            SYNC
            RFI


  10. 1: //与上面类似,只是hdlr函数为CacheLockingException
  11. addi r3,r1,STACK_FRAME_OVERHEAD
  12. EXC_XFER_EE_LITE(0x0300, CacheLockingException)

你可能感兴趣的:(tlb)