CACHE & TLB (3)

source code

Taking the ARM926EJ-S as the example for analysis.

The main code lives in arch/arm/mm/cache-arm926.S and arch/arm/mm/proc-arm926.S.

1. Definitions of cache and tlb

 

#ifdef MULTI_CACHE

 

ENTRY(\name\()_cache_fns)

            .long    \name\()_flush_icache_all

            .long    \name\()_flush_kern_cache_all

            .long    \name\()_flush_user_cache_all

            .long    \name\()_flush_user_cache_range

            .long    \name\()_coherent_kern_range

            .long    \name\()_coherent_user_range

            .long    \name\()_flush_kern_dcache_area

            .long    \name\()_dma_map_area

            .long    \name\()_dma_unmap_area

            .long    \name\()_dma_inv_range

            .long    \name\()_dma_clean_range

            .long    \name\()_dma_flush_range

            .size     \name\()_cache_fns, . - \name\()_cache_fns

.endm

 

 

/*
 * the cache line size of the I and D cache
 */
#define CACHE_DLINESIZE		32

 

/*
 *	MM Cache Management
 *	===================
 *
 *	The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
 *	implement these methods.
 *
 *	Start addresses are inclusive and end addresses are exclusive;
 *	start addresses should be rounded down, end addresses up.
 *
 *	See Documentation/cachetlb.txt for more information.
 *	Please note that the implementation of these, and the required
 *	effects are cache-type (VIVT/VIPT/PIPT) specific.
 *
 *	flush_icache_all()
 *
 *		Unconditionally clean and invalidate the entire icache.
 *		Currently only needed for cache-v6.S and cache-v7.S, see
 *		__flush_icache_all for the generic implementation.
 *
 *	flush_kern_all()
 *
 *		Unconditionally clean and invalidate the entire cache.
 *
 *	flush_user_all()
 *
 *		Clean and invalidate all user space cache entries
 *		before a change of page tables.
 *
 *	flush_user_range(start, end, flags)
 *
 *		Clean and invalidate a range of cache entries in the
 *		specified address space before a change of page tables.
 *		- start - user start address (inclusive, page aligned)
 *		- end   - user end address   (exclusive, page aligned)
 *		- flags - vma->vm_flags field
 *
 *	coherent_kern_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	coherent_user_range(start, end)
 *
 *		Ensure coherency between the Icache and the Dcache in the
 *		region described by start, end.  If you have non-snooping
 *		Harvard caches, you need to implement this function.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	flush_kern_dcache_area(kaddr, size)
 *
 *		Ensure that the data held in page is written back.
 *		- kaddr  - page address
 *		- size   - region size
 *
 *	DMA Cache Coherency
 *	===================
 *
 *	dma_inv_range(start, end)
 *
 *		Invalidate (discard) the specified virtual address range.
 *		May not write back any entries.  If 'start' or 'end'
 *		are not cache line aligned, those lines must be written
 *		back.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	dma_clean_range(start, end)
 *
 *		Clean (write back) the specified virtual address range.
 *		- start  - virtual start address
 *		- end    - virtual end address
 *
 *	dma_flush_range(start, end)
 *
 *		Clean and invalidate the specified virtual address range.
 *		- start  - virtual start address
 *		- end    - virtual end address
 */

struct cpu_cache_fns {
	void (*flush_icache_all)(void);
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);

	void (*coherent_kern_range)(unsigned long, unsigned long);
	void (*coherent_user_range)(unsigned long, unsigned long);
	void (*flush_kern_dcache_area)(void *, size_t);

	void (*dma_map_area)(const void *, size_t, int);
	void (*dma_unmap_area)(const void *, size_t, int);

	void (*dma_inv_range)(const void *, const void *);
	void (*dma_clean_range)(const void *, const void *);
	void (*dma_flush_range)(const void *, const void *);
};

 

 

extern struct cpu_cache_fns cpu_cache;

 

#define __cpuc_flush_icache_all                    cpu_cache.flush_icache_all

#define __cpuc_flush_kern_all                       cpu_cache.flush_kern_all

#define __cpuc_flush_user_all                       cpu_cache.flush_user_all

#define __cpuc_flush_user_range                  cpu_cache.flush_user_range

#define __cpuc_coherent_kern_range            cpu_cache.coherent_kern_range

#define __cpuc_coherent_user_range            cpu_cache.coherent_user_range

#define __cpuc_flush_dcache_area                cpu_cache.flush_kern_dcache_area

 

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */

#define dmac_map_area                                 cpu_cache.dma_map_area

#define dmac_unmap_area                             cpu_cache.dma_unmap_area

#define dmac_inv_range                                cpu_cache.dma_inv_range

#define dmac_clean_range                 cpu_cache.dma_clean_range

#define dmac_flush_range                 cpu_cache.dma_flush_range
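
With MULTI_CACHE defined, each of these macros is an indirect call through the global cpu_cache table that setup_processor() fills in at boot (section 2 below); in a single-cache build the call is bound at compile time to one implementation instead. A minimal caller-side sketch (make_region_coherent is a made-up name, for illustration only):

#include <asm/cacheflush.h>

/* Sketch only: what a caller of the cache-maintenance API looks like;
 * __cpuc_flush_dcache_area comes from asm/cacheflush.h. */
static void make_region_coherent(void *buf, size_t len)
{
	/*
	 * MULTI_CACHE build: expands to
	 *     cpu_cache.flush_kern_dcache_area(buf, len);
	 * i.e. an indirect call through the table filled by setup_processor().
	 * Single-cache build: bound at compile time, on ARM926 effectively
	 *     arm926_flush_kern_dcache_area(buf, len);
	 */
	__cpuc_flush_dcache_area(buf, len);
}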

 

 

#ifdef MULTI_TLB

struct cpu_tlb_fns cpu_tlb __read_mostly;

#endif

struct cpu_tlb_fns {
	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
	void (*flush_kern_range)(unsigned long, unsigned long);
	unsigned long tlb_flags;
};

 

/*
 *	TLB Management
 *	==============
 *
 *	The arch/arm/mm/tlb-*.S files implement these methods.
 *
 *	The TLB specific code is expected to perform whatever tests it
 *	needs to determine if it should invalidate the TLB for each
 *	call.  Start addresses are inclusive and end addresses are
 *	exclusive; it is safe to round these addresses down.
 *
 *	flush_tlb_all()
 *
 *		Invalidate the entire TLB.
 *
 *	flush_tlb_mm(mm)
 *
 *		Invalidate all TLB entries in a particular address
 *		space.
 *		- mm	- mm_struct describing address space
 *
 *	flush_tlb_range(mm,start,end)
 *
 *		Invalidate a range of TLB entries in the specified
 *		address space.
 *		- mm	- mm_struct describing address space
 *		- start - start address (may not be aligned)
 *		- end	- end address (exclusive, may not be aligned)
 *
 *	flush_tlb_page(vaddr,vma)
 *
 *		Invalidate the specified page in the specified address range.
 *		- vaddr - virtual address (may not be aligned)
 *		- vma	- vma_struct describing address range
 *
 *	flush_kern_tlb_page(kaddr)
 *
 *		Invalidate the TLB entry for the specified page.  The address
 *		will be in the kernel virtual memory space.  Current uses
 *		only require the D-TLB to be invalidated.
 *		- kaddr - Kernel virtual memory address
 */

 

 

 

#ifdef MULTI_TLB

 

#define __cpu_flush_user_tlb_range  cpu_tlb.flush_user_range

#define __cpu_flush_kern_tlb_range cpu_tlb.flush_kern_range

#define __cpu_tlb_flags                                 cpu_tlb.tlb_flags

 

/*
 * Convert calls to our calling convention.
 */

#define local_flush_tlb_range(vma,start,end)            __cpu_flush_user_tlb_range(start,end,vma)

#define local_flush_tlb_kernel_range(s,e)                  __cpu_flush_kern_tlb_range(s,e)

 

2. Passing the architecture-specific pointers

In setup_processor(), the architecture-specific function pointers are filled in via the lookup_processor_type() function.

static void __init setup_processor(void)
{
	struct proc_info_list *list;

	/*
	 * locate processor in the list of supported processor
	 * types.  The linker builds this table for us from the
	 * entries in arch/arm/mm/proc-*.S
	 */
	list = lookup_processor_type(read_cpuid_id());

#ifdef MULTI_CPU
	processor = *list->proc;
#endif
#ifdef MULTI_TLB
	cpu_tlb = *list->tlb;
#endif
#ifdef MULTI_USER
	cpu_user = *list->user;
#endif
#ifdef MULTI_CACHE
	cpu_cache = *list->cache;
#endif
	...
}

 

For example, for the ARM926EJ-S:

 

	.type	__arm926_proc_info, #object
__arm926_proc_info:
	.long	0x41069260			@ ARM926EJ-S (v5TEJ)
	.long	0xff0ffff0
	.long	PMD_TYPE_SECT | \
		PMD_SECT_BUFFERABLE | \
		PMD_SECT_CACHEABLE | \
		PMD_BIT4 | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	.long	PMD_TYPE_SECT | \
		PMD_BIT4 | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	b	__arm926_setup
	.long	cpu_arch_name
	.long	cpu_elf_name
	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
	.long	cpu_arm926_name
	.long	arm926_processor_functions
	.long	v4wbi_tlb_fns
	.long	v4wb_user_fns
	.long	arm926_cache_fns
	.size	__arm926_proc_info, . - __arm926_proc_info
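
For reference, these .long entries line up with the fields of struct proc_info_list (arch/arm/include/asm/procinfo.h); the last four are exactly the pointers that setup_processor() copies as list->proc, list->tlb, list->user and list->cache. Roughly:

/* The C view of the record above (from asm/procinfo.h, abridged). */
struct proc_info_list {
	unsigned int		cpu_val;		/* 0x41069260 */
	unsigned int		cpu_mask;		/* 0xff0ffff0 */
	unsigned long		__cpu_mm_mmu_flags;	/* section flags for kernel mappings */
	unsigned long		__cpu_io_mmu_flags;	/* section flags for I/O mappings */
	unsigned long		__cpu_flush;		/* b __arm926_setup */
	const char		*arch_name;		/* cpu_arch_name */
	const char		*elf_name;		/* cpu_elf_name */
	unsigned int		elf_hwcap;		/* HWCAP_* bits */
	const char		*cpu_name;		/* cpu_arm926_name */
	struct processor	*proc;			/* arm926_processor_functions */
	struct cpu_tlb_fns	*tlb;			/* v4wbi_tlb_fns */
	struct cpu_user_fns	*user;			/* v4wb_user_fns */
	struct cpu_cache_fns	*cache;			/* arm926_cache_fns */
};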

 

 

3. cpu_tlb & cpu_cache

Focus on cpu_tlb & cpu_cache.

a) cpu_cache

 

For example,

unmap_single eventually calls __cpuc_flush_dcache_area; no TLB flush is needed along this path.

__cpuc_flush_dcache_area(ptr, size) resolves to arm926_flush_kern_dcache_area.
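
To put this in context, here is a hedged driver-side sketch: on a non-coherent CPU such as the ARM926, the streaming DMA map/unmap helpers are what end up in these per-CPU cache hooks. dev, buf and len are placeholders.

#include <linux/dma-mapping.h>

/* Illustrative only: receive one buffer from a device via streaming DMA. */
static void example_dma_rx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* hand the buffer to the device; the map path performs whatever
	 * cache maintenance the direction requires */
	handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);

	/* ... the device DMAs data into buf ... */

	/* reclaim the buffer for the CPU; the unmap path does the D-cache
	 * maintenance via the per-CPU (arm926_*) routines */
	dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
}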

 

 

           

PS:

// Marking a D-cache line invalid makes main memory the authoritative copy: the cache line is discarded, and the next access fetches the data from main memory again.

lr is the return-address register, so the routine returns at the end with mov pc, lr.

r0 holds the parameter ptr and r1 holds the parameter size.

 

The source code:

ENTRY(arm926_flush_kern_dcache_area)
	add	r1, r0, r1
1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
	add	r0, r0, #CACHE_DLINESIZE
	cmp	r0, r1
	blo	1b
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
	mov	pc, lr				@ return to the caller (unmap_single here)
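
Read as C, the routine does the following (a sketch only; the helper names are hypothetical stand-ins for the mcr instructions above):

/* Sketch of arm926_flush_kern_dcache_area in C; each hypothetical helper
 * corresponds to one coprocessor instruction in the listing above. */
void flush_kern_dcache_area_sketch(void *ptr, size_t size)
{
	unsigned long addr = (unsigned long)ptr;	/* r0 */
	unsigned long end  = addr + size;		/* r1 = r0 + r1 */

	while (addr < end) {				/* cmp / blo loop */
		dcache_clean_and_invalidate_line(addr);	/* mcr p15, 0, r0, c7, c14, 1 */
		addr += CACHE_DLINESIZE;		/* 32 bytes on ARM926EJ-S */
	}
	icache_invalidate_all();			/* mcr p15, 0, r0, c7, c5, 0 */
	drain_write_buffer();				/* mcr p15, 0, r0, c7, c10, 4 */
}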

 

 

 

PS:

Invalidating a line discards it from the cache, so the next access fetches the data from physical memory again.

Cleaning a line (the write-back operation) writes its dirty contents from the cache back to physical memory.

 

for example,

 

/*
 *	flush_kern_cache_all()
 *
 *	Clean and invalidate the entire cache.
 */

ENTRY(arm926_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
#else
1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
	bne	1b
#endif
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
	mov	pc, lr

b) cpu_tlb

The TLB functions used here are the v4wbi ones; see the ARM926EJ-S documentation for the details.

For example,

#define local_flush_tlb_kernel_range(s,e)	__cpu_flush_kern_tlb_range(s,e)

static inline void ipi_flush_tlb_kernel_range(void *arg)
{
	struct tlb_args *ta = (struct tlb_args *)arg;

	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}

On entry to the flush routine: lr = the return address (back into ipi_flush_tlb_kernel_range), r0 = ta->ta_start, r1 = ta->ta_end.

local_flush_tlb_kernel_page is just a special case of local_flush_tlb_kernel_range, with end = start + PAGE_SIZE.
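
In other words (a sketch only; the kernel has its own inline for this):

/* Sketch: flushing one kernel page is just a one-page range flush. */
static inline void flush_one_kernel_page(unsigned long kaddr)
{
	kaddr &= PAGE_MASK;
	local_flush_tlb_kernel_range(kaddr, kaddr + PAGE_SIZE);
}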

 

My own view:

From the source code we can see that flush_kern_tlb_range has the same loop structure as flush_dcache_area, except that it invalidates the I-TLB and D-TLB entries instead of cache lines. Moreover, before flushing a kernel TLB range the write buffer is drained, so that pending writes reach memory before the translations disappear and later accesses do not miss the data.

This function matters for context switches and on SMP architectures.

 

            The source code:

ENTRY(v4wbi_flush_kern_tlb_range)
	mov	r3, #0
	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
	bic	r0, r0, #0x0ff
	bic	r0, r0, #0xf00
1:	mcr	p15, 0, r0, c8, c5, 1		@ invalidate I TLB entry
	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
	add	r0, r0, #PAGE_SZ
	cmp	r0, r1
	blo	1b
	mov	pc, lr
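
Again as a C sketch, with hypothetical helpers standing in for the coprocessor instructions (the bic pair clears bits 0-11, i.e. it aligns the start address down to a 4K page):

/* Sketch of v4wbi_flush_kern_tlb_range; each hypothetical helper maps to
 * one instruction in the listing above. */
void flush_kern_tlb_range_sketch(unsigned long start, unsigned long end)
{
	drain_write_buffer();			/* mcr p15, 0, r3, c7, c10, 4 */
	start &= PAGE_MASK;			/* bic r0, #0x0ff / bic r0, #0xf00 */
	while (start < end) {			/* cmp / blo loop */
		itlb_invalidate_entry(start);	/* mcr p15, 0, r0, c8, c5, 1 */
		dtlb_invalidate_entry(start);	/* mcr p15, 0, r0, c8, c6, 1 */
		start += PAGE_SIZE;		/* add r0, r0, #PAGE_SZ */
	}
}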

 

 

4. When does the TLB need to be flushed?

My own view:

1. First, on a context switch (context_switch).
2. On SMP, flush_tlb_kernel_range (broadcast to the other CPUs if needed).
3. dma_remap / __dma_free_remap.
4. unmap_area_sections, which only applies when !CONFIG_SMP.

 

#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
/*
 * Section support is unsafe on SMP - If you iounmap and ioremap a region,
 * the other CPUs will not see this change until their next context switch.
 * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs
 * which requires the new ioremap'd region to be referenced, the CPU will
 * reference the _old_ region.
 *
 * Note that get_vm_area_caller() allocates a guard 4K page, so we need to
 * mask the size back to 1MB aligned or we will overflow in the loop below.
 */
static void unmap_area_sections(unsigned long virt, unsigned long size)

           

            5. unmap_kernel_range

/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes vcache before
 * the unmapping and tlb after.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)

 
