Taking the ARM926EJ-S as an example, the main code lives in arch/arm/mm/cache-arm926.S and arch/arm/mm/proc-arm926.S.
#ifdef MULTI_CACHE
ENTRY(\name\()_cache_fns)
.long \name\()_flush_icache_all
.long \name\()_flush_kern_cache_all
.long \name\()_flush_user_cache_all
.long \name\()_flush_user_cache_range
.long \name\()_coherent_kern_range
.long \name\()_coherent_user_range
.long \name\()_flush_kern_dcache_area
.long \name\()_dma_map_area
.long \name\()_dma_unmap_area
.long \name\()_dma_inv_range
.long \name\()_dma_clean_range
.long \name\()_dma_flush_range
.size \name\()_cache_fns, . - \name\()_cache_fns
.endm
/*
 * The cache line size of the I and D cache
 */
#define CACHE_DLINESIZE 32
/*
 * MM Cache Management
 * ===================
 *
 * The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files
 * implement these methods.
 *
 * Start addresses are inclusive and end addresses are exclusive;
 * start addresses should be rounded down, end addresses up.
 *
 * See Documentation/cachetlb.txt for more information.
 * Please note that the implementation of these, and the required
 * effects are cache-type (VIVT/VIPT/PIPT) specific.
 *
 * flush_icache_all()
 *
 *     Unconditionally clean and invalidate the entire icache.
 *     Currently only needed for cache-v6.S and cache-v7.S, see
 *     __flush_icache_all for the generic implementation.
 *
 * flush_kern_all()
 *
 *     Unconditionally clean and invalidate the entire cache.
 *
 * flush_user_all()
 *
 *     Clean and invalidate all user space cache entries
 *     before a change of page tables.
 *
 * flush_user_range(start, end, flags)
 *
 *     Clean and invalidate a range of cache entries in the
 *     specified address space before a change of page tables.
 *     - start - user start address (inclusive, page aligned)
 *     - end   - user end address   (exclusive, page aligned)
 *     - flags - vma->vm_flags field
 *
 * coherent_kern_range(start, end)
 *
 *     Ensure coherency between the Icache and the Dcache in the
 *     region described by start, end.  If you have non-snooping
 *     Harvard caches, you need to implement this function.
 *     - start - virtual start address
 *     - end   - virtual end address
 *
 * coherent_user_range(start, end)
 *
 *     Ensure coherency between the Icache and the Dcache in the
 *     region described by start, end.  If you have non-snooping
 *     Harvard caches, you need to implement this function.
 *     - start - virtual start address
 *     - end   - virtual end address
 *
 * flush_kern_dcache_area(kaddr, size)
 *
 *     Ensure that the data held in page is written back.
 *     - kaddr - page address
 *     - size  - region size
 *
 * DMA Cache Coherency
 * ===================
 *
 * dma_inv_range(start, end)
 *
 *     Invalidate (discard) the specified virtual address range.
 *     May not write back any entries.  If 'start' or 'end'
 *     are not cache line aligned, those lines must be written
 *     back.
 *     - start - virtual start address
 *     - end   - virtual end address
 *
 * dma_clean_range(start, end)
 *
 *     Clean (write back) the specified virtual address range.
 *     - start - virtual start address
 *     - end   - virtual end address
 *
 * dma_flush_range(start, end)
 *
 *     Clean and invalidate the specified virtual address range.
 *     - start - virtual start address
 *     - end   - virtual end address
 */
struct cpu_cache_fns {
	void (*flush_icache_all)(void);
	void (*flush_kern_all)(void);
	void (*flush_user_all)(void);
	void (*flush_user_range)(unsigned long, unsigned long, unsigned int);
	void (*coherent_kern_range)(unsigned long, unsigned long);
	void (*coherent_user_range)(unsigned long, unsigned long);
	void (*flush_kern_dcache_area)(void *, size_t);
	void (*dma_map_area)(const void *, size_t, int);
	void (*dma_unmap_area)(const void *, size_t, int);
	void (*dma_inv_range)(const void *, const void *);
	void (*dma_clean_range)(const void *, const void *);
	void (*dma_flush_range)(const void *, const void *);
};
extern struct cpu_cache_fns cpu_cache;
#define __cpuc_flush_icache_all cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all cpu_cache.flush_kern_all
#define __cpuc_flush_user_all cpu_cache.flush_user_all
#define __cpuc_flush_user_range cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area cpu_cache.flush_kern_dcache_area
/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
#define dmac_map_area cpu_cache.dma_map_area
#define dmac_unmap_area cpu_cache.dma_unmap_area
#define dmac_inv_range cpu_cache.dma_inv_range
#define dmac_clean_range cpu_cache.dma_clean_range
#define dmac_flush_range cpu_cache.dma_flush_range
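The definitions above are the MULTI_CACHE path, where every call is routed through the cpu_cache function-pointer table filled in at boot. When only a single cache type is compiled in (MULTI_CACHE undefined), cacheflush.h instead glues the names directly to the per-CPU routines; roughly like the following sketch (paraphrased from memory, not an exact quote of the #else branch):
/* Non-MULTI_CACHE sketch: _CACHE expands to the CPU name (e.g. arm926),
 * so __cpuc_flush_kern_all becomes a direct call to
 * arm926_flush_kern_cache_all with no indirection at run time. */
#define __cpuc_flush_icache_all		__glue(_CACHE, _flush_icache_all)
#define __cpuc_flush_kern_all		__glue(_CACHE, _flush_kern_cache_all)
#define __cpuc_flush_user_all		__glue(_CACHE, _flush_user_cache_all)
#define __cpuc_flush_dcache_area	__glue(_CACHE, _flush_kern_dcache_area)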
#ifdef MULTI_TLB
struct cpu_tlb_fns cpu_tlb __read_mostly;
#endif
struct cpu_tlb_fns {
	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
	void (*flush_kern_range)(unsigned long, unsigned long);
	unsigned long tlb_flags;
};
/*
 * TLB Management
 * ==============
 *
 * The arch/arm/mm/tlb-*.S files implement these methods.
 *
 * The TLB specific code is expected to perform whatever tests it
 * needs to determine if it should invalidate the TLB for each
 * call.  Start addresses are inclusive and end addresses are
 * exclusive; it is safe to round these addresses down.
 *
 * flush_tlb_all()
 *
 *     Invalidate the entire TLB.
 *
 * flush_tlb_mm(mm)
 *
 *     Invalidate all TLB entries in a particular address
 *     space.
 *     - mm    - mm_struct describing address space
 *
 * flush_tlb_range(mm, start, end)
 *
 *     Invalidate a range of TLB entries in the specified
 *     address space.
 *     - mm    - mm_struct describing address space
 *     - start - start address (may not be aligned)
 *     - end   - end address (exclusive, may not be aligned)
 *
 * flush_tlb_page(vaddr, vma)
 *
 *     Invalidate the specified page in the specified address range.
 *     - vaddr - virtual address (may not be aligned)
 *     - vma   - vma_struct describing address range
 *
 * flush_kern_tlb_page(kaddr)
 *
 *     Invalidate the TLB entry for the specified page.  The address
 *     will be in the kernel virtual memory space.  Current uses
 *     only require the D-TLB to be invalidated.
 *     - kaddr - Kernel virtual memory address
 */
#ifdef MULTI_TLB
#define __cpu_flush_user_tlb_range cpu_tlb.flush_user_range
#define __cpu_flush_kern_tlb_range cpu_tlb.flush_kern_range
#define __cpu_tlb_flags cpu_tlb.tlb_flags
/*
 * Convert calls to our calling convention.
*/
#define local_flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma)
#define local_flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e)
2. Passing the architecture-specific pointers
In the setup_processor function, the architecture-specific pointers are passed in via the lookup_processor_type function.
static void __init setup_processor(void)
{
	struct proc_info_list *list;

	/*
	 * locate processor in the list of supported processor
	 * types.  The linker builds this table for us from the
	 * entries in arch/arm/mm/proc-*.S
	 */
	list = lookup_processor_type(read_cpuid_id());
#ifdef MULTI_CPU
	processor = *list->proc;
#endif
#ifdef MULTI_TLB
	cpu_tlb = *list->tlb;
#endif
#ifdef MULTI_USER
	cpu_user = *list->user;
#endif
#ifdef MULTI_CACHE
	cpu_cache = *list->cache;
#endif
	...
}
For example, for the ARM926EJ-S:
	.type	__arm926_proc_info, #object
__arm926_proc_info:
	.long	0x41069260			@ ARM926EJ-S (v5TEJ)
	.long	0xff0ffff0
	.long	PMD_TYPE_SECT | \
		PMD_SECT_BUFFERABLE | \
		PMD_SECT_CACHEABLE | \
		PMD_BIT4 | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	.long	PMD_TYPE_SECT | \
		PMD_BIT4 | \
		PMD_SECT_AP_WRITE | \
		PMD_SECT_AP_READ
	b	__arm926_setup
	.long	cpu_arch_name
	.long	cpu_elf_name
	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
	.long	cpu_arm926_name
	.long	arm926_processor_functions
	.long	v4wbi_tlb_fns
	.long	v4wb_user_fns
	.long	arm926_cache_fns
	.size	__arm926_proc_info, . - __arm926_proc_info
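Each .long in this record fills one field of struct proc_info_list (arch/arm/include/asm/procinfo.h); the last four entries are exactly the proc/tlb/user/cache pointers that setup_processor dereferences above. Roughly (field comments added here to map back to the record):
struct proc_info_list {
	unsigned int		cpu_val;		/* 0x41069260: CPU ID value */
	unsigned int		cpu_mask;		/* 0xff0ffff0: CPU ID mask */
	unsigned long		__cpu_mm_mmu_flags;	/* first PMD_* block, used by head.S */
	unsigned long		__cpu_io_mmu_flags;	/* second PMD_* block, used by head.S */
	unsigned long		__cpu_flush;		/* b __arm926_setup */
	const char		*arch_name;		/* cpu_arch_name */
	const char		*elf_name;		/* cpu_elf_name */
	unsigned int		elf_hwcap;		/* HWCAP_* bits */
	const char		*cpu_name;		/* cpu_arm926_name */
	struct processor	*proc;			/* arm926_processor_functions */
	struct cpu_tlb_fns	*tlb;			/* v4wbi_tlb_fns */
	struct cpu_user_fns	*user;			/* v4wb_user_fns */
	struct cpu_cache_fns	*cache;			/* arm926_cache_fns */
};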
3. cpu_tlb & cpu_cache
Now focus on cpu_tlb and cpu_cache.
a) cpu_cache
For example,
unmap_single calls __cpuc_flush_dcache_area; no TLB flush is needed during this operation.
__cpuc_flush_dcache_area(ptr, size); resolves to arm926_flush_kern_dcache_area.
PS:
// Marking a D-cache line invalid makes main memory the valid copy: the line is discarded and the data is fetched from main memory again on the next access.
lr is the return register, so the routine returns at the end with mov pc, lr;
r0 carries the argument ptr and r1 carries size.
The source code:
ENTRY(arm926_flush_kern_dcache_area)
add r1, r0, r1
1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry
add r0, r0, #CACHE_DLINESIZE
cmp r0, r1
blo 1b
mov r0, #0
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
mcr p15, 0, r0, c7, c10, 4 @ drain WB
	mov	pc, lr				@ return to unmap_single
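As a C-level sketch of the same loop (hypothetical helper, not kernel code; the coprocessor encodings are copied from the assembly above):
#include <stddef.h>

#define CACHE_DLINESIZE	32

/* Sketch of arm926_flush_kern_dcache_area: clean+invalidate each D-cache
 * line in [kaddr, kaddr + size), then invalidate the I-cache and drain
 * the write buffer, exactly as the assembly above does. */
static inline void sketch_flush_kern_dcache_area(void *kaddr, size_t size)
{
	unsigned long addr = (unsigned long)kaddr;
	unsigned long end  = addr + size;
	unsigned long zero = 0;

	for (; addr < end; addr += CACHE_DLINESIZE)
		asm volatile("mcr p15, 0, %0, c7, c14, 1" : : "r" (addr) : "memory");	/* clean+invalidate D entry */

	asm volatile("mcr p15, 0, %0, c7, c5, 0"  : : "r" (zero) : "memory");	/* invalidate I cache */
	asm volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (zero) : "memory");	/* drain write buffer */
}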
PS:
Invalidating a line discards the cached copy, so the data is re-fetched from physical memory on the next access;
cleaning (writing back) a valid dirty line flushes its data from the cache out to physical memory, as a write-back cache does.
For example,
/*
 *	flush_kern_cache_all()
 *
 *	Clean and invalidate the entire cache.
 */
ENTRY(arm926_flush_kern_cache_all)
	mov	r2, #VM_EXEC
	mov	ip, #0
__flush_whole_cache:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
#else
1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
	bne	1b
#endif
	tst	r2, #VM_EXEC
	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
	mov	pc, lr
b) cpu_tlb
The TLB functions used here are the v4wbi ones; see the ARM926EJ-S documentation for the details.
For example,
#define local_flush_tlb_kernel_range(s,e)	__cpu_flush_kern_tlb_range(s,e)
static inline void ipi_flush_tlb_kernel_range(void *arg)
{
	struct tlb_args *ta = (struct tlb_args *)arg;

	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}
On entry to the flush routine:
lr = return address (back into ipi_flush_tlb_kernel_range)
r0 = ta->ta_start
r1 = ta->ta_end
local_flush_tlb_kernel_page is just a special case of local_flush_tlb_kernel_range,
with end = start + PAGE_SIZE.
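As a sketch of that relationship (the kernel actually provides local_flush_tlb_kernel_page as its own inline in tlbflush.h, but on this core the effect is the same):
/* Sketch only: a single-page kernel TLB flush is the one-page special
 * case of the range flush. */
static inline void sketch_local_flush_tlb_kernel_page(unsigned long kaddr)
{
	local_flush_tlb_kernel_range(kaddr, kaddr + PAGE_SIZE);
}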
My take:
From the source code we can see that v4wbi_flush_kern_tlb_range has the same loop structure as the D-cache area flush, except that each iteration invalidates the I-TLB and D-TLB entry for a page instead of a cache line. What is more, before the kernel TLB range is invalidated the write buffer is drained, so pending writes are written back first and a later TLB miss does not observe stale data.
This function matters on context switch and on SMP architectures.
The source code:
ENTRY(v4wbi_flush_kern_tlb_range)
	mov	r3, #0
	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
	bic	r0, r0, #0x0ff
	bic	r0, r0, #0xf00
1:	mcr	p15, 0, r0, c8, c5, 1		@ invalidate I TLB entry
	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
	add	r0, r0, #PAGE_SZ
	cmp	r0, r1
	blo	1b
	mov	pc, lr
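In C terms the routine does roughly the following (hypothetical sketch, assuming PAGE_SZ is the 4 KB page size used on ARM926EJ-S):
#define PAGE_SZ	4096

/* Sketch of v4wbi_flush_kern_tlb_range: drain the write buffer first so
 * pending writes complete under the old translations, then invalidate the
 * I-TLB and D-TLB entry for every page in [start, end). */
static inline void sketch_v4wbi_flush_kern_tlb_range(unsigned long start, unsigned long end)
{
	unsigned long zero = 0;

	asm volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (zero) : "memory");	/* drain WB */
	start &= ~(PAGE_SZ - 1UL);	/* page-align, like the two bic instructions */
	for (; start < end; start += PAGE_SZ) {
		asm volatile("mcr p15, 0, %0, c8, c5, 1" : : "r" (start));	/* invalidate I TLB entry */
		asm volatile("mcr p15, 0, %0, c8, c6, 1" : : "r" (start));	/* invalidate D TLB entry */
	}
}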
4. When does the TLB need to be flushed?
My take:
1. First, flush_tlb on a context switch.
2. On SMP, flush_tlb_kernel_range (broadcast to the other CPUs if needed).
3. dma_remap/__dma_free_remap.
4. unmap_area_sections, which is tied to !CONFIG_SMP, as the code below shows.
#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
/*
 * Section support is unsafe on SMP - If you iounmap and ioremap a region,
 * the other CPUs will not see this change until their next context switch.
 * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs
 * which requires the new ioremap'd region to be referenced, the CPU will
 * reference the _old_ region.
 *
 * Note that get_vm_area_caller() allocates a guard 4K page, so we need to
 * mask the size back to 1MB aligned or we will overflow in the loop below.
 */
static void unmap_area_sections(unsigned long virt, unsigned long size)
5. unmap_kernel_range
/**
 * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
 * @addr: start of the VM area to unmap
 * @size: size of the VM area to unmap
 *
 * Similar to unmap_kernel_range_noflush() but flushes vcache before
 * the unmapping and tlb after.
 */
void unmap_kernel_range(unsigned long addr, unsigned long size)
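Its body (paraphrased from the mm/vmalloc.c of that era; treat this as a sketch rather than an exact quote) makes the ordering in the comment explicit: flush the cache for the virtual range, tear down the page-table entries, then flush the kernel TLB:
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
	unsigned long end = addr + size;

	flush_cache_vunmap(addr, end);		/* cache flush before the unmap */
	vunmap_page_range(addr, end);		/* remove the page-table entries */
	flush_tlb_kernel_range(addr, end);	/* TLB flush after the unmap */
}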