In arch/x86/kernel/vmlinux.lds there is:

	.....
	/* will be freed after init
	 * Following ALIGN() is required to make sure no other data falls on the
	 * same page where __smp_alt_end is pointing as that page might be freed
	 * after boot. Always make sure that ALIGN() directive is present after
	 * the section which contains __smp_alt_end.
	 */
	. = ALIGN(PAGE_SIZE);

	/* will be freed after init */
	. = ALIGN(PAGE_SIZE);		/* Init code and data */
	....
	....				//// several lines omitted
	....
	. = ALIGN(PAGE_SIZE);
	.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
		__per_cpu_start = .;
		*(.data.percpu.page_aligned)
		*(.data.percpu)
		*(.data.percpu.shared_aligned)
		__per_cpu_end = .;
	}
	. = ALIGN(PAGE_SIZE);
	/* freed after init ends here */

This shows that __per_cpu_start and __per_cpu_end mark the beginning and the end of the .data.percpu section. Moreover, the whole .data.percpu section lies between __init_begin and __init_end, which means the memory occupied by this section is freed once the system has finished booting.
<include/linux/percpu.h>
#define DEFINE_PER_CPU(type, name)			\
	__attribute__((__section__(".data.percpu")))	\
	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

On x86 and ARM, PER_CPU_ATTRIBUTES is defined as an empty macro, so

	static DEFINE_PER_CPU(struct runqueue, runqueues);

expands to

	static __attribute__((__section__(".data.percpu"))) __typeof__(struct runqueue) per_cpu__runqueues;

In other words, a variable per_cpu__runqueues of type struct runqueue is defined in the .data.percpu section. In fact, this so-called variable per_cpu__runqueues is really nothing more than an offset relative to __per_cpu_start. (On x86 this matches the segment + offset addressing model; how is it done on ARM?)
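A minimal sketch (hypothetical code, not kernel source; the variable demo_counter and the function name are made up) of the point just made, namely that a DEFINE_PER_CPU variable is simply a slot inside .data.percpu at a fixed offset from __per_cpu_start:

	#include <linux/kernel.h>
	#include <linux/init.h>
	#include <linux/percpu.h>

	/* these two symbols are provided by the linker script shown above */
	extern char __per_cpu_start[], __per_cpu_end[];

	static DEFINE_PER_CPU(unsigned long, demo_counter);

	static void __init show_percpu_offset(void)
	{
		/* &per_cpu__demo_counter is a link-time address inside
		 * [__per_cpu_start, __per_cpu_end); only the offset is meaningful */
		unsigned long off = (unsigned long)&per_cpu__demo_counter -
				    (unsigned long)__per_cpu_start;

		printk(KERN_INFO "demo_counter sits %lu bytes into .data.percpu\n", off);
	}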
Initialization function
start_kernel() calls setup_per_cpu_areas():
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;	//// a global array
EXPORT_SYMBOL(__per_cpu_offset);

static void __init setup_per_cpu_areas(void)
{
	unsigned long size, i;
	char *ptr;
	unsigned long nr_possible_cpus = num_possible_cpus();

	/* Copy section for each CPU (we discard the original) */
	size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
	ptr = alloc_bootmem_pages(size * nr_possible_cpus);

	for_each_possible_cpu(i) {
		__per_cpu_offset[i] = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
		ptr += size;
	}
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

This function allocates a block of memory for every possible CPU and copies the contents of .data.percpu into it, so that each CPU gets its own private copy. __per_cpu_offset[n] records how far CPU n's private area is shifted from the original section, i.e. CPU n's area starts at __per_cpu_start + __per_cpu_offset[n]. The value per_cpu__runqueues, originally an offset relative to __per_cpu_start, thereby turns into an offset into CPU n's private area via __per_cpu_offset[n], and the .data.percpu section itself can be freed once system initialization is complete.
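Putting the pieces together, the address of CPU i's copy of any per-CPU variable follows directly from the loop above (a worked-out derivation written as a comment, not kernel source):

	/*
	 * Let off be the link-time offset of a variable `var` inside .data.percpu:
	 *
	 *	&per_cpu__var                  = __per_cpu_start + off
	 *	start of CPU i's private area  = ptr_i = __per_cpu_start + __per_cpu_offset[i]
	 *
	 * so CPU i's copy of `var` lives at
	 *
	 *	ptr_i + off = &per_cpu__var + __per_cpu_offset[i]
	 *
	 * which is exactly the pointer arithmetic that the per_cpu()/RELOC_HIDE
	 * machinery described below performs.
	 */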
//// in percpu.h
#define PERCPU_ENOUGH_ROOM \
	(__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)

#define PERCPU_MODULE_RESERVE	8192	//// space reserved for per-CPU variables of modules loaded later
Data structures
<arch/x86/include/asm/pda.h>
/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
	struct task_struct *pcurrent;	/* 0  Current process */
	unsigned long data_offset;	/* 8  Per cpu data offset from linker address */
	unsigned long kernelstack;	/* 16 top of kernel stack for current */
	unsigned long oldrsp;		/* 24 user rsp for system call */
	int irqcount;			/* 32 Irq nesting counter. Starts -1 */
	unsigned int cpunumber;		/* 36 Logical CPU number */
#ifdef CONFIG_CC_STACKPROTECTOR
	unsigned long stack_canary;	/* 40 stack canary value */
					/* gcc-ABI: this canary MUST be at offset 40!!! */
#endif
	char *irqstackptr;
	short nodenumber;		/* number of current node (32k max) */
	short in_bootmem;		/* pda lives in bootmem */
	unsigned int __softirq_pending;
	unsigned int __nmi_count;	/* number of NMI on this CPUs */
	short mmu_state;
	short isidle;
	struct mm_struct *active_mm;
	unsigned apic_timer_irqs;
	unsigned irq0_irqs;
	unsigned irq_resched_count;
	unsigned irq_call_count;
	unsigned irq_tlb_count;
	unsigned irq_thermal_count;
	unsigned irq_threshold_count;
	unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;

extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);

#define cpu_pda(i) (_cpu_pda[i])
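For per-CPU data the interesting field is data_offset: it stores the same shift that the generic code keeps in __per_cpu_offset[]. For the CPU that is currently running, no _cpu_pda[] lookup is needed at all: since %gs points at the local pda, the 64-bit x86 code of this era fetches the offset %gs-relative. Roughly (reconstructed from memory of asm-x86/percpu.h, shown only to connect data_offset with the macros quoted below):

	/* per-CPU offset of an arbitrary CPU: go through the _cpu_pda[] array */
	#define __per_cpu_offset(cpu)	(cpu_pda(cpu)->data_offset)
	/* per-CPU offset of the running CPU: read the local pda through %gs */
	#define __my_cpu_offset		read_pda(data_offset)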
Accessor functions and macros
<include/asm-generic/percpu.h>
#define per_cpu(var, cpu) \
	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))

With the definitions quoted below, on x86-64 this macro expands to

	(*RELOC_HIDE(&per_cpu__var, cpu_pda(cpu)->data_offset))

i.e. the link-time address of per_cpu__var, shifted by CPU cpu's per-CPU offset in bytes, then dereferenced. The job of per_cpu() is therefore: for the given cpu, select that CPU's element of the per-CPU "array" whose name is per_cpu__var.
<include/asm-generic/percpu.h>
/*
 * Add a offset to a pointer but keep the pointer as is.
 *
 * Only S390 provides its own means of moving the pointer.
 */
#ifndef SHIFT_PERCPU_PTR
#define SHIFT_PERCPU_PTR(__p, __offset)	RELOC_HIDE((__p), (__offset))
#endif
=========================================================
<include/linux/compiler-gcc.h>
/* This macro obfuscates arithmetic on a variable address so that gcc
   shouldn't recognize the original var, and make assumptions about it */
#define RELOC_HIDE(ptr, off)					\
  ({ unsigned long __ptr;					\
     __asm__ ("" : "=r"(__ptr) : "0"(ptr));			\
    (typeof(ptr)) (__ptr + (off)); })
//// returns a pointer of the same type as ptr, pointing off bytes past ptr
=========================================================
<include/asm-generic/percpu.h>
#define per_cpu_var(var) per_cpu__##var
//// merely forms the variable name, e.g. per_cpu_var(runqueues) becomes per_cpu__runqueues
==========================================================
#define per_cpu_offset(x) (__per_cpu_offset(x))
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
//// data_offset: "Per cpu data offset from linker address", i.e. how far this CPU's
//// private area is shifted away from the link-time .data.percpu section

extern struct x8664_pda **_cpu_pda;	//// global variable
#define cpu_pda(i) (_cpu_pda[i])

struct x8664_pda is the "per processor data structure" that describes one CPU's per-CPU bookkeeping; see the "Data structures" section above. Note: this structure is x86-64 specific; how is the same thing done on ARM?
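Putting these macros together, an open-coded equivalent of per_cpu(runqueues, cpu) under the x86-64 definitions above would look roughly as follows (an illustrative sketch only; in the real kernel per_cpu__runqueues is static to kernel/sched.c, so such a helper could not live elsewhere):

	static inline struct runqueue *runqueue_of(int cpu)
	{
		char *base = (char *)&per_cpu__runqueues;	/* link-time address of the prototype copy */
		unsigned long off = cpu_pda(cpu)->data_offset;	/* CPU cpu's per-CPU shift */

		/* same arithmetic RELOC_HIDE() performs, minus the gcc obfuscation */
		return (struct runqueue *)(base + off);
	}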
get_cpu_var(var) and __get_cpu_var(var)
#define get_cpu_var(var) (*({				\
	extern int simple_identifier_##var(void);	\
	preempt_disable();				\
	&__get_cpu_var(var); }))
//// note: preempt_disable() switches kernel preemption off before the address is taken
==================================================
#define __get_cpu_var(var) \
	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))

__get_cpu_var(var) is therefore the copy of var that belongs to the CPU we are currently running on: &per_cpu__var shifted by my_cpu_offset (the running CPU's per-CPU offset) and dereferenced.
put_cpu_var(var)
#define put_cpu_var(var) preempt_enable()	//// only re-enables kernel preemption

What is the point of that? get_cpu_var() disabled preemption so that the task cannot be migrated to another CPU while it is working on "its" CPU's copy of the variable; put_cpu_var() marks the end of that access and switches preemption back on.
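A minimal usage sketch of the pairing (hypothetical code, reusing the made-up demo_counter variable from the earlier sketch):

	static DEFINE_PER_CPU(unsigned long, demo_counter);	/* as in the sketch above */

	static void bump_local_counter(void)
	{
		/* get_cpu_var(): preemption off, evaluates to this CPU's copy */
		get_cpu_var(demo_counter)++;
		/* put_cpu_var(): access finished, preemption back on */
		put_cpu_var(demo_counter);
	}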
alloc_percpu(type): dynamically allocates a per-CPU object of the given type (one copy per CPU) and returns its address.
#define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type))
#define __alloc_percpu(size)	percpu_alloc_mask((size), GFP_KERNEL, cpu_possible_map)
#define percpu_alloc_mask(size, gfp, mask) \
	__percpu_alloc_mask((size), (gfp), &(mask))

Defined in mm/allocpercpu.c:

/**
 * percpu_alloc_mask - initial setup of per-cpu data
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @mask: populate per-data for cpu's selected through mask bits
 *
 * Populating per-cpu data for all online cpu's would be a typical use case,
 * which is simplified by the percpu_alloc() wrapper.
 * Per-cpu objects are populated with zeroed buffers.
 */
void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
{
	/*
	 * We allocate whole cache lines to avoid false sharing
	 */
	size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
	void *pdata = kzalloc(sz, gfp);
	void *__pdata = __percpu_disguise(pdata);

	if (unlikely(!pdata))
		return NULL;
	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
		return __pdata;
	kfree(pdata);
	return NULL;
}
EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
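A short usage sketch of the dynamic interface (hypothetical code; it assumes the companion helpers per_cpu_ptr(), free_percpu() and get_cpu()/put_cpu() of the same kernel era):

	#include <linux/init.h>
	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <linux/errno.h>

	struct my_stats {
		unsigned long rx;
		unsigned long tx;
	};

	static struct my_stats *stats;	/* handle returned by alloc_percpu() */

	static int __init stats_init(void)
	{
		stats = alloc_percpu(struct my_stats);	/* one zeroed copy per possible CPU */
		if (!stats)
			return -ENOMEM;
		return 0;
	}

	static void stats_account_rx(void)
	{
		/* per_cpu_ptr() resolves the handle to one CPU's copy;
		 * get_cpu()/put_cpu() pin us to that CPU in the meantime */
		struct my_stats *s = per_cpu_ptr(stats, get_cpu());
		s->rx++;
		put_cpu();
	}

	static void stats_exit(void)
	{
		free_percpu(stats);
	}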