多核中percpu

What is percpu data?
        percpu data 是内核在 SMP 系统中为不同 CPU 之间提供数据保护的一种方式:系统为每个 CPU 维护一段私有空间,这段空间中的数据只有该 CPU 会访问。但这种方式不提供对异步函数访问的保护,因此在同一个 CPU 上仍需配合其他同步原语使用。

arch/x86/kernel/vmlinux.lds中有:
..... 
 /* will be freed after init
   * Following ALIGN() is required to make sure no other data falls on the
   * same page where __smp_alt_end is pointing as that page might be freed
   * after boot. Always make sure that ALIGN() directive is present after
   * the section which contains __smp_alt_end.
   */
  . = ALIGN(PAGE_SIZE);

  /* will be freed after init */
  . = ALIGN(PAGE_SIZE);        /* Init code and data */
....
....////省略若干行
....
  . = ALIGN(PAGE_SIZE);
  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
    __per_cpu_start = .;
    *(.data.percpu.page_aligned)
    *(.data.percpu)
    *(.data.percpu.shared_aligned)
    __per_cpu_end = .;
  }
  . = ALIGN(PAGE_SIZE);
  /* freed after init ends here */

        这说明__per_cpu_start和__per_cpu_end标识.data.percpu这个section的开头和结尾。并且,整个.data.percpu这个section都在__init_begin和__init_end之间,也就是说,该section所占内存会在系统启动后释放(free)掉。


<include/linux/percpu.h>

#define DEFINE_PER_CPU(type, name)                    \
    __attribute__((__section__(".data.percpu")))            \
    PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

在x86和ARM中,PER_CPU_ATTRIBUTES定义为空宏,所以
static DEFINE_PER_CPU(struct runqueue, runqueues); 
会扩展成
__attribute__((__section__(".data.percpu"))) __typeof__(struct runqueue) per_cpu__runqueues;
也就是在.data.percpu这个section中定义了一个变量per_cpu__runqueues,其类型是struct runqueue。
事实上,这里所谓的变量per_cpu__runqueues,其实就是相对于__per_cpu_start的偏移量。(在x86中是段+偏移的寻址方式,在ARM中如何?)

初始化函数
在start_kernel()函数中会调用setup_per_cpu_areas()

#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;

EXPORT_SYMBOL(__per_cpu_offset);一个全局变量

static void __init setup_per_cpu_areas(void)
{
    unsigned long size, i;
    char *ptr;
    unsigned long nr_possible_cpus = num_possible_cpus();

    /* Copy section for each CPU (we discard the original) */
    size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
    ptr = alloc_bootmem_pages(size * nr_possible_cpus);

    for_each_possible_cpu(i) {
        __per_cpu_offset[i] = ptr - __per_cpu_start;
        memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
        ptr += size;
    }
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */

        在该函数中,为每个CPU分配一段内存,并将.data.percpu中的数据拷贝到其中,每个CPU各有一份,其中CPU n对应的专有数据区的首地址为__per_cpu_offset[n]。这样,前述相对于__per_cpu_start的偏移量 per_cpu__runqueues就变成了相对于__per_cpu_offset[n]的偏移量,这样.data.percpu这个section在系统初始化后就可以释放了。











////在percpu.h中
#define PERCPU_ENOUGH_ROOM                        \
    (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)

#define PERCPU_MODULE_RESERVE    8192  ////保留空间

数据结构

<arch/x86/include/asm/pda.h>
/* Per processor datastructure. %gs points to it while the kernel runs */
struct x8664_pda {
    struct task_struct *pcurrent;    /* 0  Current process */
    unsigned long data_offset;    /* 8 Per cpu data offset from linker
                       address */
    unsigned long kernelstack;    /* 16 top of kernel stack for current */
    unsigned long oldrsp;        /* 24 user rsp for system call */
    int irqcount;            /* 32 Irq nesting counter. Starts -1 */
    unsigned int cpunumber;        /* 36 Logical CPU number */
#ifdef CONFIG_CC_STACKPROTECTOR
    unsigned long stack_canary;    /* 40 stack canary value */
                    /* gcc-ABI: this canary MUST be at
                       offset 40!!! */
#endif
    char *irqstackptr;
    short nodenumber;        /* number of current node (32k max) */
    short in_bootmem;        /* pda lives in bootmem */
    unsigned int __softirq_pending;
    unsigned int __nmi_count;    /* number of NMI on this CPUs */
    short mmu_state;
    short isidle;
    struct mm_struct *active_mm;
    unsigned apic_timer_irqs;
    unsigned irq0_irqs;
    unsigned irq_resched_count;
    unsigned irq_call_count;
    unsigned irq_tlb_count;
    unsigned irq_thermal_count;
    unsigned irq_threshold_count;
    unsigned irq_spurious_count;
} ____cacheline_aligned_in_smp;

extern struct x8664_pda **_cpu_pda;
extern void pda_init(int);

#define cpu_pda(i) (_cpu_pda[i])

操作函数、宏

<include/asm-generic/percpu.h>
#define per_cpu(var, cpu) \
    (*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))

所以这个宏展开为:
#define per_cpu(var, cpu) \
   (*RELOC_HIDE(&per_cpu__var, cpu_pda(cpu)->data_offset))
即:取地址 &per_cpu__var + cpu_pda(cpu)->data_offset 处的那个变量。

因此,per_cpu 这个宏的功能是:取得 cpu 对应的
每CPU私有数据区中名为 per_cpu__var 的那份副本。
<include/asm-generic/percpu.h>
/*
 * Add a offset to a pointer but keep the pointer as is.
 *
 * Only S390 provides its own means of moving the pointer.
 */
#ifndef SHIFT_PERCPU_PTR
#define SHIFT_PERCPU_PTR(__p, __offset)    RELOC_HIDE((__p), (__offset))
#endif
=========================================================
<include/linux/compiler-gcc.h>
/* This macro obfuscates arithmetic on a variable address so that gcc
   shouldn't recognize the original var, and make assumptions about it */

#define RELOC_HIDE(ptr, off)                    \
  ({ unsigned long __ptr;                                \
    __asm__ ("" : "=r"(__ptr) : "0"(ptr));        \
    (typeof(ptr)) (__ptr + (off)); })
////这个宏返回一个ptr型的指针,指向ptr+off
=========================================================
<include/asm-generic/percpu.h>
#define per_cpu_var(var) per_cpu__##var
///这个宏就定义一变量,
////如#define per_cpu_var(runqueues) per_cpu__runqueuesvar
==========================================================

#define per_cpu_offset(x) (__per_cpu_offset(x))
#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
////data_offset-----Per cpu data offset from linker address.链接时候给定
的这个变量的偏移地址,也就是这个变量的名字
extern struct x8664_pda **_cpu_pda;
#define cpu_pda(i) (_cpu_pda[i])////全局变量
struct x8664_pda 是一个Per processor datastructure.用来描述一个percpu data.
这个数据结构的描述见“数据结构”一节。
注:这个结构只是在x86体系结构下面的,在ARM下如何???

get_cpu_var(var) 和  __get_cpu_var(var) 

#define get_cpu_var(var) (*({                \
    extern int simple_identifier_##var(void);    \
    preempt_disable();                \                         ////禁止内核抢占
    &__get_cpu_var(var); }))
==================================================
#define __get_cpu_var(var) \
    (*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
相当于:取地址 &per_cpu__var + my_cpu_offset 处的变量,即本 CPU 私有数据区中的那份副本。
 

put_cpu_var(var)

#define put_cpu_var(var) preempt_enable()////仅仅是启动内核抢占!!!什么意思呢?

alloc_percpu(type)  动态分配type类型的每CPU数组,返回其地址。

#define alloc_percpu(type)        (type *)__alloc_percpu(sizeof(type))
#define __alloc_percpu(size)        percpu_alloc_mask((size), GFP_KERNEL, cpu_possible_map)
#define percpu_alloc_mask(size, gfp, mask)        __percpu_alloc_mask((size), (gfp), &(mask))
在mm/allocpercpu.c中定义:
/**
 * percpu_alloc_mask - initial setup of per-cpu data
 * @size: size of per-cpu object
 * @gfp: may sleep or not etc.
 * @mask: populate per-data for cpu's selected through mask bits
 *
 * Populating per-cpu data for all online cpu's would be a typical use case,
 * which is simplified by the percpu_alloc() wrapper.
 * Per-cpu objects are populated with zeroed buffers.
 */
void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
{
    /*
     * We allocate whole cache lines to avoid false sharing
     */
    size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
    void *pdata = kzalloc(sz, gfp);
    void *__pdata = __percpu_disguise(pdata);

    if (unlikely(!pdata))
        return NULL;
    if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
        return __pdata;
    kfree(pdata);
    return NULL;
}
EXPORT_SYMBOL_GPL(__percpu_alloc_mask);

你可能感兴趣的:(多核中percpu)