struct timespec xtime; The timespec data structure is defined in <linux/time.h> as: struct timespec { __kernel_time_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ };xtime.tv_sec保存了从1970年1月1日(UTC)以来的秒数,这一起始时刻叫做epoch,
unsigned long seq; do { unsigned long lost; seq = read_seqbegin(&xtime_lock); usec = timer->get_offset(); lost = jiffies - wall_jiffies; if (lost) usec += lost * (1000000 / HZ); sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); } while (read_seqretry(&xtime_lock, seq));
asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) { if (likely(tv)) { struct timeval ktv; do_gettimeofday(&ktv); if (copy_to_user(tv, &ktv, sizeof(ktv))) return -EFAULT; } if (unlikely(tz)) { if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) return -EFAULT; } return 0; }
5. Timers
Timers由结构体timer_list表示, 定义在 <linux/timer.h>:
/*
 * A kernel timer: when the timer expires (expires, measured in jiffies),
 * function is invoked with data as its only argument.
 */
struct timer_list {
	struct list_head entry;          /* entry in linked list of timers */
	unsigned long expires;           /* expiration value, in jiffies */
	void (*function)(unsigned long); /* the timer handler function */
	unsigned long data;              /* lone argument to the handler */
	struct tvec_t_base_s *base;      /* internal timer field, do not touch */
};
时间到了执行函数的原型:
void my_timer_function(unsigned long data);
最后激活timer:
add_timer(&my_timer);
mod_timer()用来修改已激活timer的到期时间;它也可以操作已经初始化但还没激活的timer,此时运行后timer被激活.
在timer到期前取消timer:
del_timer(&my_timer);
取消并等待执行函数完成:
del_timer_sync(&my_timer);//和del_timer不同,不能用在中断上下文.
内核在时钟中断处理完成后,在bottom-half上下文中以softirq的形式执行timers.时钟中断处理程序运行update_process_times(),它会调用run_local_timers():
/*
 * Per-tick timer work: runs the hrtimer queues, raises TIMER_SOFTIRQ so
 * that expired timers are handled later in softirq context, and then
 * calls softlockup_tick().
 */
void run_local_timers(void)
{
	hrtimer_run_queues();
	raise_softirq(TIMER_SOFTIRQ); /* raise the timer softirq */
	softlockup_tick();
}
unsigned long timeout = jiffies + 10; /* ten ticks */ while (time_before(jiffies, timeout)) ;这样系统会死等,下面的方法在等待时候允许其他进程运行:
unsigned long delay = jiffies + 5*HZ; while (time_before(jiffies, delay)) cond_resched();
udelay使用busy looping实现:内核根据BogoMIPS(启动时测得的loops_per_jiffy)换算出指定延时所需的循环次数.
更优化的delay方法:
schedule_timeout(),delay时任务进入sleep状态直至到期被唤醒.使用方法:
/* set task’s state to interruptible sleep */ set_current_state(TASK_INTERRUPTIBLE); /* take a nap and wake up in “s” seconds */ schedule_timeout(s * HZ);
/*
 * Descriptor for one physical page frame.  It describes the physical
 * page itself, not the data currently stored in it.
 */
struct page {
	unsigned long flags;           /* page status bits, see <linux/page-flags.h> */
	atomic_t _count;               /* usage count; read it via page_count() */
	atomic_t _mapcount;
	unsigned long private;         /* private data */
	struct address_space *mapping; /* address_space when used by the page cache */
	pgoff_t index;
	struct list_head lru;
	void *virtual;                 /* virtual address of the page */
};
flags: 保存页的状态,共32个bit用来表示状态,定义在<linux/page-flags.h>.
_count: 保存页的使用计数(引用数).为-1时表示该页没有被使用,可以用于新的分配.kernel代码应使用page_count()而不是直接访问该成员:page_count()返回0表示页空闲,非0表示在使用.一个page可以被page cache使用(此时mapping指向关联该页的address_space对象),可以作为private data(由private指向),也可以作为进程页表中的映射.
virtual: 是页的虚拟地址.
page结构关联的是物理页,不是虚拟页.用来表示物理内存,而不是其中的数据.
3. kernel将页分成不同zones. Linux有4个基本的memory zones(定义在<linux/mmzone.h>):
ZONE_DMA: 包含的页可以进行DMA.
ZONE_DMA32: 包含的页可以进行DMA, 但只能被32位设备访问.
ZONE_NORMAL: 包含普通的,可被映射的页.
ZONE_HIGHMEM: 包含high memory,这些内容不能永久被内核地址空间映射.
zones结构( <linux/mmzone.h>):
struct zone { unsigned long watermark[NR_WMARK]; unsigned long lowmem_reserve[MAX_NR_ZONES]; struct per_cpu_pageset pageset[NR_CPUS]; spinlock_t lock; struct free_area free_area[MAX_ORDER] spinlock_t lru_lock; struct zone_lru { struct list_head list; unsigned long nr_saved_scan; } lru[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; unsigned long pages_scanned; unsigned long flags; atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; int prev_priority; unsigned int inactive_ratio; wait_queue_head_t *wait_table; unsigned long wait_table_hash_nr_entries; unsigned long wait_table_bits; struct pglist_data *zone_pgdat; unsigned long zone_start_pfn; unsigned long spanned_pages; unsigned long present_pages; const char *name; };lock: 用来保护该结构避免被并发进程访问.用来保护该结构而不是其代表的zones的内容.
unsigned long page;

/* Order 3 requests 2^3 = 8 contiguous pages. */
page = __get_free_pages(GFP_KERNEL, 3);
if (!page) {
	/* insufficient memory: you must handle this error! */
	return -ENOMEM;
}

/* 'page' is now the address of the first of eight contiguous pages ... */
释放内存例子:
free_pages(page, 3); /* * our pages are now freed and we should no * longer access the address stored in ‘page’ */
kmalloc()用来分配基于字节数目的内核内存.定义在 <linux/slab.h>,成功返回分配的内存地址,否则返回NULL.
void * kmalloc(size_t size, gfp_t flags);
例子:
struct dog *p; p = kmalloc(sizeof(struct dog), GFP_KERNEL); if (!p) /* handle error ... */gfp_t,定义在 <linux/types.h>,是如何分配内存的标志,有Action Modifiers,Zone Modifiers和Type Flags之分.
Zone Modifiers:指明内存从哪个zone开始分配.
char *buf; buf = kmalloc(BUF_SIZE, GFP_ATOMIC); if (!buf) /* error allocating memory ! */ .... kfree(buf);
char *buf; buf = vmalloc(16 * PAGE_SIZE); /* get 16 pages */ if (!buf) /* error! failed to allocate memory */ /* * buf now points to at least a 16*PAGE_SIZE bytes * of virtually contiguous block of memory */ After you finish with the memory, make sure to free it by using vfree(buf);
struct slab { struct list_head list; /* full, partial, or empty list */ unsigned long colouroff; /* offset for the slab coloring */ void *s_mem; /* first object in the slab */ unsigned int inuse; /* allocated objects in the slab */ kmem_bufctl_t free; /* first free object, if any */ };使用kmem_getpages通过调用__get_free_pages()来分配新的slab.
struct kmem_cache * kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *));成功返回创建的cache的指针,失败返回NULL.该函数不能在interrupt上下文中调用,因为它可能会睡眠.
int kmem_cache_destroy(struct kmem_cache *cachep); 成功返回0,失败返回非0值.cache创建完成后,object可以通过以下函数来获得.
void * kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags);Example:
struct kmem_cache *task_struct_cachep;在 fork_init()中创建:
/*
 * Create the cache that holds process descriptors: one object per
 * struct task_struct, aligned to ARCH_MIN_TASKALIGN, with no constructor.
 */
task_struct_cachep = kmem_cache_create("task_struct",
				       sizeof(struct task_struct),
				       ARCH_MIN_TASKALIGN,
				       SLAB_PANIC | SLAB_NOTRACK,
				       NULL);
进程调用 fork()创建新的进程时,新的进程描述被创建, do_fork()- dup_task_struct():
struct task_struct *tsk; tsk = kmem_cache_alloc(task_struct_cachep, GFP_KERNEL); if (!tsk) return NULL;任务终止后,如果没有子任务等待,进程描述符被释放回 task_struct_cachep slab cache, free_task_struct()调用:
kmem_cache_free(task_struct_cachep, tsk);进程描述符是内核的核心部分,一直被需要,因而task_struct_cachep这个cache永远不会被销毁.
void *percpu_ptr; unsigned long *foo; percpu_ptr = alloc_percpu(unsigned long); if (!ptr) /* error allocating memory .. */ foo = get_cpu_var(percpu_ptr); /* manipulate foo .. */ put_cpu_var(percpu_ptr);