scheduler 怎样得到 next task_struct

/**/

__schedule 调用函数pick_next_task从rq中得到一个task

/*
 * Choose the next task to run on @rq: the first task offered by any
 * scheduling class, starting from the highest-priority class.
 */
static inline struct task_struct *
pick_next_task(struct rq *rq)
{
    const struct sched_class *cls;
    struct task_struct *next;

    /*
     * Fast path: when the rq's runnable count equals the count queued
     * in CFS, every runnable task is in the fair class, so that class
     * can be asked directly without walking the class list.
     */
    if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
        next = fair_sched_class.pick_next_task(rq);
        if (likely(next))
            return next;
    }

    /* Slow path: try each class in turn until one yields a task. */
    for_each_class(cls) {
        next = cls->pick_next_task(rq);
        if (!next)
            continue;
        return next;
    }

    /* Not reached: the idle class will always have a runnable task. */
    BUG();
}

/* Head of the singly linked list of scheduling classes. */
#define sched_class_highest (&stop_sched_class)
/* Visit every scheduling class, following ->next until it is NULL. */
#define for_each_class(cls) \
   for ((cls) = sched_class_highest; (cls); (cls) = (cls)->next)
sched_class 的 next 成员指向下一个 class,形成一个单向链表。
用 crash 的 list 命令沿 next 成员遍历,即可得到所有的 class:
crash> sym stop_sched_class
c0454ee8 (R) stop_sched_class
crash> list sched_class.next c0454ee8
c0454ee8
c0454e80
c0454e24
c0454dc8
crash> sym c0454ee8 c0454e80 c0454e24 c0454dc8
c0454ee8 (R) stop_sched_class
c0454e80 (R) rt_sched_class
c0454e24 (R) fair_sched_class
c0454dc8 (R) idle_sched_class

crash> list sched_class.next -s sched_class.pick_next_task c0454ee8
c0454ee8
  pick_next_task = 0xc0055850 <pick_next_task_stop>
c0454e80
  pick_next_task = 0xc0054b4c <pick_next_task_rt>
c0454e24
  pick_next_task = 0xc0050e4c <pick_next_task_fair>
c0454dc8
  pick_next_task = 0xc004fe08 <pick_next_task_idle>

各sched_class是怎样得到next_task的?

idle:直接得到

/*
 * Idle class pick: nothing to search, the rq's idle task is handed back
 * directly after the sched_goidle schedstat is bumped and
 * calc_load_account_idle() has run.
 */
static struct task_struct *pick_next_task_idle(struct rq *rq)
{
    struct task_struct *idle_task;

    /* Bookkeeping happens before the task is looked up. */
    schedstat_inc(rq, sched_goidle);
    calc_load_account_idle(rq);

    idle_task = rq->idle;
    return idle_task;
}

fair class:通过红黑树

/*
 * Fair (CFS) class pick: starting at the rq's top-level cfs_rq, choose one
 * entity per level and descend until the chosen entity has no runqueue of
 * its own, then return the task that entity represents.
 *
 * Returns NULL when the top-level cfs_rq has nothing runnable.
 */
static struct task_struct *pick_next_task_fair(struct rq *rq)
{
    struct task_struct *p;
    struct cfs_rq *cfs_rq = &rq->cfs;
    struct sched_entity *se;

    if (!cfs_rq->nr_running)
        return NULL;

    do {
        se = pick_next_entity(cfs_rq);
        set_next_entity(cfs_rq, se);
        /*
         * Step down one level.  group_cfs_rq() (kernel/sched/fair.c)
         * gives the runqueue owned by a group entity, or NULL for a
         * plain task.  Without this assignment cfs_rq would stay at
         * &rq->cfs (non-NULL) and the loop would never terminate.
         */
        cfs_rq = group_cfs_rq(se);
    } while (cfs_rq);

    p = task_of(se);

    return p;
}

pick_next_entity -> __pick_first_entity -> {struct rb_node *left = cfs_rq->rb_leftmost;}
cfs:这里才用到rb_node

rt:

pick_next_task_rt:
使用的是位图bitmap

/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space.  This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

/* Number of RT priority levels exported to user space. */
#define MAX_USER_RT_PRIO    100
/* Number of RT priority levels in the kernel; here equal to the user value. */
#define MAX_RT_PRIO        MAX_USER_RT_PRIO

/* Total number of priority levels: 100 RT + 40 non-RT = 140. */
#define MAX_PRIO        (MAX_RT_PRIO + 40)
/* Middle of the non-RT range: 100 + 20 = 120. */
#define DEFAULT_PRIO        (MAX_RT_PRIO + 20)

/*
 * This is the priority-queue data structure of the RT scheduling class:
 * a bitmap paired with an array of list heads indexed by RT priority
 * (0..MAX_RT_PRIO-1).
 */
struct rt_prio_array {
    /* NOTE(review): presumably a set bit marks a non-empty queue[] slot - confirm. */
    DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
    /* One list head per RT priority level. */
    struct list_head queue[MAX_RT_PRIO];
};
并不是每个优先级都对应一个 queue,只有 RT 优先级 [0, 99](共 MAX_RT_PRIO = 100 个)才有。

进程的 sched_class 成员是何时赋值的?

创建进程时或设置优先级时
1.
void sched_set_stop_task(int cpu, struct task_struct *stop)
    -> stop->sched_class = &stop_sched_class;

2./*设置优先级时*/
void rt_mutex_setprio(struct task_struct *p, int prio)
    ->     if (rt_prio(prio))
        p->sched_class = &rt_sched_class;
3.
void sched_fork(struct task_struct *p)
    ->     if (!rt_prio(p->prio))
        p->sched_class = &fair_sched_class;

4.
void __cpuinit init_idle(struct task_struct *idle, int cpu)
    idle->sched_class = &idle_sched_class;

与进程调度有关的数据结构

crash> rt_rq -o

struct rt_rq {
    [0] struct rt_prio_array active;
  [816] unsigned long rt_nr_running;
        struct {
            int curr;
            int next;
  [820] } highest_prio;
  [828] unsigned long rt_nr_migratory;
  [832] unsigned long rt_nr_total;
  [836] int overloaded;
  [840] struct plist_head pushable_tasks;
  [848] int rt_throttled;
  [856] u64 rt_time;
  [864] u64 rt_runtime;
  [872] raw_spinlock_t rt_runtime_lock;
}
SIZE: 880

其中的 rt_prio_array 占了 880 字节中的 816 字节。
crash> rt_prio_array
struct rt_prio_array {
    unsigned long bitmap[4];
    struct list_head queue[100];
}
SIZE: 816

crash> cfs_rq

struct cfs_rq {
    struct load_weight load;
    unsigned long nr_running;
    unsigned long h_nr_running;
    u64 exec_clock;
    u64 min_vruntime;
    u64 min_vruntime_copy;
    struct rb_root tasks_timeline;
    struct rb_node *rb_leftmost;
    struct sched_entity *curr;
    struct sched_entity *next;
    struct sched_entity *last;
    struct sched_entity *skip;
    unsigned int nr_spread_over;
}
SIZE: 72

crash> struct rq -o

struct rq {
     [0] raw_spinlock_t lock;
     [4] unsigned long nr_running;
     [8] unsigned long cpu_load[5];
    [28] unsigned long last_load_update_tick;
    [32] u64 nohz_stamp;
    [40] unsigned long nohz_flags;
    [44] int skip_clock_update;
    [48] struct load_weight load;
    [56] unsigned long nr_load_updates;
    [64] u64 nr_switches;
    [72] struct cfs_rq cfs;
   [144] struct rt_rq rt;
  [1024] unsigned long nr_uninterruptible;
  [1028] struct task_struct *curr;
  [1032] struct task_struct *idle;
  [1036] struct task_struct *stop;
  [1040] unsigned long next_balance;
  [1044] struct mm_struct *prev_mm;
  [1048] u64 clock;
  [1056] u64 clock_task;
  [1064] atomic_t nr_iowait;
  [1068] struct root_domain *rd;
  [1072] struct sched_domain *sd;
  [1076] unsigned long cpu_power;
  [1080] unsigned char idle_balance;
  [1084] int post_schedule;
  [1088] int active_balance;
  [1092] int push_cpu;
  [1096] struct cpu_stop_work active_balance_work;
  [1116] int cpu;
  [1120] int online;
  [1124] struct list_head cfs_tasks;
  [1136] u64 rt_avg;
  [1144] u64 age_stamp;
  [1152] u64 idle_stamp;
  [1160] u64 avg_idle;
  [1168] unsigned long calc_load_update;
  [1172] long calc_load_active;
  [1176] struct llist_head wake_list;
}
SIZE: 1184

crash> task_struct -o

struct task_struct {
    [0] volatile long state;
    [4] void *stack;
    [8] atomic_t usage;
   [12] unsigned int flags;
   [16] unsigned int ptrace;
   [20] struct llist_node wake_entry;
   [24] int on_cpu;
   [28] int on_rq;
   [32] int prio;
   [36] int static_prio;
   [40] int normal_prio;
   [44] unsigned int rt_priority;
   [48] const struct sched_class *sched_class;
   [56] struct sched_entity se;
  [128] struct sched_rt_entity rt;
  [152] unsigned char fpu_counter;
  [156] unsigned int policy;
  [160] cpumask_t cpus_allowed;
  [164] struct list_head tasks;
  [172] struct plist_node pushable_tasks;
  [192] struct mm_struct *mm;
  [196] struct mm_struct *active_mm;
  [200] unsigned int brk_randomized : 1;
  [204] struct task_rss_stat rss_stat;
  [220] int exit_state;
  [224] int exit_code;
  [228] int exit_signal;
  [232] int pdeath_signal;
  [236] unsigned int jobctl;
  [240] unsigned int personality;
  [244] unsigned int did_exec : 1;
  [244] unsigned int in_execve : 1;
  [244] unsigned int in_iowait : 1;
  [244] unsigned int sched_reset_on_fork : 1;
  [244] unsigned int sched_contributes_to_load : 1;
  [244] unsigned int irq_thread : 1;
  [248] pid_t pid;
  [252] pid_t tgid;
  [256] struct task_struct *real_parent;
  [260] struct task_struct *parent;
  [264] struct list_head children;
  [272] struct list_head sibling;
  [280] struct task_struct *group_leader;
  [284] struct list_head ptraced;
  [292] struct list_head ptrace_entry;
  [300] struct pid_link pids[3];
  [336] struct list_head thread_group;
  [344] struct completion *vfork_done;
  [348] int *set_child_tid;
  [352] int *clear_child_tid;
  [356] cputime_t utime;
  [360] cputime_t stime;
  [364] cputime_t utimescaled;
  [368] cputime_t stimescaled;
  [372] cputime_t gtime;
  [376] cputime_t prev_utime;
  [380] cputime_t prev_stime;
  [384] unsigned long nvcsw;
  [388] unsigned long nivcsw;
  [392] struct timespec start_time;
  [400] struct timespec real_start_time;
  [408] unsigned long min_flt;
  [412] unsigned long maj_flt;
  [416] struct task_cputime cputime_expires;
  [432] struct list_head cpu_timers[3];
  [456] const struct cred *real_cred;
  [460] const struct cred *cred;
  [464] struct cred *replacement_session_keyring;
  [468] char comm[16];
  [484] int link_count;
  [488] int total_link_count;
  [492] struct sysv_sem sysvsem;
  [496] unsigned long last_switch_count;
  [500] struct thread_struct thread;
  [512] struct fs_struct *fs;
  [516] struct files_struct *files;
  [520] struct nsproxy *nsproxy;
  [524] struct signal_struct *signal;
  [528] struct sighand_struct *sighand;
  [532] sigset_t blocked;
  [540] sigset_t real_blocked;
  [548] sigset_t saved_sigmask;
  [556] struct sigpending pending;
  [572] unsigned long sas_ss_sp;
  [576] size_t sas_ss_size;
  [580] int (*notifier)(void *);
  [584] void *notifier_data;
  [588] sigset_t *notifier_mask;
  [592] struct audit_context *audit_context;
  [596] seccomp_t seccomp;
  [596] u32 parent_exec_id;
  [600] u32 self_exec_id;
  [604] spinlock_t alloc_lock;
  [608] raw_spinlock_t pi_lock;
  [612] struct plist_head pi_waiters;
  [620] struct rt_mutex_waiter *pi_blocked_on;
  [624] void *journal_info;
  [628] struct bio_list *bio_list;
  [632] struct blk_plug *plug;
  [636] struct reclaim_state *reclaim_state;
  [640] struct backing_dev_info *backing_dev_info;
  [644] struct io_context *io_context;
  [648] unsigned long ptrace_message;
  [652] siginfo_t *last_siginfo;
  [656] struct task_io_accounting ioac;
  [656] struct robust_list_head *robust_list;
  [660] struct list_head pi_state_list;
  [668] struct futex_pi_state *pi_state_cache;
  [672] struct rcu_head rcu;
  [680] struct pipe_inode_info *splice_pipe;
  [684] int nr_dirtied;
  [688] int nr_dirtied_pause;
  [692] unsigned long dirty_paused_when;
  [696] unsigned long timer_slack_ns;
  [700] unsigned long default_timer_slack_ns;
  [704] struct list_head *scm_work_list;
}
SIZE: 712

crash> mm_struct -o

struct mm_struct {
    [0] struct vm_area_struct *mmap;
    [4] struct rb_root mm_rb;
    [8] struct vm_area_struct *mmap_cache;
   [12] unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   [16] void (*unmap_area)(struct mm_struct *, unsigned long);
   [20] unsigned long mmap_base;
   [24] unsigned long task_size;
   [28] unsigned long cached_hole_size;
   [32] unsigned long free_area_cache;
   [36] pgd_t *pgd;
   [40] atomic_t mm_users;
   [44] atomic_t mm_count;
   [48] int map_count;
   [52] spinlock_t page_table_lock;
   [56] struct rw_semaphore mmap_sem;
   [72] struct list_head mmlist;
   [80] unsigned long hiwater_rss;
   [84] unsigned long hiwater_vm;
   [88] unsigned long total_vm;
   [92] unsigned long locked_vm;
   [96] unsigned long pinned_vm;
  [100] unsigned long shared_vm;
  [104] unsigned long exec_vm;
  [108] unsigned long stack_vm;
  [112] unsigned long reserved_vm;
  [116] unsigned long def_flags;
  [120] unsigned long nr_ptes;
  [124] unsigned long start_code;
  [128] unsigned long end_code;
  [132] unsigned long start_data;
  [136] unsigned long end_data;
  [140] unsigned long start_brk;
  [144] unsigned long brk;
  [148] unsigned long start_stack;
  [152] unsigned long arg_start;
  [156] unsigned long arg_end;
  [160] unsigned long env_start;
  [164] unsigned long env_end;
  [168] unsigned long saved_auxv[40];
  [328] struct mm_rss_stat rss_stat;
  [340] struct linux_binfmt *binfmt;
  [344] cpumask_var_t cpu_vm_mask_var;
  [348] mm_context_t context;
  [360] unsigned int faultstamp;
  [364] unsigned int token_priority;
  [368] unsigned int last_interval;
  [372] unsigned long flags;
  [376] struct core_state *core_state;
  [380] spinlock_t ioctx_lock;
  [384] struct hlist_head ioctx_list;
  [388] struct file *exe_file;
  [392] unsigned long num_exe_file_vmas;
}

下面的问题是:怎样维护每个 CPU 的 runqueue?

1. 怎样把唤醒的task 加入run queue?

2. 怎样把睡眠的 task 从 runqueue 中删除?



你可能感兴趣的:(scheduler 怎样得到 next task_struct)