Cpufreq Ondemand

How the ondemand cpufreq governor samples CPU load and drives frequency changes, traced through the kernel source of roughly the 2.6.38-3.1 era: from the sampling timer, through the governor's decision logic and a concrete driver (speedstep-ich), down to the workqueue machinery.

1: drivers/cpufreq/cpufreq_ondemand.c


A:


static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
  /* We want all CPUs to do sampling nearly on same jiffy */
  int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
  if (num_online_cpus() > 1)
    delay -= jiffies % delay;
  dbs_info->sample_type = DBS_NORMAL_SAMPLE;
  INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);  /* in short: dbs_info->work will run do_dbs_timer; see B below */
  schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);  /* expanded under (1) kernel/workqueue.c at the end */
}
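The delay -= jiffies % delay adjustment is worth a pause: it shortens the first expiry so that every CPU's deferrable work fires on roughly the same jiffy boundary, letting an otherwise idle package wake once per period instead of once per CPU. A minimal userspace sketch of the arithmetic, with made-up numbers:

  #include <stdio.h>

  /* Toy demo of the jiffy-alignment trick; 'jiffies' here is just a
   * stand-in counter, not the kernel symbol. */
  int main(void)
  {
    unsigned long jiffies = 100007;  /* current tick count (example) */
    unsigned long delay = 25;        /* sampling period in jiffies   */

    /* shorten the delay so expiry lands on a multiple of 'delay' */
    unsigned long aligned = delay - jiffies % delay;

    printf("raw delay: %lu jiffies\n", delay);
    printf("aligned  : %lu jiffies (expires at jiffy %lu)\n",
           aligned, jiffies + aligned);
    return 0;
  }

Here the work fires at jiffy 100025, an exact multiple of the 25-jiffy period, as it will on every other CPU using the same period.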


B:

    static void do_dbs_timer(struct work_struct *work)
    {
      struct cpu_dbs_info_s *dbs_info =
        container_of(work, struct cpu_dbs_info_s, work.work);

      unsigned int cpu = dbs_info->cpu;
      int sample_type = dbs_info->sample_type;
      int delay;

      mutex_lock(&dbs_info->timer_mutex);

      /* Common NORMAL_SAMPLE setup */

      dbs_info->sample_type = DBS_NORMAL_SAMPLE;
      if (!dbs_tuners_ins.powersave_bias ||
          sample_type == DBS_NORMAL_SAMPLE) {
        dbs_check_cpu(dbs_info);   /* <-- here the governor decides whether to raise or lower the frequency; see C below */
        if (dbs_info->freq_lo) {
          /* Setup timer for SUB_SAMPLE */
          dbs_info->sample_type = DBS_SUB_SAMPLE;
          delay = dbs_info->freq_hi_jiffies;
        } else {
          /* We want all CPUs to do sampling nearly on
           * same jiffy
           */
          delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate
              * dbs_info->rate_mult);
          if (num_online_cpus() > 1)
            delay -= jiffies % delay;
        }
      } else {
        __cpufreq_driver_target(dbs_info->cur_policy,
            dbs_info->freq_lo, CPUFREQ_RELATION_H);
        delay = dbs_info->freq_lo_jiffies;
      }
      schedule_delayed_work_on(cpu, &dbs_info->work, delay);
      mutex_unlock(&dbs_info->timer_mutex);
    }
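A note on the powersave_bias branch: when it is enabled, dbs_check_cpu picks a freq_hi/freq_lo pair straddling the would-be target, and do_dbs_timer alternates between them (DBS_NORMAL_SAMPLE for freq_hi_jiffies, DBS_SUB_SAMPLE for freq_lo_jiffies), so the time-averaged frequency falls between two table entries. A toy calculation, all numbers invented:

  #include <stdio.h>

  /* Effective frequency when alternating between freq_hi and freq_lo
   * (hypothetical values: kHz and jiffies). */
  int main(void)
  {
    unsigned int freq_hi = 2000000, freq_lo = 1600000;
    unsigned int hi_jiffies = 6, lo_jiffies = 4;

    unsigned long eff = ((unsigned long)freq_hi * hi_jiffies +
                         (unsigned long)freq_lo * lo_jiffies) /
                        (hi_jiffies + lo_jiffies);

    printf("time-averaged frequency ~ %lu kHz\n", eff);
    return 0;
  }

Spending 6 jiffies at 2.0 GHz and 4 at 1.6 GHz approximates 1.84 GHz, a point the frequency table cannot provide directly.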
C:
  static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
  {
    unsigned int max_load_freq;
    struct cpufreq_policy *policy;
    unsigned int j;
    this_dbs_info->freq_lo = 0;
    policy = this_dbs_info->cur_policy;
    /*
     * Every sampling_rate, we check, if current idle time is less
     * than 20% (default), then we try to increase frequency
     * Every sampling_rate, we look for the lowest
     * frequency which can sustain the load while keeping idle time over
     * 30%. If such a frequency exists, we try to decrease to this frequency.
     *
     * Any frequency increase takes it to the maximum frequency.
     * Frequency reduction happens at minimum steps of
     * 5% (default) of current frequency
     */
    /* Get Absolute Load - in terms of freq */
    max_load_freq = 0;
    for_each_cpu(j, policy->cpus) {
      struct cpu_dbs_info_s *j_dbs_info;
      cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time;
      unsigned int idle_time, wall_time, iowait_time;
      unsigned int load, load_freq;
      int freq_avg;
      j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
      cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
      cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time);
      wall_time = (unsigned int) cputime64_sub(cur_wall_time,
          j_dbs_info->prev_cpu_wall);
      j_dbs_info->prev_cpu_wall = cur_wall_time;

      idle_time = (unsigned int) cputime64_sub(cur_idle_time,
          j_dbs_info->prev_cpu_idle);
      j_dbs_info->prev_cpu_idle = cur_idle_time;
      iowait_time = (unsigned int) cputime64_sub(cur_iowait_time,
          j_dbs_info->prev_cpu_iowait);
      j_dbs_info->prev_cpu_iowait = cur_iowait_time;
      if (dbs_tuners_ins.ignore_nice) {
        cputime64_t cur_nice;
        unsigned long cur_nice_jiffies;
        cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
            j_dbs_info->prev_cpu_nice);
        /*
         * Assumption: nice time between sampling periods will
         * be less than 2^32 jiffies for 32 bit sys
         */
        cur_nice_jiffies = (unsigned long)
          cputime64_to_jiffies64(cur_nice);
        j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
        idle_time += jiffies_to_usecs(cur_nice_jiffies);
      }
      /*
       * For the purpose of ondemand, waiting for disk IO is an
       * indication that you're performance critical, and not that
       * the system is actually idle. So subtract the iowait time
       * from the cpu idle time.
       */
      if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time)
        idle_time -= iowait_time;
      if (unlikely(!wall_time || wall_time < idle_time))
        continue;
      load = 100 * (wall_time - idle_time) / wall_time;     /* CPU load as a percentage */
      freq_avg = __cpufreq_driver_getavg(policy, j);
      if (freq_avg <= 0)
        freq_avg = policy->cur;
      load_freq = load * freq_avg;
      if (load_freq > max_load_freq)
        max_load_freq = load_freq;
    }
    /* Check for frequency increase */
    if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
      /* If switching to max speed, apply sampling_down_factor */
      if (policy->cur < policy->max)
        this_dbs_info->rate_mult =
          dbs_tuners_ins.sampling_down_factor;
      dbs_freq_increase(policy, policy->max);
      return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
      return;

    /*
     * The optimal frequency is the frequency that is the lowest that
     * can support the current CPU usage without triggering the up
     * policy. To be safe, we focus 10 points under the threshold.
     */
    if (max_load_freq <
          (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
          policy->cur) {
        unsigned int freq_next;
        freq_next = max_load_freq /
          (dbs_tuners_ins.up_threshold -
           dbs_tuners_ins.down_differential);
        /* No longer fully busy, reset rate_mult */
        this_dbs_info->rate_mult = 1;
        if (freq_next < policy->min)
          freq_next = policy->min;

        if (!dbs_tuners_ins.powersave_bias) {
          __cpufreq_driver_target(policy, freq_next,
              CPUFREQ_RELATION_L);
        } else {
          int freq = powersave_bias_target(policy, freq_next,
              CPUFREQ_RELATION_L);
          __cpufreq_driver_target(policy, freq,
              CPUFREQ_RELATION_L);
        }
      }
  }
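Putting the thresholds together: with the defaults of this era (up_threshold = 80, down_differential = 3), a worked example of the decision, using policy->cur in place of freq_avg and invented load figures:

  #include <stdio.h>

  int main(void)
  {
    unsigned int up_threshold = 80, down_differential = 3;
    unsigned int cur = 1600000;              /* current freq, kHz   */
    unsigned int wall = 10000, idle = 7500;  /* usecs in the period */

    unsigned int load = 100 * (wall - idle) / wall;   /* 25% */
    unsigned long load_freq = (unsigned long)load * cur;

    if (load_freq > (unsigned long)up_threshold * cur)
      printf("load %u%%: jump straight to policy->max\n", load);
    else if (load_freq <
             (unsigned long)(up_threshold - down_differential) * cur)
      printf("load %u%%: scale down toward ~%lu kHz\n", load,
             load_freq / (up_threshold - down_differential));
    else
      printf("load %u%%: hold at %u kHz\n", load, cur);
    return 0;
  }

At 25% load this proposes about 519480 kHz; the real code then clamps freq_next to policy->min and lets CPUFREQ_RELATION_L round it up to a supported step.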

  2: drivers/cpufreq/cpufreq.c

    A:
    int __cpufreq_driver_target(struct cpufreq_policy *policy,
        unsigned int target_freq, unsigned int relation)
    {
      int retval = -EINVAL;
      pr_debug("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
          target_freq, relation);
      if (cpu_online(policy->cpu) && cpufreq_driver->target)
        retval = cpufreq_driver->target(policy, target_freq, relation);
      return retval;
    }
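The relation argument tells the driver how to round when the requested frequency is not an exact table entry: CPUFREQ_RELATION_L selects the lowest supported frequency at or above the target, CPUFREQ_RELATION_H the highest at or below it. A standalone sketch of that rounding (the idea behind cpufreq_frequency_table_target, not the kernel implementation):

  #include <stdio.h>

  static const unsigned int table[] = { 800000, 1200000, 1600000, 2000000 };
  #define N (sizeof(table) / sizeof(table[0]))

  /* RELATION_L: lowest frequency at or above the target */
  static unsigned int pick_l(unsigned int target)
  {
    for (unsigned int i = 0; i < N; i++)
      if (table[i] >= target)
        return table[i];
    return table[N - 1];
  }

  /* RELATION_H: highest frequency at or below the target */
  static unsigned int pick_h(unsigned int target)
  {
    for (unsigned int i = N; i-- > 0; )
      if (table[i] <= target)
        return table[i];
    return table[0];
  }

  int main(void)
  {
    unsigned int target = 1300000;
    printf("RELATION_L(%u) -> %u kHz\n", target, pick_l(target));
    printf("RELATION_H(%u) -> %u kHz\n", target, pick_h(target));
    return 0;
  }

This is why the governor's scale-down path pairs freq_next with CPUFREQ_RELATION_L: rounding up preserves enough capacity for the measured load.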

  3: drivers/cpufreq/speedstep-ich.c

    A:

    static struct cpufreq_driver speedstep_driver = {
      .name = "speedstep-ich",
      .verify = speedstep_verify,
      .target = speedstep_target,
      .init = speedstep_cpu_init,
      .exit = speedstep_cpu_exit,
      .get  = speedstep_get,
      .owner  = THIS_MODULE,
      .attr = speedstep_attr,
    };

   B:

  /**
   * speedstep_target - set a new CPUFreq policy
   * @policy: new policy
   * @target_freq: the target frequency
   * @relation: how that frequency relates to achieved frequency
   *  (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H)
   *
   * Sets a new CPUFreq policy.
   */
  static int speedstep_target(struct cpufreq_policy *policy,
      unsigned int target_freq,
      unsigned int relation)
  {
    unsigned int newstate = 0, policy_cpu;
    struct cpufreq_freqs freqs;
    int i;

    if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0],
          target_freq, relation, &newstate))
      return -EINVAL;

    policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask);
    freqs.old = speedstep_get(policy_cpu);
    freqs.new = speedstep_freqs[newstate].frequency;
    freqs.cpu = policy->cpu;
    pr_debug("transiting from %u to %u kHz\n", freqs.old, freqs.new);
    /* no transition necessary */
    if (freqs.old == freqs.new)
      return 0;

    for_each_cpu(i, policy->cpus) {
      freqs.cpu = i;
      cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
    }
    smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate,
        true);
    for_each_cpu(i, policy->cpus) {
      freqs.cpu = i;
      cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
    }

    return 0;
  }

   C:

  /**
   * speedstep_init - initializes the SpeedStep CPUFreq driver
   *
   * Initializes the SpeedStep support. Returns -ENODEV on unsupported
   * devices, -EINVAL on problems during initialization, and zero on
   * success.
   */
  static int __init speedstep_init(void)
  {
    /* detect processor */
    speedstep_processor = speedstep_detect_processor();
    if (!speedstep_processor) {
      pr_debug("Intel(R) SpeedStep(TM) capable processor "
          "not found\n");
      return -ENODEV;
    }
    /* detect chipset */
    if (!speedstep_detect_chipset()) {
      pr_debug("Intel(R) SpeedStep(TM) for this chipset not "
          "(yet) available.\n");
      return -ENODEV;
    }
    /* activate speedstep support */
    if (speedstep_activate()) {
      pci_dev_put(speedstep_chipset_dev);
      return -EINVAL;
    }
    if (speedstep_find_register())
      return -ENODEV;
    return cpufreq_register_driver(&speedstep_driver);
  }

 4: drivers/cpufreq/cpufreq.c

    A:

    /**
     * cpufreq_notify_transition - call notifier chain and adjust_jiffies
     * on frequency transition.
     *
     * This function calls the transition notifiers and the "adjust_jiffies"
     * function. It is called twice on all CPU frequency changes that have
     * external effects.
     */    
    void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
    {
      struct cpufreq_policy *policy; 
      BUG_ON(irqs_disabled());
      freqs->flags = cpufreq_driver->flags;
      pr_debug("notification %u of frequency transition to %u kHz\n",
          state, freqs->new);

      policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
      switch (state) {
        case CPUFREQ_PRECHANGE:
          /* detect if the driver reported a value as "old frequency"
           * which is not equal to what the cpufreq core thinks is
           * "old frequency".       
           */
          if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
            if ((policy) && (policy->cpu == freqs->cpu) && 
                (policy->cur) && (policy->cur != freqs->old)) { 
              pr_debug("Warning: CPU frequency is"
                  " %u, cpufreq assumed %u kHz.\n",
                  freqs->old, policy->cur);      
              freqs->old = policy->cur;      
            }
          }  
          srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
              CPUFREQ_PRECHANGE, freqs);     
          adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
          break;

        case CPUFREQ_POSTCHANGE:    
          adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
          pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
              (unsigned long)freqs->cpu);    
          trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
          trace_cpu_frequency(freqs->new, freqs->cpu);
          srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
              CPUFREQ_POSTCHANGE, freqs);    
          if (likely(policy) && likely(policy->cpu == freqs->cpu))
            policy->cur = freqs->new;      
          break;
      }    
    }
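Anything that cares about these transitions can hook the same chain via cpufreq_register_notifier. A minimal sketch of a consumer module (my_trans_cb and my_nb are invented names), assuming the cpufreq_freqs layout of this kernel era:

  #include <linux/module.h>
  #include <linux/cpufreq.h>

  /* The callback fires once with CPUFREQ_PRECHANGE and once with
   * CPUFREQ_POSTCHANGE for every externally visible transition. */
  static int my_trans_cb(struct notifier_block *nb,
                         unsigned long state, void *data)
  {
    struct cpufreq_freqs *freqs = data;

    if (state == CPUFREQ_POSTCHANGE)
      pr_info("cpu%u: %u -> %u kHz\n",
              freqs->cpu, freqs->old, freqs->new);
    return NOTIFY_OK;
  }

  static struct notifier_block my_nb = {
    .notifier_call = my_trans_cb,
  };

  static int __init my_init(void)
  {
    return cpufreq_register_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
  }

  static void __exit my_exit(void)
  {
    cpufreq_unregister_notifier(&my_nb, CPUFREQ_TRANSITION_NOTIFIER);
  }

  module_init(my_init);
  module_exit(my_exit);
  MODULE_LICENSE("GPL");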

   B:
  /*********************************************************************
   *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
   *********************************************************************/

  /**
   * adjust_jiffies - adjust the system "loops_per_jiffy"
   *
   * This function alters the system "loops_per_jiffy" for the clock
   * speed change. Note that loops_per_jiffy cannot be updated on SMP
   * systems as each CPU might be scaled differently. So, use the arch
   * per-CPU loops_per_jiffy value wherever possible.
   */

  #ifndef CONFIG_SMP
  static unsigned long l_p_j_ref;
  static unsigned int  l_p_j_ref_freq;
  static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
  {
    if (ci->flags & CPUFREQ_CONST_LOOPS)
      return;
    if (!l_p_j_ref_freq) {
      l_p_j_ref = loops_per_jiffy;
      l_p_j_ref_freq = ci->old;
      pr_debug("saving %lu as reference value for loops_per_jiffy; "
          "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
    }
    if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
        (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
      loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
          ci->new);
      pr_debug("scaling loops_per_jiffy to %lu "
          "for frequency %u kHz\n", loops_per_jiffy, ci->new);
    }
  }
  #else
  static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
  {
    return;
  }
  #endif

  5: include/linux/cpufreq.h

     /**
      * cpufreq_scale - "old * mult / div" calculation for large values (32-bit-arch safe)
      * @old:   old value
      * @div:   divisor
      * @mult:  multiplier
      *
      *
      *    new = old * mult / div
      */
     static inline unsigned long cpufreq_scale(unsigned long old, u_int div, u_int mult)
     {
     #if BITS_PER_LONG == 32
       u64 result = ((u64) old) * ((u64) mult);
       do_div(result, div);
       return (unsigned long) result;
     #elif BITS_PER_LONG == 64
       unsigned long result = old * ((u64) mult);
       result /= div;
       return result;
     #endif
     }
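The 64-bit intermediate matters: loops_per_jiffy times a kHz frequency easily exceeds 2^32. A userspace demonstration with plausible but invented values:

  #include <stdio.h>
  #include <stdint.h>

  /* Why cpufreq_scale widens to 64 bits on 32-bit systems. */
  int main(void)
  {
    uint32_t lpj = 4000000;   /* loops_per_jiffy at the ref freq */
    uint32_t ref = 800000;    /* reference frequency, kHz */
    uint32_t new = 1600000;   /* new frequency, kHz */

    uint32_t bad  = lpj * new / ref;              /* 32-bit product wraps */
    uint32_t good = (uint32_t)((uint64_t)lpj * new / ref);

    printf("32-bit multiply: %u (wrong)\n", bad);
    printf("64-bit multiply: %u (expected %u)\n", good, lpj * 2);
    return 0;
  }

adjust_jiffies above uses exactly this helper: loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new).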

    Back in dbs_timer_init (A above): its closing schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay) call, and the re-arming call in do_dbs_timer, both land in the workqueue code:

        (1) kernel/workqueue.c                    


                A:

                /**
                 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
                 * @cpu: cpu to use
                 * @dwork: job to be done
                 * @delay: number of jiffies to wait
                 *
                 * After waiting for a given time this puts a job in the kernel-global
                 * workqueue on the specified CPU.
                 */
               
                int schedule_delayed_work_on(int cpu,
                    struct delayed_work *dwork, unsigned long delay)
                {
                  return queue_delayed_work_on(cpu, system_wq, dwork, delay);
                }

              B:
              /**
               * queue_delayed_work_on - queue work on specific CPU after delay
               * @cpu: CPU number to execute work on
               * @wq: workqueue to use
               * @dwork: work to queue
               * @delay: number of jiffies to wait before queueing
               *
               * Returns 0 if @work was already on a queue, non-zero otherwise.
               */

              int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                  struct delayed_work *dwork, unsigned long delay)
              {
                int ret = 0;
                struct timer_list *timer = &dwork->timer;
                struct work_struct *work = &dwork->work;
                if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                  unsigned int lcpu;
                  BUG_ON(timer_pending(timer));
                  BUG_ON(!list_empty(&work->entry));

                  timer_stats_timer_set_start_info(&dwork->timer);

                  /*
                   * This stores cwq for the moment, for the timer_fn.
                   * Note that the work's gcwq is preserved to allow
                   * reentrance detection for delayed works.
                   */
                  if (!(wq->flags & WQ_UNBOUND)) {
                    struct global_cwq *gcwq = get_work_gcwq(work);
                    if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
                      lcpu = gcwq->cpu;
                    else
                      lcpu = raw_smp_processor_id();
                  } else
                    lcpu = WORK_CPU_UNBOUND;
                  set_work_cwq(work, get_cwq(lcpu, wq), 0);
                  timer->expires = jiffies + delay;
                  timer->data = (unsigned long)dwork;
                  timer->function = delayed_work_timer_fn;
                  if (unlikely(cpu >= 0))
                    add_timer_on(timer, cpu);
                  else
                    add_timer(timer);
                  ret = 1;
                }
                return ret;
              }
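For reference, the self re-arming pattern ondemand builds on, reduced to a minimal module sketch (my_work, my_work_fn, and PERIOD are invented names):

  #include <linux/module.h>
  #include <linux/workqueue.h>
  #include <linux/jiffies.h>
  #include <linux/smp.h>

  #define PERIOD (HZ / 10)   /* 100 ms */

  static struct delayed_work my_work;

  static void my_work_fn(struct work_struct *work)
  {
    struct delayed_work *dw = to_delayed_work(work);  /* same container_of trick as do_dbs_timer */

    pr_info("periodic work on cpu%d\n", raw_smp_processor_id());
    schedule_delayed_work_on(0, dw, PERIOD);          /* re-arm on CPU 0 */
  }

  static int __init my_init(void)
  {
    INIT_DELAYED_WORK(&my_work, my_work_fn);
    schedule_delayed_work_on(0, &my_work, PERIOD);
    return 0;
  }

  static void __exit my_exit(void)
  {
    cancel_delayed_work_sync(&my_work);
  }

  module_init(my_init);
  module_exit(my_exit);
  MODULE_LICENSE("GPL");

ondemand uses the deferrable variant (INIT_DELAYED_WORK_DEFERRABLE) so the sampling timer does not itself wake an idle CPU.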

