diff options
author | Steven Rostedt <srostedt@redhat.com> | 2010-05-14 09:29:52 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2010-05-14 09:29:52 -0400 |
commit | 23e117fa44429cc054cb27d5621d64e4ced91e52 (patch) | |
tree | a4b9d0902b9c6f009b2c297515221c1b9bed3af8 /drivers/cpufreq | |
parent | 668eb65f092902eb7dd526af73d4a7f025a94612 (diff) | |
parent | a93d2f1744206827ccf416e2cdc5018aa503314e (diff) | |
download | linux-23e117fa44429cc054cb27d5621d64e4ced91e52.tar.bz2 |
Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-4
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 19 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 8 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 75 |
3 files changed, 93 insertions, 9 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 2d5d575e889d..75d293eeb3ee 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1113,6 +1113,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) unsigned int cpu = sys_dev->id; unsigned long flags; struct cpufreq_policy *data; + struct kobject *kobj; + struct completion *cmp; #ifdef CONFIG_SMP struct sys_device *cpu_sys_dev; unsigned int j; @@ -1141,10 +1143,11 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) dprintk("removing link\n"); cpumask_clear_cpu(cpu, data->cpus); spin_unlock_irqrestore(&cpufreq_driver_lock, flags); - sysfs_remove_link(&sys_dev->kobj, "cpufreq"); + kobj = &sys_dev->kobj; cpufreq_cpu_put(data); cpufreq_debug_enable_ratelimit(); unlock_policy_rwsem_write(cpu); + sysfs_remove_link(kobj, "cpufreq"); return 0; } #endif @@ -1181,7 +1184,10 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) data->governor->name, CPUFREQ_NAME_LEN); #endif cpu_sys_dev = get_cpu_sysdev(j); - sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); + kobj = &cpu_sys_dev->kobj; + unlock_policy_rwsem_write(cpu); + sysfs_remove_link(kobj, "cpufreq"); + lock_policy_rwsem_write(cpu); cpufreq_cpu_put(data); } } @@ -1192,19 +1198,22 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev) if (cpufreq_driver->target) __cpufreq_governor(data, CPUFREQ_GOV_STOP); - kobject_put(&data->kobj); + kobj = &data->kobj; + cmp = &data->kobj_unregister; + unlock_policy_rwsem_write(cpu); + kobject_put(kobj); /* we need to make sure that the underlying kobj is actually * not referenced anymore by anybody before we proceed with * unloading. */ dprintk("waiting for dropping of refcount\n"); - wait_for_completion(&data->kobj_unregister); + wait_for_completion(cmp); dprintk("wait complete\n"); + lock_policy_rwsem_write(cpu); if (cpufreq_driver->exit) cpufreq_driver->exit(data); - unlock_policy_rwsem_write(cpu); free_cpumask_var(data->related_cpus); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 599a40b25cb0..3a147874a465 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -444,6 +444,7 @@ static struct attribute_group dbs_attr_group_old = { static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { unsigned int load = 0; + unsigned int max_load = 0; unsigned int freq_target; struct cpufreq_policy *policy; @@ -501,6 +502,9 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) continue; load = 100 * (wall_time - idle_time) / wall_time; + + if (load > max_load) + max_load = load; } /* @@ -511,7 +515,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) return; /* Check for frequency increase */ - if (load > dbs_tuners_ins.up_threshold) { + if (max_load > dbs_tuners_ins.up_threshold) { this_dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ @@ -538,7 +542,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. */ - if (load < (dbs_tuners_ins.down_threshold - 10)) { + if (max_load < (dbs_tuners_ins.down_threshold - 10)) { freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; this_dbs_info->requested_freq -= freq_target; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index bd444dc93cf2..8e9dbdc6c700 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -73,6 +73,7 @@ enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; struct cpu_dbs_info_s { cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_iowait; cputime64_t prev_cpu_wall; cputime64_t prev_cpu_nice; struct cpufreq_policy *cur_policy; @@ -108,6 +109,7 @@ static struct dbs_tuners { unsigned int down_differential; unsigned int ignore_nice; unsigned int powersave_bias; + unsigned int io_is_busy; } dbs_tuners_ins = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, @@ -148,6 +150,16 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) return idle_time; } +static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +{ + u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + + if (iowait_time == -1ULL) + return 0; + + return iowait_time; +} + /* * Find right freq to be set now with powersave_bias on. * Returns the freq_hi to be used right now and will set freq_hi_jiffies, @@ -249,6 +261,7 @@ static ssize_t show_##file_name \ return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ } show_one(sampling_rate, sampling_rate); +show_one(io_is_busy, io_is_busy); show_one(up_threshold, up_threshold); show_one(ignore_nice_load, ignore_nice); show_one(powersave_bias, powersave_bias); @@ -299,6 +312,23 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, return count; } +static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.io_is_busy = !!input; + mutex_unlock(&dbs_mutex); + + return count; +} + static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, const char *buf, size_t count) { @@ -381,6 +411,7 @@ static struct global_attr _name = \ __ATTR(_name, 0644, show_##_name, store_##_name) define_one_rw(sampling_rate); +define_one_rw(io_is_busy); define_one_rw(up_threshold); define_one_rw(ignore_nice_load); define_one_rw(powersave_bias); @@ -392,6 +423,7 @@ static struct attribute *dbs_attributes[] = { &up_threshold.attr, &ignore_nice_load.attr, &powersave_bias.attr, + &io_is_busy.attr, NULL }; @@ -470,14 +502,15 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) for_each_cpu(j, policy->cpus) { struct cpu_dbs_info_s *j_dbs_info; - cputime64_t cur_wall_time, cur_idle_time; - unsigned int idle_time, wall_time; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; unsigned int load, load_freq; int freq_avg; j_dbs_info = &per_cpu(od_cpu_dbs_info, j); cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); wall_time = (unsigned int) cputime64_sub(cur_wall_time, j_dbs_info->prev_cpu_wall); @@ -487,6 +520,10 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) j_dbs_info->prev_cpu_idle); j_dbs_info->prev_cpu_idle = cur_idle_time; + iowait_time = (unsigned int) cputime64_sub(cur_iowait_time, + j_dbs_info->prev_cpu_iowait); + j_dbs_info->prev_cpu_iowait = cur_iowait_time; + if (dbs_tuners_ins.ignore_nice) { cputime64_t cur_nice; unsigned long cur_nice_jiffies; @@ -504,6 +541,16 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) idle_time += jiffies_to_usecs(cur_nice_jiffies); } + /* + * For the purpose of ondemand, waiting for disk IO is an + * indication that you're performance critical, and not that + * the system is actually idle. So subtract the iowait time + * from the cpu idle time. + */ + + if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + if (unlikely(!wall_time || wall_time < idle_time)) continue; @@ -617,6 +664,29 @@ static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) cancel_delayed_work_sync(&dbs_info->work); } +/* + * Not all CPUs want IO time to be accounted as busy; this dependson how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (androidlcom) calis this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) andl later have an efficient idle. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; +} + static int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { @@ -679,6 +749,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy, dbs_tuners_ins.sampling_rate = max(min_sampling_rate, latency * LATENCY_MULTIPLIER); + dbs_tuners_ins.io_is_busy = should_io_be_busy(); } mutex_unlock(&dbs_mutex); |