From 4d5dcc4211f9def4281eafb54b8ed483862e8135 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Wed, 27 Mar 2013 15:58:58 +0000
Subject: cpufreq: governor: Implement per policy instances of governors

Currently, there can't be multiple instances of a single governor type.
If we have a multi-package system with multiple instances of struct
policy (one per package), we can't have multiple instances of the same
governor, i.e. we can't run one instance of the ondemand governor per
package.

The governor directory in sysfs is created at
/sys/devices/system/cpu/cpufreq/governor-name/, which again reflects
that there can be only one instance of a governor type in the system.

This is a bottleneck for multi-cluster systems where we want different
packages to use the same governor type, but with different tunables.

This patch uses the infrastructure provided by the earlier patch and
implements init/exit routines for the ondemand and conservative
governors.

Signed-off-by: Viresh Kumar
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq_governor.h | 117 ++++++++++++++++++++++++++++++++-----
 1 file changed, 102 insertions(+), 15 deletions(-)

(limited to 'drivers/cpufreq/cpufreq_governor.h')

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 46bde01eee62..c83cabf14b2f 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -40,14 +40,75 @@
 /* Ondemand Sampling types */
 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
-/* Macro creating sysfs show routines */
-#define show_one(_gov, file_name, object)			\
-static ssize_t show_##file_name				\
+/*
+ * Macro for creating governors sysfs routines
+ *
+ * - gov_sys: One governor instance per whole system
+ * - gov_pol: One governor instance per policy
+ */
+
+/* Create attributes */
+#define gov_sys_attr_ro(_name)					\
+static struct global_attr _name##_gov_sys =			\
+__ATTR(_name, 0444, show_##_name##_gov_sys, NULL)
+
+#define gov_sys_attr_rw(_name)					\
+static struct global_attr _name##_gov_sys =			\
+__ATTR(_name, 0644, show_##_name##_gov_sys, store_##_name##_gov_sys)
+
+#define gov_pol_attr_ro(_name)					\
+static struct freq_attr _name##_gov_pol =			\
+__ATTR(_name, 0444, show_##_name##_gov_pol, NULL)
+
+#define gov_pol_attr_rw(_name)					\
+static struct freq_attr _name##_gov_pol =			\
+__ATTR(_name, 0644, show_##_name##_gov_pol, store_##_name##_gov_pol)
+
+#define gov_sys_pol_attr_rw(_name)				\
+	gov_sys_attr_rw(_name);					\
+	gov_pol_attr_rw(_name)
+
+#define gov_sys_pol_attr_ro(_name)				\
+	gov_sys_attr_ro(_name);					\
+	gov_pol_attr_ro(_name)
+
+/* Create show/store routines */
+#define show_one(_gov, file_name)				\
+static ssize_t show_##file_name##_gov_sys			\
 (struct kobject *kobj, struct attribute *attr, char *buf)	\
 {								\
-	return sprintf(buf, "%u\n", _gov##_tuners.object);	\
+	struct _gov##_dbs_tuners *tuners = _gov##_dbs_cdata.gdbs_data->tuners; \
+	return sprintf(buf, "%u\n", tuners->file_name);		\
+}								\
+								\
+static ssize_t show_##file_name##_gov_pol			\
+(struct cpufreq_policy *policy, char *buf)			\
+{								\
+	struct dbs_data *dbs_data = policy->governor_data;	\
+	struct _gov##_dbs_tuners *tuners = dbs_data->tuners;	\
+	return sprintf(buf, "%u\n", tuners->file_name);		\
+}
+
+#define store_one(_gov, file_name)				\
+static ssize_t store_##file_name##_gov_sys			\
+(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) \
+{								\
+	struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data;	\
+	return store_##file_name(dbs_data, buf, count);		\
+}								\
+								\
+static ssize_t store_##file_name##_gov_pol			\
+(struct cpufreq_policy *policy, const char *buf, size_t count)	\
+{								\
+	struct dbs_data *dbs_data = policy->governor_data;	\
+	return store_##file_name(dbs_data, buf, count);		\
 }
 
+#define show_store_one(_gov, file_name)				\
+show_one(_gov, file_name);					\
+store_one(_gov, file_name)
+
+/* create helper routines */
 #define define_get_cpu_dbs_routines(_dbs_info)			\
 static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu)	\
 {								\
@@ -103,7 +164,7 @@ struct cs_cpu_dbs_info_s {
 	unsigned int enable:1;
 };
 
-/* Governers sysfs tunables */
+/* Per policy Governers sysfs tunables */
 struct od_dbs_tuners {
 	unsigned int ignore_nice;
 	unsigned int sampling_rate;
@@ -123,31 +184,42 @@ struct cs_dbs_tuners {
 	unsigned int freq_step;
 };
 
-/* Per Governer data */
-struct dbs_data {
+/* Common Governer data across policies */
+struct dbs_data;
+struct common_dbs_data {
 	/* Common across governors */
 	#define GOV_ONDEMAND		0
 	#define GOV_CONSERVATIVE	1
 	int governor;
-	unsigned int min_sampling_rate;
-	struct attribute_group *attr_group;
-	void *tuners;
+	struct attribute_group *attr_group_gov_sys; /* one governor - system */
+	struct attribute_group *attr_group_gov_pol; /* one governor - policy */
 
-	/* dbs_mutex protects dbs_enable in governor start/stop */
-	struct mutex mutex;
+	/* Common data for platforms that don't set have_governor_per_policy */
+	struct dbs_data *gdbs_data;
 
 	struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu);
 	void *(*get_cpu_dbs_info_s)(int cpu);
 	void (*gov_dbs_timer)(struct work_struct *work);
 	void (*gov_check_cpu)(int cpu, unsigned int load);
+	int (*init)(struct dbs_data *dbs_data);
+	void (*exit)(struct dbs_data *dbs_data);
 
 	/* Governor specific ops, see below */
 	void *gov_ops;
 };
 
+/* Governer Per policy data */
+struct dbs_data {
+	struct common_dbs_data *cdata;
+	unsigned int min_sampling_rate;
+	void *tuners;
+
+	/* dbs_mutex protects dbs_enable in governor start/stop */
+	struct mutex mutex;
+};
+
 /* Governor specific ops, will be passed to dbs_data->gov_ops */
 struct od_ops {
-	int (*io_busy)(void);
 	void (*powersave_bias_init_cpu)(int cpu);
 	unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
 			unsigned int freq_next, unsigned int relation);
@@ -169,10 +241,25 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate)
 	return delay;
 }
 
+#define declare_show_sampling_rate_min(_gov)			\
+static ssize_t show_sampling_rate_min_gov_sys			\
+(struct kobject *kobj, struct attribute *attr, char *buf)	\
+{								\
+	struct dbs_data *dbs_data = _gov##_dbs_cdata.gdbs_data;	\
+	return sprintf(buf, "%u\n", dbs_data->min_sampling_rate);	\
+}								\
+								\
+static ssize_t show_sampling_rate_min_gov_pol			\
+(struct cpufreq_policy *policy, char *buf)			\
+{								\
+	struct dbs_data *dbs_data = policy->governor_data;	\
+	return sprintf(buf, "%u\n", dbs_data->min_sampling_rate);	\
+}
+
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
 bool need_load_eval(struct cpu_dbs_common_info *cdbs,
 		unsigned int sampling_rate);
-int cpufreq_governor_dbs(struct dbs_data *dbs_data,
-		struct cpufreq_policy *policy, unsigned int event);
+int cpufreq_governor_dbs(struct cpufreq_policy *policy,
+		struct common_dbs_data *cdata, unsigned int event);
 #endif /* _CPUFREQ_GOVERNER_H */
--
cgit v1.2.3
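How the new macros are consumed: each governor writes one plain
store_<tunable>() helper taking a struct dbs_data, and the macros then
generate both the _gov_sys and _gov_pol sysfs wrappers plus the two
attribute objects around it. A minimal sketch, modelled on how ondemand
wires up sampling_rate (the helper body is simplified here, not copied
from the patch):

    static ssize_t store_sampling_rate(struct dbs_data *dbs_data,
    		const char *buf, size_t count)
    {
    	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
    	unsigned int input;

    	if (sscanf(buf, "%u", &input) != 1)
    		return -EINVAL;

    	/* never allow sampling faster than the governor's floor */
    	od_tuners->sampling_rate = max(input, dbs_data->min_sampling_rate);
    	return count;
    }

    show_store_one(od, sampling_rate);
    gov_sys_pol_attr_rw(sampling_rate);

The last two lines expand into four routines (show/store for gov_sys
and gov_pol) and two attribute definitions: a global_attr for the
system-wide directory and a freq_attr for each policy's directory.
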
From 98104ee28f451024170a9dfb7bec31bfcb7e7c14 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Tue, 26 Feb 2013 15:07:24 +0530
Subject: cpufreq: governor: Set MIN_LATENCY_MULTIPLIER to 20

Currently MIN_LATENCY_MULTIPLIER is defined as 100, and so on a system
with a transition latency of 1 ms the minimum sampling time comes out
to around 100 ms. That is quite big if you want to get better
performance out of your system.

Redefine MIN_LATENCY_MULTIPLIER to 20 so that we can support a 20 ms
sampling rate on such platforms.

Signed-off-by: Viresh Kumar
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq_governor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/cpufreq/cpufreq_governor.h')

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index c83cabf14b2f..27b588aeacc1 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -34,7 +34,7 @@
  */
 #define MIN_SAMPLING_RATE_RATIO		(2)
 #define LATENCY_MULTIPLIER		(1000)
-#define MIN_LATENCY_MULTIPLIER		(100)
+#define MIN_LATENCY_MULTIPLIER		(20)
 #define TRANSITION_LATENCY_LIMIT	(10 * 1000 * 1000)
 
 /* Ondemand Sampling types */
--
cgit v1.2.3
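To see what the new value buys, here is the arithmetic as a sketch of
the clamp performed in cpufreq_governor_dbs() (variable names follow
that function; the exact code there differs slightly):

    /* transition_latency is in ns; latency below is in us */
    latency = policy->cpuinfo.transition_latency / 1000;

    /* floor for the sampling interval, also in us */
    min_sampling_rate = max(min_sampling_rate,
    			MIN_LATENCY_MULTIPLIER * latency);

With transition_latency = 1 ms, latency is 1000 us, so the old floor
was 100 * 1000 us = 100 ms and the new one is 20 * 1000 us = 20 ms.
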
From 031299b3be30f3ecab110fff8faad85af70e1797 Mon Sep 17 00:00:00 2001
From: Viresh Kumar
Date: Wed, 27 Feb 2013 12:24:03 +0530
Subject: cpufreq: governors: Avoid unnecessary per cpu timer interrupts

The following patch introduced per-cpu timers/works for the ondemand
and conservative governors:

commit 2abfa876f1117b0ab45f191fb1f82c41b1cbc8fe
Author: Rickard Andersson
Date:   Thu Dec 27 14:55:38 2012 +0000

    cpufreq: handle SW coordinated CPUs

This causes additional, unnecessary interrupts on all CPUs when the
load has recently been evaluated by any other CPU, i.e. when CPU x has
just evaluated the load, we don't really need any other CPU to evaluate
it again within the next sampling_rate period. Some code is present to
avoid that, but we are still getting timer interrupts on all CPUs.

A good way of avoiding this is to modify the delays for all CPUs
(policy->cpus) whenever any CPU has evaluated the load. This patch makes
that change along with some related code cleanup.

Signed-off-by: Viresh Kumar
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq_conservative.c |  8 ++++---
 drivers/cpufreq/cpufreq_governor.c     | 39 ++++++++++++++++++++++++----------
 drivers/cpufreq/cpufreq_governor.h     |  2 ++
 drivers/cpufreq/cpufreq_ondemand.c     | 12 ++++++-----
 4 files changed, 42 insertions(+), 19 deletions(-)

(limited to 'drivers/cpufreq/cpufreq_governor.h')

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 98b49462f4e9..6fe6050a3889 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -107,7 +107,6 @@ static void cs_check_cpu(int cpu, unsigned int load)
 
 static void cs_dbs_timer(struct work_struct *work)
 {
-	struct delayed_work *dw = to_delayed_work(work);
 	struct cs_cpu_dbs_info_s *dbs_info = container_of(work,
 			struct cs_cpu_dbs_info_s, cdbs.work.work);
 	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
@@ -116,12 +115,15 @@ static void cs_dbs_timer(struct work_struct *work)
 	struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	int delay = delay_for_sampling_rate(cs_tuners->sampling_rate);
+	bool modify_all = true;
 
 	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
-	if (need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate))
+	if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate))
+		modify_all = false;
+	else
 		dbs_check_cpu(dbs_data, cpu);
 
-	schedule_delayed_work_on(smp_processor_id(), dw, delay);
+	gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all);
 	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
 }
 
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 26fbb729bc1c..326f0c2e2bd5 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -178,20 +178,38 @@
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
-static inline void dbs_timer_init(struct dbs_data *dbs_data, int cpu,
-		unsigned int sampling_rate)
+static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
+		unsigned int delay)
 {
-	int delay = delay_for_sampling_rate(sampling_rate);
 	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
 
-	schedule_delayed_work_on(cpu, &cdbs->work, delay);
+	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
 }
 
-static inline void dbs_timer_exit(struct dbs_data *dbs_data, int cpu)
+void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
+		unsigned int delay, bool all_cpus)
 {
-	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
+	int i;
+
+	if (!all_cpus) {
+		__gov_queue_work(smp_processor_id(), dbs_data, delay);
+	} else {
+		for_each_cpu(i, policy->cpus)
+			__gov_queue_work(i, dbs_data, delay);
+	}
+}
+EXPORT_SYMBOL_GPL(gov_queue_work);
+
+static inline void gov_cancel_work(struct dbs_data *dbs_data,
+		struct cpufreq_policy *policy)
+{
+	struct cpu_dbs_common_info *cdbs;
+	int i;
 
-	cancel_delayed_work_sync(&cdbs->work);
+	for_each_cpu(i, policy->cpus) {
+		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
+		cancel_delayed_work_sync(&cdbs->work);
+	}
 }
 
 /* Will return if we need to evaluate cpu load again or not */
@@ -380,16 +398,15 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		/* Initiate timer time stamp */
 		cpu_cdbs->time_stamp = ktime_get();
 
-		for_each_cpu(j, policy->cpus)
-			dbs_timer_init(dbs_data, j, sampling_rate);
+		gov_queue_work(dbs_data, policy,
+				delay_for_sampling_rate(sampling_rate), true);
 		break;
 
 	case CPUFREQ_GOV_STOP:
 		if (dbs_data->cdata->governor == GOV_CONSERVATIVE)
 			cs_dbs_info->enable = 0;
 
-		for_each_cpu(j, policy->cpus)
-			dbs_timer_exit(dbs_data, j);
+		gov_cancel_work(dbs_data, policy);
 
 		mutex_lock(&dbs_data->mutex);
 		mutex_destroy(&cpu_cdbs->timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 27b588aeacc1..c9c269f2b973 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -262,4 +262,6 @@ bool need_load_eval(struct cpu_dbs_common_info *cdbs,
 		unsigned int sampling_rate);
 int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
+void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
+		unsigned int delay, bool all_cpus);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c90d345c636a..459f9ee39c74 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -216,7 +216,6 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
 
 static void od_dbs_timer(struct work_struct *work)
 {
-	struct delayed_work *dw = to_delayed_work(work);
 	struct od_cpu_dbs_info_s *dbs_info =
 		container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work);
 	unsigned int cpu = dbs_info->cdbs.cur_policy->cpu;
@@ -225,10 +224,13 @@ static void od_dbs_timer(struct work_struct *work)
 	struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	int delay = 0, sample_type = core_dbs_info->sample_type;
+	bool modify_all = true;
 
 	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
-	if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate))
+	if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) {
+		modify_all = false;
 		goto max_delay;
+	}
 
 	/* Common NORMAL_SAMPLE setup */
 	core_dbs_info->sample_type = OD_NORMAL_SAMPLE;
@@ -250,7 +252,7 @@ max_delay:
 		delay = delay_for_sampling_rate(od_tuners->sampling_rate
 				* core_dbs_info->rate_mult);
 
-	schedule_delayed_work_on(smp_processor_id(), dw, delay);
+	gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all);
 	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);
 }
 
@@ -310,8 +312,8 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
 			cancel_delayed_work_sync(&dbs_info->cdbs.work);
 			mutex_lock(&dbs_info->cdbs.timer_mutex);
 
-			schedule_delayed_work_on(cpu, &dbs_info->cdbs.work,
-					usecs_to_jiffies(new_rate));
+			gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy,
+					usecs_to_jiffies(new_rate), true);
 
 		}
 		mutex_unlock(&dbs_info->cdbs.timer_mutex);
--
cgit v1.2.3
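The key to the interrupt saving is that gov_queue_work() with all_cpus
set uses mod_delayed_work_on(), which re-arms an already-pending
delayed work instead of queueing a second instance; when one CPU
evaluates the load it therefore pushes every sibling's next wakeup a
full sampling period into the future. A usage sketch from a governor
timer's point of view, assuming the names introduced by this patch:

    	bool modify_all = true;

    	mutex_lock(&core_dbs_info->cdbs.timer_mutex);
    	if (!need_load_eval(&core_dbs_info->cdbs, sampling_rate))
    		modify_all = false;		/* re-arm this CPU's work only */
    	else
    		dbs_check_cpu(dbs_data, cpu);	/* load evaluated: defer all CPUs */

    	gov_queue_work(dbs_data, policy, delay, modify_all);
    	mutex_unlock(&core_dbs_info->cdbs.timer_mutex);

This mirrors the shape of cs_dbs_timer() above; od_dbs_timer() follows
the same pattern with its extra sub-sample bookkeeping.
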
From 9366d84052e7c5b2eca804c08cfcd00b490f4de2 Mon Sep 17 00:00:00 2001
From: Stratos Karafotis
Date: Thu, 28 Feb 2013 16:57:32 +0000
Subject: cpufreq: governors: Calculate iowait time only when necessary

Currently we always calculate the CPU iowait time and add it to idle
time. If we are in ondemand and io_is_busy is set, we recalculate
iowait time and subtract it from idle time again.

With this patch, iowait time is calculated only when necessary,
avoiding the double call to get_cpu_iowait_time_us. We use a parameter
in function get_cpu_idle_time to distinguish whether the iowait time
will be added to idle time or not, without the need of keeping
prev_cpu_iowait.

Signed-off-by: Stratos Karafotis
Acked-by: Viresh Kumar
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq_conservative.c |  2 +-
 drivers/cpufreq/cpufreq_governor.c     | 48 +++++++++++++---------------------
 drivers/cpufreq/cpufreq_governor.h     |  3 +--
 drivers/cpufreq/cpufreq_ondemand.c     | 11 +++++++-
 4 files changed, 30 insertions(+), 34 deletions(-)

(limited to 'drivers/cpufreq/cpufreq_governor.h')

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index a4af9c3f309e..52f76b19a883 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -240,7 +240,7 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf,
 		struct cs_cpu_dbs_info_s *dbs_info;
 		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
 		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
-					&dbs_info->cdbs.prev_cpu_wall);
+					&dbs_info->cdbs.prev_cpu_wall, 0);
 		if (cs_tuners->ignore_nice)
 			dbs_info->cdbs.prev_cpu_nice =
 				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 326f0c2e2bd5..443442df113b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -67,13 +67,13 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 	return cputime_to_usecs(idle_time);
 }
 
-u64 get_cpu_idle_time(unsigned int cpu, u64 *wall)
+u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
 {
-	u64 idle_time = get_cpu_idle_time_us(cpu, NULL);
+	u64 idle_time = get_cpu_idle_time_us(cpu, io_busy ? wall : NULL);
 
 	if (idle_time == -1ULL)
 		return get_cpu_idle_time_jiffy(cpu, wall);
-	else
+	else if (!io_busy)
 		idle_time += get_cpu_iowait_time_us(cpu, wall);
 
 	return idle_time;
@@ -100,13 +100,22 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 	/* Get Absolute Load (in terms of freq for ondemand gov) */
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_common_info *j_cdbs;
-		u64 cur_wall_time, cur_idle_time, cur_iowait_time;
-		unsigned int idle_time, wall_time, iowait_time;
+		u64 cur_wall_time, cur_idle_time;
+		unsigned int idle_time, wall_time;
 		unsigned int load;
+		int io_busy = 0;
 
 		j_cdbs = dbs_data->cdata->get_cpu_cdbs(j);
 
-		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);
+		/*
+		 * For the purpose of ondemand, waiting for disk IO is
+		 * an indication that you're performance critical, and
+		 * not that the system is actually idle. So do not add
+		 * the iowait time to the cpu idle time.
+		 */
+		if (dbs_data->cdata->governor == GOV_ONDEMAND)
+			io_busy = od_tuners->io_is_busy;
+		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
 
 		wall_time = (unsigned int)
 			(cur_wall_time - j_cdbs->prev_cpu_wall);
@@ -134,29 +143,6 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 			idle_time += jiffies_to_usecs(cur_nice_jiffies);
 		}
 
-		if (dbs_data->cdata->governor == GOV_ONDEMAND) {
-			struct od_cpu_dbs_info_s *od_j_dbs_info =
-				dbs_data->cdata->get_cpu_dbs_info_s(cpu);
-
-			cur_iowait_time = get_cpu_iowait_time_us(j,
-					&cur_wall_time);
-			if (cur_iowait_time == -1ULL)
-				cur_iowait_time = 0;
-
-			iowait_time = (unsigned int) (cur_iowait_time -
-					od_j_dbs_info->prev_cpu_iowait);
-			od_j_dbs_info->prev_cpu_iowait = cur_iowait_time;
-
-			/*
-			 * For the purpose of ondemand, waiting for disk IO is
-			 * an indication that you're performance critical, and
-			 * not that the system is actually idle. So subtract the
-			 * iowait time from the cpu idle time.
-			 */
-			if (od_tuners->io_is_busy && idle_time >= iowait_time)
-				idle_time -= iowait_time;
-		}
-
 		if (unlikely(!wall_time || wall_time < idle_time))
 			continue;
 
@@ -254,6 +240,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 	struct cs_dbs_tuners *cs_tuners = NULL;
 	struct cpu_dbs_common_info *cpu_cdbs;
 	unsigned int sampling_rate, latency, ignore_nice, j, cpu = policy->cpu;
+	int io_busy = 0;
 	int rc;
 
 	if (have_governor_per_policy())
@@ -353,6 +340,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		sampling_rate = od_tuners->sampling_rate;
 		ignore_nice = od_tuners->ignore_nice;
 		od_ops = dbs_data->cdata->gov_ops;
+		io_busy = od_tuners->io_is_busy;
 	}
 
 	switch (event) {
@@ -369,7 +357,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			j_cdbs->cpu = j;
 			j_cdbs->cur_policy = policy;
 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j,
-					&j_cdbs->prev_cpu_wall);
+					&j_cdbs->prev_cpu_wall, io_busy);
 			if (ignore_nice)
 				j_cdbs->prev_cpu_nice =
 					kcpustat_cpu(j).cpustat[CPUTIME_NICE];
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index c9c269f2b973..513cc8234e5e 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -148,7 +148,6 @@ struct cpu_dbs_common_info {
 
 struct od_cpu_dbs_info_s {
 	struct cpu_dbs_common_info cdbs;
-	u64 prev_cpu_iowait;
 	struct cpufreq_frequency_table *freq_table;
 	unsigned int freq_lo;
 	unsigned int freq_lo_jiffies;
@@ -256,7 +255,7 @@ static ssize_t show_sampling_rate_min_gov_pol			\
 	return sprintf(buf, "%u\n", dbs_data->min_sampling_rate);	\
 }
 
-u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
+u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
 bool need_load_eval(struct cpu_dbs_common_info *cdbs,
 		unsigned int sampling_rate);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 459f9ee39c74..14714787b724 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -339,11 +339,20 @@ static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
+	unsigned int j;
 
 	ret = sscanf(buf, "%u", &input);
 	if (ret != 1)
 		return -EINVAL;
 	od_tuners->io_is_busy = !!input;
+
+	/* we need to re-evaluate prev_cpu_idle */
+	for_each_online_cpu(j) {
+		struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
+				j);
+		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
+			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
+	}
 	return count;
 }
 
@@ -414,7 +423,7 @@ static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf,
 		struct od_cpu_dbs_info_s *dbs_info;
 		dbs_info = &per_cpu(od_cpu_dbs_info, j);
 		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
-			&dbs_info->cdbs.prev_cpu_wall);
+			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
 		if (od_tuners->ignore_nice)
 			dbs_info->cdbs.prev_cpu_nice =
 				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
--
cgit v1.2.3
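The new parameter makes the iowait policy explicit at each call site.
A calling-convention sketch (cpu, wall and idle are illustrative
locals, not names from the patch):

    	u64 wall, idle;

    	/* io_busy == 0: iowait counted as idle (conservative's view) */
    	idle = get_cpu_idle_time(cpu, &wall, 0);

    	/* io_busy == 1: iowait counted as busy (ondemand with io_is_busy) */
    	idle = get_cpu_idle_time(cpu, &wall, 1);
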
From fb30809efa3edeb692a6b29125a07c9eceb322dc Mon Sep 17 00:00:00 2001
From: Jacob Shin
Date: Tue, 2 Apr 2013 09:56:56 -0500
Subject: cpufreq: ondemand: allow custom powersave_bias_target handler to be
 registered

This allows another (arch-specific) driver to hook into the existing
powersave bias function of the ondemand governor, i.e. it allows an
AMD-specific powersave bias function (in a separate AMD-specific
driver) to aid the ondemand governor's frequency transition decisions.

Signed-off-by: Jacob Shin
Acked-by: Thomas Renninger
Acked-by: Borislav Petkov
Acked-by: Viresh Kumar
Signed-off-by: Rafael J. Wysocki
---
 drivers/cpufreq/cpufreq_governor.h |  4 +++
 drivers/cpufreq/cpufreq_ondemand.c | 61 +++++++++++++++++++++++++++++++++-----
 2 files changed, 57 insertions(+), 8 deletions(-)

(limited to 'drivers/cpufreq/cpufreq_governor.h')

diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 513cc8234e5e..9f668e94dead 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -263,4 +263,8 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		struct common_dbs_data *cdata, unsigned int event);
 void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 		unsigned int delay, bool all_cpus);
+void od_register_powersave_bias_handler(unsigned int (*f)
+		(struct cpufreq_policy *, unsigned int, unsigned int),
+		unsigned int powersave_bias);
+void od_unregister_powersave_bias_handler(void);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 14714787b724..b0ffef96bf77 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -24,6 +24,7 @@
 #include <linux/sysfs.h>
 #include <linux/tick.h>
 #include <linux/types.h>
+#include <linux/cpu.h>
 
 #include "cpufreq_governor.h"
 
@@ -40,6 +41,8 @@
 
 static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info);
 
+static struct od_ops od_ops;
+
 #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
 static struct cpufreq_governor cpufreq_gov_ondemand;
 #endif
@@ -80,7 +83,7 @@ static int should_io_be_busy(void)
  * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
  * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
  */
-static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
 		unsigned int freq_next, unsigned int relation)
 {
 	unsigned int freq_req, freq_reduc, freq_avg;
@@ -145,7 +148,8 @@ static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq)
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 
 	if (od_tuners->powersave_bias)
-		freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H);
+		freq = od_ops.powersave_bias_target(p, freq,
+				CPUFREQ_RELATION_H);
 	else if (p->cur == p->max)
 		return;
 
@@ -205,12 +209,12 @@ static void od_check_cpu(int cpu, unsigned int load_freq)
 		if (!od_tuners->powersave_bias) {
 			__cpufreq_driver_target(policy, freq_next,
 					CPUFREQ_RELATION_L);
-		} else {
-			int freq = powersave_bias_target(policy, freq_next,
-					CPUFREQ_RELATION_L);
-			__cpufreq_driver_target(policy, freq,
-					CPUFREQ_RELATION_L);
+			return;
 		}
+
+		freq_next = od_ops.powersave_bias_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
 	}
 }
 
@@ -557,7 +561,7 @@ define_get_cpu_dbs_routines(od_cpu_dbs_info);
 
 static struct od_ops od_ops = {
 	.powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu,
-	.powersave_bias_target = powersave_bias_target,
+	.powersave_bias_target = generic_powersave_bias_target,
 	.freq_increase = dbs_freq_increase,
 };
 
@@ -574,6 +578,47 @@ static struct common_dbs_data od_dbs_cdata = {
 	.exit = od_exit,
 };
 
+static void od_set_powersave_bias(unsigned int powersave_bias)
+{
+	struct cpufreq_policy *policy;
+	struct dbs_data *dbs_data;
+	struct od_dbs_tuners *od_tuners;
+	unsigned int cpu;
+	cpumask_t done;
+
+	cpumask_clear(&done);
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, &done))
+			continue;
+
+		policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy;
+		dbs_data = policy->governor_data;
+		od_tuners = dbs_data->tuners;
+		od_tuners->powersave_bias = powersave_bias;
+
+		cpumask_or(&done, &done, policy->cpus);
+	}
+	put_online_cpus();
+}
+
+void od_register_powersave_bias_handler(unsigned int (*f)
+		(struct cpufreq_policy *, unsigned int, unsigned int),
+		unsigned int powersave_bias)
+{
+	od_ops.powersave_bias_target = f;
+	od_set_powersave_bias(powersave_bias);
+}
+EXPORT_SYMBOL_GPL(od_register_powersave_bias_handler);
+
+void od_unregister_powersave_bias_handler(void)
+{
+	od_ops.powersave_bias_target = generic_powersave_bias_target;
+	od_set_powersave_bias(0);
+}
+EXPORT_SYMBOL_GPL(od_unregister_powersave_bias_handler);
+
 static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		unsigned int event)
 {
--
cgit v1.2.3
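For a consumer's view of the new hooks, here is a registration sketch
for an arch-specific module. The driver and handler names, and the
powersave_bias seed value, are illustrative; they follow the pattern
the commit message anticipates for the separate AMD-specific driver
rather than code from this patch:

    #include <linux/module.h>
    #include <linux/cpufreq.h>
    #include "cpufreq_governor.h"

    static unsigned int my_powersave_bias_target(struct cpufreq_policy *policy,
    		unsigned int freq_next, unsigned int relation)
    {
    	/* consult arch-specific feedback, then pick a target frequency */
    	return freq_next;
    }

    static int __init my_driver_init(void)
    {
    	/* take over ondemand's target selection; seed powersave_bias */
    	od_register_powersave_bias_handler(my_powersave_bias_target, 400);
    	return 0;
    }
    module_init(my_driver_init);

    static void __exit my_driver_exit(void)
    {
    	/* restore generic_powersave_bias_target and zero the bias */
    	od_unregister_powersave_bias_handler();
    }
    module_exit(my_driver_exit);

Unregistering restores the generic handler and resets powersave_bias
to 0 on every policy, so ondemand behaves as if the module had never
been loaded.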