diff options
| -rw-r--r-- | Documentation/thermal/power_allocator.txt | 247 | ||||
| -rw-r--r-- | drivers/thermal/Kconfig | 15 | ||||
| -rw-r--r-- | drivers/thermal/Makefile | 1 | ||||
| -rw-r--r-- | drivers/thermal/power_allocator.c | 520 | ||||
| -rw-r--r-- | drivers/thermal/thermal_core.c | 9 | ||||
| -rw-r--r-- | drivers/thermal/thermal_core.h | 8 | ||||
| -rw-r--r-- | include/linux/thermal.h | 37 | 
7 files changed, 830 insertions, 7 deletions
diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt new file mode 100644 index 000000000000..c3797b529991 --- /dev/null +++ b/Documentation/thermal/power_allocator.txt @@ -0,0 +1,247 @@ +Power allocator governor tunables +================================= + +Trip points +----------- + +The governor requires the following two passive trip points: + +1.  "switch on" trip point: temperature above which the governor +    control loop starts operating.  This is the first passive trip +    point of the thermal zone. + +2.  "desired temperature" trip point: it should be higher than the +    "switch on" trip point.  This is the target temperature the governor +    is controlling for.  This is the last passive trip point of the +    thermal zone. + +PID Controller +-------------- + +The power allocator governor implements a +Proportional-Integral-Derivative controller (PID controller) with +temperature as the control input and power as the controlled output: + +    P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power + +where +    e = desired_temperature - current_temperature +    err_integral is the sum of previous errors +    diff_err = e - previous_error + +It is similar to the one depicted below: + +                                      k_d +                                       | +current_temp                           | +     |                                 v +     |                +----------+   +---+ +     |         +----->| diff_err |-->| X |------+ +     |         |      +----------+   +---+      | +     |         |                                |      tdp        actor +     |         |                      k_i       |       |  get_requested_power() +     |         |                       |        |       |        |     | +     |         |                       |        |       |        |     | ... 
+     v         |                       v        v       v        v     v +   +---+       |      +-------+      +---+    +---+   +---+   +----------+ +   | S |-------+----->| sum e |----->| X |--->| S |-->| S |-->|power     | +   +---+       |      +-------+      +---+    +---+   +---+   |allocation| +     ^         |                                ^             +----------+ +     |         |                                |                |     | +     |         |        +---+                   |                |     | +     |         +------->| X |-------------------+                v     v +     |                  +---+                               granted performance +desired_temperature       ^ +                          | +                          | +                      k_po/k_pu + +Sustainable power +----------------- + +An estimate of the sustainable dissipatable power (in mW) should be +provided while registering the thermal zone.  This estimates the +sustained power that can be dissipated at the desired control +temperature.  This is the maximum sustained power for allocation at +the desired maximum temperature.  The actual sustained power can vary +for a number of reasons.  The closed loop controller will take care of +variations such as environmental conditions, and some factors related +to the speed-grade of the silicon.  `sustainable_power` is therefore +simply an estimate, and may be tuned to affect the aggressiveness of +the thermal ramp. For reference, the sustainable power of a 4" phone +is typically 2000mW, while on a 10" tablet is around 4500mW (may vary +depending on screen size). + +If you are using device tree, do add it as a property of the +thermal-zone.  For example: + +	thermal-zones { +		soc_thermal { +			polling-delay = <1000>; +			polling-delay-passive = <100>; +			sustainable-power = <2500>; +			... + +Instead, if the thermal zone is registered from the platform code, pass a +`thermal_zone_params` that has a `sustainable_power`.  
If no +`thermal_zone_params` were being passed, then something like below +will suffice: + +	static const struct thermal_zone_params tz_params = { +		.sustainable_power = 3500, +	}; + +and then pass `tz_params` as the 5th parameter to +`thermal_zone_device_register()` + +k_po and k_pu +------------- + +The implementation of the PID controller in the power allocator +thermal governor allows the configuration of two proportional term +constants: `k_po` and `k_pu`.  `k_po` is the proportional term +constant during temperature overshoot periods (current temperature is +above "desired temperature" trip point).  Conversely, `k_pu` is the +proportional term constant during temperature undershoot periods +(current temperature below "desired temperature" trip point). + +These controls are intended as the primary mechanism for configuring +the permitted thermal "ramp" of the system.  For instance, a lower +`k_pu` value will provide a slower ramp, at the cost of capping +available capacity at a low temperature.  On the other hand, a high +value of `k_pu` will result in the governor granting very high power +whilst temperature is low, and may lead to temperature overshooting. + +The default value for `k_pu` is: + +    2 * sustainable_power / (desired_temperature - switch_on_temp) + +This means that at `switch_on_temp` the output of the controller's +proportional term will be 2 * `sustainable_power`.  The default value +for `k_po` is: + +    sustainable_power / (desired_temperature - switch_on_temp) + +Focusing on the proportional and feed forward values of the PID +controller equation we have: + +    P_max = k_p * e + sustainable_power + +The proportional term is proportional to the difference between the +desired temperature and the current one.  When the current temperature +is the desired one, then the proportional component is zero and +`P_max` = `sustainable_power`.  That is, the system should operate in +thermal equilibrium under constant load.  
`sustainable_power` is only +an estimate, which is the reason for closed-loop control such as this. + +Expanding `k_pu` we get: +    P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) + +        sustainable_power + +where +    T_set is the desired temperature +    T is the current temperature +    T_on is the switch on temperature + +When the current temperature is the switch_on temperature, the above +formula becomes: + +    P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) + +        sustainable_power = 2 * sustainable_power + sustainable_power = +        3 * sustainable_power + +Therefore, the proportional term alone linearly decreases power from +3 * `sustainable_power` to `sustainable_power` as the temperature +rises from the switch on temperature to the desired temperature. + +k_i and integral_cutoff +----------------------- + +`k_i` configures the PID loop's integral term constant.  This term +allows the PID controller to compensate for long term drift and for +the quantized nature of the output control: cooling devices can't set +the exact power that the governor requests.  When the temperature +error is below `integral_cutoff`, errors are accumulated in the +integral term.  This term is then multiplied by `k_i` and the result +added to the output of the controller.  Typically `k_i` is set low (1 +or 2) and `integral_cutoff` is 0. + +k_d +--- + +`k_d` configures the PID loop's derivative term constant.  It's +recommended to leave it as the default: 0. + +Cooling device power API +======================== + +Cooling devices controlled by this governor must supply the additional +"power" API in their `cooling_device_ops`.  It consists of three ops: + +1. 
int get_requested_power(struct thermal_cooling_device *cdev, +	struct thermal_zone_device *tz, u32 *power); +@cdev: The `struct thermal_cooling_device` pointer +@tz: thermal zone in which we are currently operating +@power: pointer in which to store the calculated power + +`get_requested_power()` calculates the power requested by the device +in milliwatts and stores it in @power.  It should return 0 on +success, -E* on failure.  This is currently used by the power +allocator governor to calculate how much power to give to each cooling +device. + +2. int state2power(struct thermal_cooling_device *cdev, struct +        thermal_zone_device *tz, unsigned long state, u32 *power); +@cdev: The `struct thermal_cooling_device` pointer +@tz: thermal zone in which we are currently operating +@state: A cooling device state +@power: pointer in which to store the equivalent power + +Convert cooling device state @state into power consumption in +milliwatts and store it in @power.  It should return 0 on success, -E* +on failure.  This is currently used by thermal core to calculate the +maximum power that an actor can consume. + +3. int power2state(struct thermal_cooling_device *cdev, u32 power, +	unsigned long *state); +@cdev: The `struct thermal_cooling_device` pointer +@power: power in milliwatts +@state: pointer in which to store the resulting state + +Calculate a cooling device state that would make the device consume at +most @power mW and store it in @state.  It should return 0 on success, +-E* on failure.  This is currently used by the thermal core to convert +a given power set by the power allocator governor to a state that the +cooling device can set.  It is a function because this conversion may +depend on external factors that may change so this function should return the +best conversion given "current circumstances". + +Cooling device weights +---------------------- + +Weights are a mechanism to bias the allocation among cooling +devices.  
They express the relative power efficiency of different +cooling devices.  Higher weight can be used to express higher power +efficiency.  Weighting is relative such that if each cooling device +has a weight of one they are considered equal.  This is particularly +useful in heterogeneous systems where two cooling devices may perform +the same kind of compute, but with different efficiency.  For example, +a system with two different types of processors. + +If the thermal zone is registered using +`thermal_zone_device_register()` (i.e., platform code), then weights +are passed as part of the thermal zone's `thermal_bind_parameters`. +If the platform is registered using device tree, then they are passed +as the `contribution` property of each map in the `cooling-maps` node. + +Limitations of the power allocator governor +=========================================== + +The power allocator governor's PID controller works best if there is a +periodic tick.  If you have a driver that calls +`thermal_zone_device_update()` (or anything that ends up calling the +governor's `throttle()` function) repetitively, the governor response +won't be very good.  Note that this is not particular to this +governor, step-wise will also misbehave if you call its throttle() +faster than the normal thermal framework tick (due to interrupts for +example) as it will overreact. diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 30aee81e9f5b..a1b43eab0a70 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -71,6 +71,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE  	  Select this if you want to let the user space manage the  	  platform thermals. +config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR +	bool "power_allocator" +	select THERMAL_GOV_POWER_ALLOCATOR +	help +	  Select this if you want to control temperature based on +	  system and device power allocation. This governor can only +	  operate on cooling devices that implement the power API. 
+  endchoice  config THERMAL_GOV_FAIR_SHARE @@ -99,6 +107,13 @@ config THERMAL_GOV_USER_SPACE  	help  	  Enable this to let the user space manage the platform thermals. +config THERMAL_GOV_POWER_ALLOCATOR +	bool "Power allocator thermal governor" +	select THERMAL_POWER_ACTOR +	help +	  Enable this to manage platform thermals by dynamically +	  allocating and limiting power to devices. +  config CPU_THERMAL  	bool "generic cpu cooling support"  	depends on CPU_FREQ diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 1fe86652cfb6..b1783cf37ed2 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -14,6 +14,7 @@ thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE)	+= fair_share.o  thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG)	+= gov_bang_bang.o  thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE)	+= step_wise.o  thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE)	+= user_space.o +thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR)	+= power_allocator.o  # cpufreq cooling  thermal_sys-$(CONFIG_CPU_THERMAL)	+= cpu_cooling.o diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c new file mode 100644 index 000000000000..67982d79b76c --- /dev/null +++ b/drivers/thermal/power_allocator.c @@ -0,0 +1,520 @@ +/* + * A power allocator to manage temperature + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) "Power allocator: " fmt + +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#include "thermal_core.h" + +#define FRAC_BITS 10 +#define int_to_frac(x) ((x) << FRAC_BITS) +#define frac_to_int(x) ((x) >> FRAC_BITS) + +/** + * mul_frac() - multiply two fixed-point numbers + * @x:	first multiplicand + * @y:	second multiplicand + * + * Return: the result of multiplying two fixed-point numbers.  The + * result is also a fixed-point number. + */ +static inline s64 mul_frac(s64 x, s64 y) +{ +	return (x * y) >> FRAC_BITS; +} + +/** + * div_frac() - divide two fixed-point numbers + * @x:	the dividend + * @y:	the divisor + * + * Return: the result of dividing two fixed-point numbers.  The + * result is also a fixed-point number. + */ +static inline s64 div_frac(s64 x, s64 y) +{ +	return div_s64(x << FRAC_BITS, y); +} + +/** + * struct power_allocator_params - parameters for the power allocator governor + * @err_integral:	accumulated error in the PID controller. + * @prev_err:	error in the previous iteration of the PID controller. + *		Used to calculate the derivative term. + * @trip_switch_on:	first passive trip point of the thermal zone.  The + *			governor switches on when this trip point is crossed. + * @trip_max_desired_temperature:	last passive trip point of the thermal + *					zone.  The temperature we are + *					controlling for. 
+ */ +struct power_allocator_params { +	s64 err_integral; +	s32 prev_err; +	int trip_switch_on; +	int trip_max_desired_temperature; +}; + +/** + * pid_controller() - PID controller + * @tz:	thermal zone we are operating in + * @current_temp:	the current temperature in millicelsius + * @control_temp:	the target temperature in millicelsius + * @max_allocatable_power:	maximum allocatable power for this thermal zone + * + * This PID controller increases the available power budget so that the + * temperature of the thermal zone gets as close as possible to + * @control_temp and limits the power if it exceeds it.  k_po is the + * proportional term when we are overshooting, k_pu is the + * proportional term when we are undershooting.  integral_cutoff is a + * threshold below which we stop accumulating the error.  The + * accumulated error is only valid if the requested power will make + * the system warmer.  If the system is mostly idle, there's no point + * in accumulating positive error. + * + * Return: The power budget for the next period. + */ +static u32 pid_controller(struct thermal_zone_device *tz, +			  unsigned long current_temp, +			  unsigned long control_temp, +			  u32 max_allocatable_power) +{ +	s64 p, i, d, power_range; +	s32 err, max_power_frac; +	struct power_allocator_params *params = tz->governor_data; + +	max_power_frac = int_to_frac(max_allocatable_power); + +	err = ((s32)control_temp - (s32)current_temp); +	err = int_to_frac(err); + +	/* Calculate the proportional term */ +	p = mul_frac(err < 0 ? 
tz->tzp->k_po : tz->tzp->k_pu, err); + +	/* +	 * Calculate the integral term +	 * +	 * if the error is less than cut off allow integration (but +	 * the integral is limited to max power) +	 */ +	i = mul_frac(tz->tzp->k_i, params->err_integral); + +	if (err < int_to_frac(tz->tzp->integral_cutoff)) { +		s64 i_next = i + mul_frac(tz->tzp->k_i, err); + +		if (abs64(i_next) < max_power_frac) { +			i = i_next; +			params->err_integral += err; +		} +	} + +	/* +	 * Calculate the derivative term +	 * +	 * We do err - prev_err, so with a positive k_d, a decreasing +	 * error (i.e. driving closer to the line) results in less +	 * power being applied, slowing down the controller) +	 */ +	d = mul_frac(tz->tzp->k_d, err - params->prev_err); +	d = div_frac(d, tz->passive_delay); +	params->prev_err = err; + +	power_range = p + i + d; + +	/* feed-forward the known sustainable dissipatable power */ +	power_range = tz->tzp->sustainable_power + frac_to_int(power_range); + +	return clamp(power_range, (s64)0, (s64)max_allocatable_power); +} + +/** + * divvy_up_power() - divvy the allocated power between the actors + * @req_power:	each actor's requested power + * @max_power:	each actor's maximum available power + * @num_actors:	size of the @req_power, @max_power and @granted_power's array + * @total_req_power: sum of @req_power + * @power_range:	total allocated power + * @granted_power:	output array: each actor's granted power + * @extra_actor_power:	an appropriately sized array to be used in the + *			function as temporary storage of the extra power given + *			to the actors + * + * This function divides the total allocated power (@power_range) + * fairly between the actors.  It first tries to give each actor a + * share of the @power_range according to how much power it requested + * compared to the rest of the actors.  For example, if only one actor + * requests power, then it receives all the @power_range.  
If + * three actors each requests 1mW, each receives a third of the + * @power_range. + * + * If any actor received more than their maximum power, then that + * surplus is re-divvied among the actors based on how far they are + * from their respective maximums. + * + * Granted power for each actor is written to @granted_power, which + * should've been allocated by the calling function. + */ +static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, +			   u32 total_req_power, u32 power_range, +			   u32 *granted_power, u32 *extra_actor_power) +{ +	u32 extra_power, capped_extra_power; +	int i; + +	/* +	 * Prevent division by 0 if none of the actors request power. +	 */ +	if (!total_req_power) +		total_req_power = 1; + +	capped_extra_power = 0; +	extra_power = 0; +	for (i = 0; i < num_actors; i++) { +		u64 req_range = req_power[i] * power_range; + +		granted_power[i] = div_u64(req_range, total_req_power); + +		if (granted_power[i] > max_power[i]) { +			extra_power += granted_power[i] - max_power[i]; +			granted_power[i] = max_power[i]; +		} + +		extra_actor_power[i] = max_power[i] - granted_power[i]; +		capped_extra_power += extra_actor_power[i]; +	} + +	if (!extra_power) +		return; + +	/* +	 * Re-divvy the reclaimed extra among actors based on +	 * how far they are from the max +	 */ +	extra_power = min(extra_power, capped_extra_power); +	if (capped_extra_power > 0) +		for (i = 0; i < num_actors; i++) +			granted_power[i] += (extra_actor_power[i] * +					extra_power) / capped_extra_power; +} + +static int allocate_power(struct thermal_zone_device *tz, +			  unsigned long current_temp, +			  unsigned long control_temp) +{ +	struct thermal_instance *instance; +	struct power_allocator_params *params = tz->governor_data; +	u32 *req_power, *max_power, *granted_power, *extra_actor_power; +	u32 total_req_power, max_allocatable_power; +	u32 power_range; +	int i, num_actors, total_weight, ret = 0; +	int trip_max_desired_temperature = 
params->trip_max_desired_temperature; + +	mutex_lock(&tz->lock); + +	num_actors = 0; +	total_weight = 0; +	list_for_each_entry(instance, &tz->thermal_instances, tz_node) { +		if ((instance->trip == trip_max_desired_temperature) && +		    cdev_is_power_actor(instance->cdev)) { +			num_actors++; +			total_weight += instance->weight; +		} +	} + +	/* +	 * We need to allocate three arrays of the same size: +	 * req_power, max_power and granted_power.  They are going to +	 * be needed until this function returns.  Allocate them all +	 * in one go to simplify the allocation and deallocation +	 * logic. +	 */ +	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power)); +	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power)); +	BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); +	req_power = devm_kcalloc(&tz->device, num_actors * 4, +				 sizeof(*req_power), GFP_KERNEL); +	if (!req_power) { +		ret = -ENOMEM; +		goto unlock; +	} + +	max_power = &req_power[num_actors]; +	granted_power = &req_power[2 * num_actors]; +	extra_actor_power = &req_power[3 * num_actors]; + +	i = 0; +	total_req_power = 0; +	max_allocatable_power = 0; + +	list_for_each_entry(instance, &tz->thermal_instances, tz_node) { +		int weight; +		struct thermal_cooling_device *cdev = instance->cdev; + +		if (instance->trip != trip_max_desired_temperature) +			continue; + +		if (!cdev_is_power_actor(cdev)) +			continue; + +		if (cdev->ops->get_requested_power(cdev, tz, &req_power[i])) +			continue; + +		if (!total_weight) +			weight = 1 << FRAC_BITS; +		else +			weight = instance->weight; + +		req_power[i] = frac_to_int(weight * req_power[i]); + +		if (power_actor_get_max_power(cdev, tz, &max_power[i])) +			continue; + +		total_req_power += req_power[i]; +		max_allocatable_power += max_power[i]; + +		i++; +	} + +	power_range = pid_controller(tz, current_temp, control_temp, +				     max_allocatable_power); + +	divvy_up_power(req_power, max_power, num_actors, total_req_power, +		       power_range, 
granted_power, extra_actor_power); + +	i = 0; +	list_for_each_entry(instance, &tz->thermal_instances, tz_node) { +		if (instance->trip != trip_max_desired_temperature) +			continue; + +		if (!cdev_is_power_actor(instance->cdev)) +			continue; + +		power_actor_set_power(instance->cdev, instance, +				      granted_power[i]); + +		i++; +	} + +	devm_kfree(&tz->device, req_power); +unlock: +	mutex_unlock(&tz->lock); + +	return ret; +} + +static int get_governor_trips(struct thermal_zone_device *tz, +			      struct power_allocator_params *params) +{ +	int i, ret, last_passive; +	bool found_first_passive; + +	found_first_passive = false; +	last_passive = -1; +	ret = -EINVAL; + +	for (i = 0; i < tz->trips; i++) { +		enum thermal_trip_type type; + +		ret = tz->ops->get_trip_type(tz, i, &type); +		if (ret) +			return ret; + +		if (!found_first_passive) { +			if (type == THERMAL_TRIP_PASSIVE) { +				params->trip_switch_on = i; +				found_first_passive = true; +			} +		} else if (type == THERMAL_TRIP_PASSIVE) { +			last_passive = i; +		} else { +			break; +		} +	} + +	if (last_passive != -1) { +		params->trip_max_desired_temperature = last_passive; +		ret = 0; +	} else { +		ret = -EINVAL; +	} + +	return ret; +} + +static void reset_pid_controller(struct power_allocator_params *params) +{ +	params->err_integral = 0; +	params->prev_err = 0; +} + +static void allow_maximum_power(struct thermal_zone_device *tz) +{ +	struct thermal_instance *instance; +	struct power_allocator_params *params = tz->governor_data; + +	list_for_each_entry(instance, &tz->thermal_instances, tz_node) { +		if ((instance->trip != params->trip_max_desired_temperature) || +		    (!cdev_is_power_actor(instance->cdev))) +			continue; + +		instance->target = 0; +		instance->cdev->updated = false; +		thermal_cdev_update(instance->cdev); +	} +} + +/** + * power_allocator_bind() - bind the power_allocator governor to a thermal zone + * @tz:	thermal zone to bind it to + * + * Check that the thermal zone is valid 
for this governor, that is, it + * has two thermal trips.  If so, initialize the PID controller + * parameters and bind it to the thermal zone. + * + * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM + * if we ran out of memory. + */ +static int power_allocator_bind(struct thermal_zone_device *tz) +{ +	int ret; +	struct power_allocator_params *params; +	unsigned long switch_on_temp, control_temp; +	u32 temperature_threshold; + +	if (!tz->tzp || !tz->tzp->sustainable_power) { +		dev_err(&tz->device, +			"power_allocator: missing sustainable_power\n"); +		return -EINVAL; +	} + +	params = devm_kzalloc(&tz->device, sizeof(*params), GFP_KERNEL); +	if (!params) +		return -ENOMEM; + +	ret = get_governor_trips(tz, params); +	if (ret) { +		dev_err(&tz->device, +			"thermal zone %s has wrong trip setup for power allocator\n", +			tz->type); +		goto free; +	} + +	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, +				     &switch_on_temp); +	if (ret) +		goto free; + +	ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, +				     &control_temp); +	if (ret) +		goto free; + +	temperature_threshold = control_temp - switch_on_temp; + +	tz->tzp->k_po = tz->tzp->k_po ?: +		int_to_frac(tz->tzp->sustainable_power) / temperature_threshold; +	tz->tzp->k_pu = tz->tzp->k_pu ?: +		int_to_frac(2 * tz->tzp->sustainable_power) / +		temperature_threshold; +	tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000; +	/* +	 * The default for k_d and integral_cutoff is 0, so we can +	 * leave them as they are. 
+	 */ + +	reset_pid_controller(params); + +	tz->governor_data = params; + +	return 0; + +free: +	devm_kfree(&tz->device, params); +	return ret; +} + +static void power_allocator_unbind(struct thermal_zone_device *tz) +{ +	dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); +	devm_kfree(&tz->device, tz->governor_data); +	tz->governor_data = NULL; +} + +static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) +{ +	int ret; +	unsigned long switch_on_temp, control_temp, current_temp; +	struct power_allocator_params *params = tz->governor_data; + +	/* +	 * We get called for every trip point but we only need to do +	 * our calculations once +	 */ +	if (trip != params->trip_max_desired_temperature) +		return 0; + +	ret = thermal_zone_get_temp(tz, &current_temp); +	if (ret) { +		dev_warn(&tz->device, "Failed to get temperature: %d\n", ret); +		return ret; +	} + +	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, +				     &switch_on_temp); +	if (ret) { +		dev_warn(&tz->device, +			 "Failed to get switch on temperature: %d\n", ret); +		return ret; +	} + +	if (current_temp < switch_on_temp) { +		tz->passive = 0; +		reset_pid_controller(params); +		allow_maximum_power(tz); +		return 0; +	} + +	tz->passive = 1; + +	ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, +				&control_temp); +	if (ret) { +		dev_warn(&tz->device, +			 "Failed to get the maximum desired temperature: %d\n", +			 ret); +		return ret; +	} + +	return allocate_power(tz, current_temp, control_temp); +} + +static struct thermal_governor thermal_gov_power_allocator = { +	.name		= "power_allocator", +	.bind_to_tz	= power_allocator_bind, +	.unbind_from_tz	= power_allocator_unbind, +	.throttle	= power_allocator_throttle, +}; + +int thermal_gov_power_allocator_register(void) +{ +	return thermal_register_governor(&thermal_gov_power_allocator); +} + +void thermal_gov_power_allocator_unregister(void) +{ +	
thermal_unregister_governor(&thermal_gov_power_allocator); +} diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 263628b0e862..b389bc2ec0fa 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1616,7 +1616,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz)  struct thermal_zone_device *thermal_zone_device_register(const char *type,  	int trips, int mask, void *devdata,  	struct thermal_zone_device_ops *ops, -	const struct thermal_zone_params *tzp, +	struct thermal_zone_params *tzp,  	int passive_delay, int polling_delay)  {  	struct thermal_zone_device *tz; @@ -1968,7 +1968,11 @@ static int __init thermal_register_governors(void)  	if (result)  		return result; -	return thermal_gov_user_space_register(); +	result = thermal_gov_user_space_register(); +	if (result) +		return result; + +	return thermal_gov_power_allocator_register();  }  static void thermal_unregister_governors(void) @@ -1977,6 +1981,7 @@ static void thermal_unregister_governors(void)  	thermal_gov_fair_share_unregister();  	thermal_gov_bang_bang_unregister();  	thermal_gov_user_space_unregister(); +	thermal_gov_power_allocator_unregister();  }  static int __init thermal_init(void) diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index faebe881f062..8a6624488cc5 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -88,6 +88,14 @@ static inline int thermal_gov_user_space_register(void) { return 0; }  static inline void thermal_gov_user_space_unregister(void) {}  #endif /* CONFIG_THERMAL_GOV_USER_SPACE */ +#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR +int thermal_gov_power_allocator_register(void); +void thermal_gov_power_allocator_unregister(void); +#else +static inline int thermal_gov_power_allocator_register(void) { return 0; } +static inline void thermal_gov_power_allocator_unregister(void) {} +#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */ +  /* device tree support 
*/  #ifdef CONFIG_THERMAL_OF  int of_parse_thermal_zones(void); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index bf3c55f405c2..6bbe11c97cea 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -59,6 +59,8 @@  #define DEFAULT_THERMAL_GOVERNOR       "fair_share"  #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE)  #define DEFAULT_THERMAL_GOVERNOR       "user_space" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) +#define DEFAULT_THERMAL_GOVERNOR       "power_allocator"  #endif  struct thermal_zone_device; @@ -154,8 +156,7 @@ struct thermal_attr {   * @devdata:	private pointer for device private data   * @trips:	number of trip points the thermal zone supports   * @passive_delay:	number of milliseconds to wait between polls when - *			performing passive cooling.  Currenty only used by the - *			step-wise governor + *			performing passive cooling.   * @polling_delay:	number of milliseconds to wait between polls when   *			checking whether trip points have been crossed (0 for   *			interrupt driven systems) @@ -165,7 +166,6 @@ struct thermal_attr {   * @last_temperature:	previous temperature read   * @emul_temperature:	emulated temperature when using CONFIG_THERMAL_EMULATION   * @passive:		1 if you've crossed a passive trip point, 0 otherwise. - *			Currenty only used by the step-wise governor.   * @forced_passive:	If > 0, temperature at which to switch on all ACPI   *			processor cooling devices.  Currently only used by the   *			step-wise governor. 
@@ -197,7 +197,7 @@ struct thermal_zone_device {  	int passive;  	unsigned int forced_passive;  	struct thermal_zone_device_ops *ops; -	const struct thermal_zone_params *tzp; +	struct thermal_zone_params *tzp;  	struct thermal_governor *governor;  	void *governor_data;  	struct list_head thermal_instances; @@ -275,6 +275,33 @@ struct thermal_zone_params {  	int num_tbps;	/* Number of tbp entries */  	struct thermal_bind_params *tbp; + +	/* +	 * Sustainable power (heat) that this thermal zone can dissipate in +	 * mW +	 */ +	u32 sustainable_power; + +	/* +	 * Proportional parameter of the PID controller when +	 * overshooting (i.e., when temperature is above the target) +	 */ +	s32 k_po; + +	/* +	 * Proportional parameter of the PID controller when +	 * undershooting +	 */ +	s32 k_pu; + +	/* Integral parameter of the PID controller */ +	s32 k_i; + +	/* Derivative parameter of the PID controller */ +	s32 k_d; + +	/* threshold below which the error is no longer accumulated */ +	s32 integral_cutoff;  };  struct thermal_genl_event { @@ -350,7 +377,7 @@ int power_actor_set_power(struct thermal_cooling_device *,  			  struct thermal_instance *, u32);  struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,  		void *, struct thermal_zone_device_ops *, -		const struct thermal_zone_params *, int, int); +		struct thermal_zone_params *, int, int);  void thermal_zone_device_unregister(struct thermal_zone_device *);  int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int,  |