diff options
23 files changed, 1501 insertions, 208 deletions
diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 9e3c3b33514c..0a94224ad296 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is 0. The PCC specification does -not include a field to expose this value currently. +The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification +does not include a field to expose this value currently. 2.3 cpuinfo_cur_freq: --------------------- diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index 3a07a87fef20..6aca64f289b6 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -242,6 +242,23 @@ nodes to be present and contain the properties described below. Definition: Specifies the syscon node controlling the cpu core power domains. + - dynamic-power-coefficient + Usage: optional + Value type: <prop-encoded-array> + Definition: A u32 value that represents the running time dynamic + power coefficient in units of mW/MHz/uVolt^2. The + coefficient can either be calculated from power + measurements or derived by analysis. + + The dynamic power consumption of the CPU is + proportional to the square of the Voltage (V) and + the clock frequency (f). The coefficient is used to + calculate the dynamic power as below - + + Pdyn = dynamic-power-coefficient * V^2 * f + + where voltage is in uV, frequency is in MHz. + Example 1 (dual-cluster big.LITTLE system 32-bit): cpus { diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt new file mode 100644 index 000000000000..d91a02a3b6b0 --- /dev/null +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-st.txt @@ -0,0 +1,91 @@ +Binding for ST's CPUFreq driver +=============================== + +ST's CPUFreq driver attempts to read 'process' and 'version' attributes +from the SoC, then supplies the OPP framework with 'prop' and 'supported +hardware' information respectively. The framework is then able to read +the DT and operate in the usual way. + +For more information about the expected DT format [See: ../opp/opp.txt]. + +Frequency Scaling only +---------------------- + +No vendor specific driver required for this. + +Located in CPU's node: + +- operating-points : [See: ../power/opp.txt] + +Example [safe] +-------------- + +cpus { + cpu@0 { + /* kHz uV */ + operating-points = <1500000 0 + 1200000 0 + 800000 0 + 500000 0>; + }; +}; + +Dynamic Voltage and Frequency Scaling (DVFS) +-------------------------------------------- + +This requires the ST CPUFreq driver to supply 'process' and 'version' info. + +Located in CPU's node: + +- operating-points-v2 : [See ../power/opp.txt] + +Example [unsafe] +---------------- + +cpus { + cpu@0 { + operating-points-v2 = <&cpu0_opp_table>; + }; +}; + +cpu0_opp_table: opp_table { + compatible = "operating-points-v2"; + + /* ############################################################### */ + /* # WARNING: Do not attempt to copy/replicate these nodes, # */ + /* # they are only to be supplied by the bootloader !!! # */ + /* ############################################################### */ + opp0 { + /* Major Minor Substrate */ + /* 2 all all */ + opp-supported-hw = <0x00000004 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1500000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1200000>; + opp-microvolt-pcode1 = <1200000>; + opp-microvolt-pcode2 = <1200000>; + opp-microvolt-pcode3 = <1200000>; + opp-microvolt-pcode4 = <1170000>; + opp-microvolt-pcode5 = <1140000>; + opp-microvolt-pcode6 = <1100000>; + opp-microvolt-pcode7 = <1070000>; + }; + + opp1 { + /* Major Minor Substrate */ + /* all all all */ + opp-supported-hw = <0xffffffff 0xffffffff 0xffffffff>; + opp-hz = /bits/ 64 <1200000000>; + clock-latency-ns = <10000000>; + + opp-microvolt-pcode0 = <1110000>; + opp-microvolt-pcode1 = <1150000>; + opp-microvolt-pcode2 = <1100000>; + opp-microvolt-pcode3 = <1080000>; + opp-microvolt-pcode4 = <1040000>; + opp-microvolt-pcode5 = <1020000>; + opp-microvolt-pcode6 = <980000>; + opp-microvolt-pcode7 = <930000>; + }; +}; diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 0cb44dc21f97..601256fe8c0d 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with phandle to a OPP table in their DT node. The OPP core will use this phandle to find the operating points for the device. -Devices may want to choose OPP tables at runtime and so can provide a list of -phandles here. But only *one* of them should be chosen at runtime. This must be -accompanied by a corresponding "operating-points-names" property, to uniquely -identify the OPP tables. - If required, this can be extended for SoC vendor specfic bindings. Such bindings should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt and should have a compatible description like: "operating-points-v2-<vendor>". -Optional properties: -- operating-points-names: Names of OPP tables (required if multiple OPP - tables are present), to uniquely identify them. The same list must be present - for all the CPUs which are sharing clock/voltage rails and hence the OPP - tables. - * OPP Table Node This describes the OPPs belonging to a device. This node can have following @@ -100,6 +89,14 @@ Optional properties: Entries for multiple regulators must be present in the same order as regulators are specified in device's DT node. +- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to + the above opp-microvolt property, but allows multiple voltage ranges to be + provided for the same OPP. At runtime, the platform can pick a <name> and + matching opp-microvolt-<name> property will be enabled for all OPPs. If the + platform doesn't pick a specific <name> or the <name> doesn't match with any + opp-microvolt-<name> properties, then opp-microvolt property shall be used, if + present. + - opp-microamp: The maximum current drawn by the device in microamperes considering system specific parameters (such as transients, process, aging, maximum operating temperature range etc.) as necessary. This may be used to @@ -112,6 +109,9 @@ Optional properties: for few regulators, then this should be marked as zero for them. If it isn't required for any regulator, then this property need not be present. +- opp-microamp-<name>: Named opp-microamp property. Similar to + opp-microvolt-<name> property, but for microamp instead. + - clock-latency-ns: Specifies the maximum possible transition latency (in nanoseconds) for switching to this OPP from any other OPP. @@ -123,6 +123,26 @@ Optional properties: - opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in the table should have this. +- opp-supported-hw: This enables us to select only a subset of OPPs from the + larger OPP table, based on what version of the hardware we are running on. We + still can't have multiple nodes with the same opp-hz value in OPP table. + + It's an user defined array containing a hierarchy of hardware version numbers, + supported by the OPP. For example: a platform with hierarchy of three levels + of versions (A, B and C), this field should be like <X Y Z>, where X + corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z + corresponds to version hierarchy C. + + Each level of hierarchy is represented by a 32 bit value, and so there can be + only 32 different supported version per hierarchy. i.e. 1 bit per version. A + value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy + level. And a value of 0x00000000 will disable the OPP completely, and so we + never want that to happen. + + If 32 values aren't sufficient for a version hierarchy, than that version + hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the + above example, Z1 & Z2 refer to the version hierarchy Z. + - status: Marks the node enabled/disabled. Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. @@ -157,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; clock-latency-ns = <290000>; @@ -236,20 +256,20 @@ independently. * independently. */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000; @@ -312,20 +332,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>; opp-microamp = <70000>; clock-latency-ns = <300000>; opp-suspend; }; - opp01 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <980000 1000000 1010000>; opp-microamp = <80000>; clock-latency-ns = <310000>; }; - opp02 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1025000>; opp-microamp = <90000>; @@ -338,20 +358,20 @@ DVFS state together. compatible = "operating-points-v2"; opp-shared; - opp10 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1045000 1050000 1055000>; opp-microamp = <95000>; clock-latency-ns = <400000>; opp-suspend; }; - opp11 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1075000>; opp-microamp = <100000>; clock-latency-ns = <400000>; }; - opp12 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1010000 1100000 1110000>; opp-microamp = <95000>; @@ -378,7 +398,7 @@ Example 4: Handling multiple regulators compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000>, /* Supply 0 */ <960000>, /* Supply 1 */ @@ -391,7 +411,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -404,7 +424,7 @@ Example 4: Handling multiple regulators /* OR */ - opp00 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <970000 975000 985000>, /* Supply 0 */ <960000 965000 975000>, /* Supply 1 */ @@ -417,7 +437,8 @@ Example 4: Handling multiple regulators }; }; -Example 5: Multiple OPP tables +Example 5: opp-supported-hw +(example: three level hierarchy of versions: cuts, substrate and process) / { cpus { @@ -426,40 +447,73 @@ Example 5: Multiple OPP tables ... cpu-supply = <&cpu_supply> - operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>; - operating-points-names = "slow", "fast"; + operating-points-v2 = <&cpu0_opp_table_slow>; }; }; - cpu0_opp_table_slow: opp_table_slow { + opp_table { compatible = "operating-points-v2"; status = "okay"; opp-shared; - opp00 { + opp@600000000 { + /* + * Supports all substrate and process versions for 0xF + * cuts, i.e. only first four cuts. + */ + opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF> opp-hz = /bits/ 64 <600000000>; + opp-microvolt = <900000 915000 925000>; ... }; - opp01 { + opp@800000000 { + /* + * Supports: + * - cuts: only one, 6th cut (represented by 6th bit). + * - substrate: supports 16 different substrate versions + * - process: supports 9 different process versions + */ + opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0> opp-hz = /bits/ 64 <800000000>; + opp-microvolt = <900000 915000 925000>; ... }; }; +}; + +Example 6: opp-microvolt-<name>, opp-microamp-<name>: +(example: device with two possible microvolt ranges: slow and fast) - cpu0_opp_table_fast: opp_table_fast { +/ { + cpus { + cpu@0 { + compatible = "arm,cortex-a7"; + ... + + operating-points-v2 = <&cpu0_opp_table>; + }; + }; + + cpu0_opp_table: opp_table0 { compatible = "operating-points-v2"; - status = "okay"; opp-shared; - opp10 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; - ... + opp-microvolt-slow = <900000 915000 925000>; + opp-microvolt-fast = <970000 975000 985000>; + opp-microamp-slow = <70000>; + opp-microamp-fast = <71000>; }; - opp11 { - opp-hz = /bits/ 64 <1100000000>; - ... + opp@1200000000 { + opp-hz = /bits/ 64 <1200000000>; + opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */ + <910000 925000 935000>; /* Supply vcc1 */ + opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */ + <960000 965000 975000>; /* Supply vcc1 */ + opp-microamp = <70000>; /* Will be used for both slow/fast */ }; }; }; diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 294cfe40388d..40beede46e55 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -64,73 +64,73 @@ compatible = "operating-points-v2"; opp-shared; - opp00 { + opp@200000000 { opp-hz = /bits/ 64 <200000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp01 { + opp@300000000 { opp-hz = /bits/ 64 <300000000>; opp-microvolt = <900000>; clock-latency-ns = <200000>; }; - opp02 { + opp@400000000 { opp-hz = /bits/ 64 <400000000>; opp-microvolt = <925000>; clock-latency-ns = <200000>; }; - opp03 { + opp@500000000 { opp-hz = /bits/ 64 <500000000>; opp-microvolt = <950000>; clock-latency-ns = <200000>; }; - opp04 { + opp@600000000 { opp-hz = /bits/ 64 <600000000>; opp-microvolt = <975000>; clock-latency-ns = <200000>; }; - opp05 { + opp@700000000 { opp-hz = /bits/ 64 <700000000>; opp-microvolt = <987500>; clock-latency-ns = <200000>; }; - opp06 { + opp@800000000 { opp-hz = /bits/ 64 <800000000>; opp-microvolt = <1000000>; clock-latency-ns = <200000>; opp-suspend; }; - opp07 { + opp@900000000 { opp-hz = /bits/ 64 <900000000>; opp-microvolt = <1037500>; clock-latency-ns = <200000>; }; - opp08 { + opp@1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <1087500>; clock-latency-ns = <200000>; }; - opp09 { + opp@1100000000 { opp-hz = /bits/ 64 <1100000000>; opp-microvolt = <1137500>; clock-latency-ns = <200000>; }; - opp10 { + opp@1200000000 { opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1187500>; clock-latency-ns = <200000>; }; - opp11 { + opp@1300000000 { opp-hz = /bits/ 64 <1300000000>; opp-microvolt = <1250000>; clock-latency-ns = <200000>; }; - opp12 { + opp@1400000000 { opp-hz = /bits/ 64 <1400000000>; opp-microvolt = <1287500>; clock-latency-ns = <200000>; }; - opp13 { + opp@1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <1350000>; clock-latency-ns = <200000>; diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile index 33c1e18c41a4..19837ef04d8e 100644 --- a/drivers/base/power/opp/Makefile +++ b/drivers/base/power/opp/Makefile @@ -1,2 +1,3 @@ ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG obj-y += core.o cpu.o +obj-$(CONFIG_DEBUG_FS) += debugfs.o diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index b8e76f75073b..5c01fec1ed14 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -463,6 +463,7 @@ static void _kfree_list_dev_rcu(struct rcu_head *head) static void _remove_list_dev(struct device_list_opp *list_dev, struct device_opp *dev_opp) { + opp_debug_unregister(list_dev, dev_opp); list_del(&list_dev->node); call_srcu(&dev_opp->srcu_head.srcu, &list_dev->rcu_head, _kfree_list_dev_rcu); @@ -472,6 +473,7 @@ struct device_list_opp *_add_list_dev(const struct device *dev, struct device_opp *dev_opp) { struct device_list_opp *list_dev; + int ret; list_dev = kzalloc(sizeof(*list_dev), GFP_KERNEL); if (!list_dev) @@ -481,6 +483,12 @@ struct device_list_opp *_add_list_dev(const struct device *dev, list_dev->dev = dev; list_add_rcu(&list_dev->node, &dev_opp->dev_list); + /* Create debugfs entries for the dev_opp */ + ret = opp_debug_register(list_dev, dev_opp); + if (ret) + dev_err(dev, "%s: Failed to register opp debugfs (%d)\n", + __func__, ret); + return list_dev; } @@ -551,6 +559,12 @@ static void _remove_device_opp(struct device_opp *dev_opp) if (!list_empty(&dev_opp->opp_list)) return; + if (dev_opp->supported_hw) + return; + + if (dev_opp->prop_name) + return; + list_dev = list_first_entry(&dev_opp->dev_list, struct device_list_opp, node); @@ -596,6 +610,7 @@ static void _opp_remove(struct device_opp *dev_opp, */ if (notify) srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_REMOVE, opp); + opp_debug_remove_one(opp); list_del_rcu(&opp->node); call_srcu(&dev_opp->srcu_head.srcu, &opp->rcu_head, _kfree_opp_rcu); @@ -673,6 +688,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, { struct dev_pm_opp *opp; struct list_head *head = &dev_opp->opp_list; + int ret; /* * Insert new OPP in order of increasing frequency and discard if @@ -703,6 +719,11 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, new_opp->dev_opp = dev_opp; list_add_rcu(&new_opp->node, head); + ret = opp_debug_create_one(new_opp, dev_opp); + if (ret) + dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n", + __func__, ret); + return 0; } @@ -776,35 +797,48 @@ unlock: } /* TODO: Support multiple regulators */ -static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev, + struct device_opp *dev_opp) { u32 microvolt[3] = {0}; u32 val; int count, ret; + struct property *prop = NULL; + char name[NAME_MAX]; + + /* Search for "opp-microvolt-<name>" */ + if (dev_opp->prop_name) { + sprintf(name, "opp-microvolt-%s", dev_opp->prop_name); + prop = of_find_property(opp->np, name, NULL); + } + + if (!prop) { + /* Search for "opp-microvolt" */ + name[13] = '\0'; + prop = of_find_property(opp->np, name, NULL); - /* Missing property isn't a problem, but an invalid entry is */ - if (!of_find_property(opp->np, "opp-microvolt", NULL)) - return 0; + /* Missing property isn't a problem, but an invalid entry is */ + if (!prop) + return 0; + } - count = of_property_count_u32_elems(opp->np, "opp-microvolt"); + count = of_property_count_u32_elems(opp->np, name); if (count < 0) { - dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n", - __func__, count); + dev_err(dev, "%s: Invalid %s property (%d)\n", + __func__, name, count); return count; } /* There can be one or three elements here */ if (count != 1 && count != 3) { - dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n", - __func__, count); + dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n", + __func__, name, count); return -EINVAL; } - ret = of_property_read_u32_array(opp->np, "opp-microvolt", microvolt, - count); + ret = of_property_read_u32_array(opp->np, name, microvolt, count); if (ret) { - dev_err(dev, "%s: error parsing opp-microvolt: %d\n", __func__, - ret); + dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret); return -EINVAL; } @@ -812,13 +846,271 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) opp->u_volt_min = microvolt[1]; opp->u_volt_max = microvolt[2]; - if (!of_property_read_u32(opp->np, "opp-microamp", &val)) + /* Search for "opp-microamp-<name>" */ + prop = NULL; + if (dev_opp->prop_name) { + sprintf(name, "opp-microamp-%s", dev_opp->prop_name); + prop = of_find_property(opp->np, name, NULL); + } + + if (!prop) { + /* Search for "opp-microamp" */ + name[12] = '\0'; + prop = of_find_property(opp->np, name, NULL); + } + + if (prop && !of_property_read_u32(opp->np, name, &val)) opp->u_amp = val; return 0; } /** + * dev_pm_opp_set_supported_hw() - Set supported platforms + * @dev: Device for which supported-hw has to be set. + * @versions: Array of hierarchy of versions to match. + * @count: Number of elements in the array. + * + * This is required only for the V2 bindings, and it enables a platform to + * specify the hierarchy of versions it supports. OPP layer will then enable + * OPPs, which are available for those versions, based on its 'opp-supported-hw' + * property. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, + unsigned int count) +{ + struct device_opp *dev_opp; + int ret = 0; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + /* Do we already have a version hierarchy associated with dev_opp? */ + if (dev_opp->supported_hw) { + dev_err(dev, "%s: Already have supported hardware list\n", + __func__); + ret = -EBUSY; + goto err; + } + + dev_opp->supported_hw = kmemdup(versions, count * sizeof(*versions), + GFP_KERNEL); + if (!dev_opp->supported_hw) { + ret = -ENOMEM; + goto err; + } + + dev_opp->supported_hw_count = count; + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw); + +/** + * dev_pm_opp_put_supported_hw() - Releases resources blocked for supported hw + * @dev: Device for which supported-hw has to be set. + * + * This is required only for the V2 bindings, and is called for a matching + * dev_pm_opp_set_supported_hw(). Until this is called, the device_opp structure + * will not be freed. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_supported_hw(struct device *dev) +{ + struct device_opp *dev_opp; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + if (!dev_opp->supported_hw) { + dev_err(dev, "%s: Doesn't have supported hardware list\n", + __func__); + goto unlock; + } + + kfree(dev_opp->supported_hw); + dev_opp->supported_hw = NULL; + dev_opp->supported_hw_count = 0; + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_supported_hw); + +/** + * dev_pm_opp_set_prop_name() - Set prop-extn name + * @dev: Device for which the regulator has to be set. + * @name: name to postfix to properties. + * + * This is required only for the V2 bindings, and it enables a platform to + * specify the extn to be used for certain property names. The properties to + * which the extension will apply are opp-microvolt and opp-microamp. OPP core + * should postfix the property name with -<name> while looking for them. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +{ + struct device_opp *dev_opp; + int ret = 0; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + dev_opp = _add_device_opp(dev); + if (!dev_opp) { + ret = -ENOMEM; + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + /* Do we already have a prop-name associated with dev_opp? */ + if (dev_opp->prop_name) { + dev_err(dev, "%s: Already have prop-name %s\n", __func__, + dev_opp->prop_name); + ret = -EBUSY; + goto err; + } + + dev_opp->prop_name = kstrdup(name, GFP_KERNEL); + if (!dev_opp->prop_name) { + ret = -ENOMEM; + goto err; + } + + mutex_unlock(&dev_opp_list_lock); + return 0; + +err: + _remove_device_opp(dev_opp); +unlock: + mutex_unlock(&dev_opp_list_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name); + +/** + * dev_pm_opp_put_prop_name() - Releases resources blocked for prop-name + * @dev: Device for which the regulator has to be set. + * + * This is required only for the V2 bindings, and is called for a matching + * dev_pm_opp_set_prop_name(). Until this is called, the device_opp structure + * will not be freed. + * + * Locking: The internal device_opp and opp structures are RCU protected. + * Hence this function internally uses RCU updater strategy with mutex locks + * to keep the integrity of the internal data structures. Callers should ensure + * that this function is *NOT* called under RCU protection or in contexts where + * mutex cannot be locked. + */ +void dev_pm_opp_put_prop_name(struct device *dev) +{ + struct device_opp *dev_opp; + + /* Hold our list modification lock here */ + mutex_lock(&dev_opp_list_lock); + + /* Check for existing list for 'dev' first */ + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) { + dev_err(dev, "Failed to find dev_opp: %ld\n", PTR_ERR(dev_opp)); + goto unlock; + } + + /* Make sure there are no concurrent readers while updating dev_opp */ + WARN_ON(!list_empty(&dev_opp->opp_list)); + + if (!dev_opp->prop_name) { + dev_err(dev, "%s: Doesn't have a prop-name\n", __func__); + goto unlock; + } + + kfree(dev_opp->prop_name); + dev_opp->prop_name = NULL; + + /* Try freeing device_opp if this was the last blocking resource */ + _remove_device_opp(dev_opp); + +unlock: + mutex_unlock(&dev_opp_list_lock); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); + +static bool _opp_is_supported(struct device *dev, struct device_opp *dev_opp, + struct device_node *np) +{ + unsigned int count = dev_opp->supported_hw_count; + u32 version; + int ret; + + if (!dev_opp->supported_hw) + return true; + + while (count--) { + ret = of_property_read_u32_index(np, "opp-supported-hw", count, + &version); + if (ret) { + dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n", + __func__, count, ret); + return false; + } + + /* Both of these are bitwise masks of the versions */ + if (!(version & dev_opp->supported_hw[count])) + return false; + } + + return true; +} + +/** * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings) * @dev: device for which we do this operation * @np: device node @@ -864,6 +1156,12 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) goto free_opp; } + /* Check if the OPP supports hardware's hierarchy of versions or not */ + if (!_opp_is_supported(dev, dev_opp, np)) { + dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); + goto free_opp; + } + /* * Rate is defined as an unsigned long in clk API, and so casting * explicitly to its type. Must be fixed once rate is 64 bit @@ -879,7 +1177,7 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_parse_supplies(new_opp, dev); + ret = opp_parse_supplies(new_opp, dev, dev_opp); if (ret) goto free_opp; @@ -889,12 +1187,14 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) /* OPP to select on device suspend */ if (of_property_read_bool(np, "opp-suspend")) { - if (dev_opp->suspend_opp) + if (dev_opp->suspend_opp) { dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n", __func__, dev_opp->suspend_opp->rate, new_opp->rate); - else + } else { + new_opp->suspend = true; dev_opp->suspend_opp = new_opp; + } } if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) diff --git a/drivers/base/power/opp/debugfs.c b/drivers/base/power/opp/debugfs.c new file mode 100644 index 000000000000..ddfe4773e922 --- /dev/null +++ b/drivers/base/power/opp/debugfs.c @@ -0,0 +1,219 @@ +/* + * Generic OPP debugfs interface + * + * Copyright (C) 2015-2016 Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/limits.h> + +#include "opp.h" + +static struct dentry *rootdir; + +static void opp_set_dev_name(const struct device *dev, char *name) +{ + if (dev->parent) + snprintf(name, NAME_MAX, "%s-%s", dev_name(dev->parent), + dev_name(dev)); + else + snprintf(name, NAME_MAX, "%s", dev_name(dev)); +} + +void opp_debug_remove_one(struct dev_pm_opp *opp) +{ + debugfs_remove_recursive(opp->dentry); +} + +int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp) +{ + struct dentry *pdentry = dev_opp->dentry; + struct dentry *d; + char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */ + + /* Rate is unique to each OPP, use it to give opp-name */ + snprintf(name, sizeof(name), "opp:%lu", opp->rate); + + /* Create per-opp directory */ + d = debugfs_create_dir(name, pdentry); + if (!d) + return -ENOMEM; + + if (!debugfs_create_bool("available", S_IRUGO, d, &opp->available)) + return -ENOMEM; + + if (!debugfs_create_bool("dynamic", S_IRUGO, d, &opp->dynamic)) + return -ENOMEM; + + if (!debugfs_create_bool("turbo", S_IRUGO, d, &opp->turbo)) + return -ENOMEM; + + if (!debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend)) + return -ENOMEM; + + if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max)) + return -ENOMEM; + + if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp)) + return -ENOMEM; + + if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d, + &opp->clock_latency_ns)) + return -ENOMEM; + + opp->dentry = d; + return 0; +} + +static int device_opp_debug_create_dir(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + const struct device *dev = list_dev->dev; + struct dentry *d; + + opp_set_dev_name(dev, dev_opp->dentry_name); + + /* Create device specific directory */ + d = debugfs_create_dir(dev_opp->dentry_name, rootdir); + if (!d) { + dev_err(dev, "%s: Failed to create debugfs dir\n", __func__); + return -ENOMEM; + } + + list_dev->dentry = d; + dev_opp->dentry = d; + + return 0; +} + +static int device_opp_debug_create_link(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + const struct device *dev = list_dev->dev; + char name[NAME_MAX]; + struct dentry *d; + + opp_set_dev_name(list_dev->dev, name); + + /* Create device specific directory link */ + d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name); + if (!d) { + dev_err(dev, "%s: Failed to create link\n", __func__); + return -ENOMEM; + } + + list_dev->dentry = d; + + return 0; +} + +/** + * opp_debug_register - add a device opp node to the debugfs 'opp' directory + * @list_dev: list-dev pointer for device + * @dev_opp: the device-opp being added + * + * Dynamically adds device specific directory in debugfs 'opp' directory. If the + * device-opp is shared with other devices, then links will be created for all + * devices except the first. + * + * Return: 0 on success, otherwise negative error. + */ +int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + if (!rootdir) { + pr_debug("%s: Uninitialized rootdir\n", __func__); + return -EINVAL; + } + + if (dev_opp->dentry) + return device_opp_debug_create_link(list_dev, dev_opp); + + return device_opp_debug_create_dir(list_dev, dev_opp); +} + +static void opp_migrate_dentry(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + struct device_list_opp *new_dev; + const struct device *dev; + struct dentry *dentry; + + /* Look for next list-dev */ + list_for_each_entry(new_dev, &dev_opp->dev_list, node) + if (new_dev != list_dev) + break; + + /* new_dev is guaranteed to be valid here */ + dev = new_dev->dev; + debugfs_remove_recursive(new_dev->dentry); + + opp_set_dev_name(dev, dev_opp->dentry_name); + + dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir, + dev_opp->dentry_name); + if (!dentry) { + dev_err(dev, "%s: Failed to rename link from: %s to %s\n", + __func__, dev_name(list_dev->dev), dev_name(dev)); + return; + } + + new_dev->dentry = dentry; + dev_opp->dentry = dentry; +} + +/** + * opp_debug_unregister - remove a device opp node from debugfs opp directory + * @list_dev: list-dev pointer for device + * @dev_opp: the device-opp being removed + * + * Dynamically removes device specific directory from debugfs 'opp' directory. + */ +void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ + if (list_dev->dentry == dev_opp->dentry) { + /* Move the real dentry object under another device */ + if (!list_is_singular(&dev_opp->dev_list)) { + opp_migrate_dentry(list_dev, dev_opp); + goto out; + } + dev_opp->dentry = NULL; + } + + debugfs_remove_recursive(list_dev->dentry); + +out: + list_dev->dentry = NULL; +} + +static int __init opp_debug_init(void) +{ + /* Create /sys/kernel/debug/opp directory */ + rootdir = debugfs_create_dir("opp", NULL); + if (!rootdir) { + pr_err("%s: Failed to create root directory\n", __func__); + return -ENOMEM; + } + + return 0; +} +core_initcall(opp_debug_init); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index 7366b2aa8997..690638ef36ee 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -17,6 +17,7 @@ #include <linux/device.h> #include <linux/kernel.h> #include <linux/list.h> +#include <linux/limits.h> #include <linux/pm_opp.h> #include <linux/rculist.h> #include <linux/rcupdate.h> @@ -50,9 +51,10 @@ extern struct mutex dev_opp_list_lock; * are protected by the dev_opp_list_lock for integrity. * IMPORTANT: the opp nodes should be maintained in increasing * order. - * @dynamic: not-created from static DT entries. * @available: true/false - marks if this OPP as available or not + * @dynamic: not-created from static DT entries. * @turbo: true if turbo (boost) OPP + * @suspend: true if suspend OPP * @rate: Frequency in hertz * @u_volt: Target voltage in microvolts corresponding to this OPP * @u_volt_min: Minimum voltage in microvolts corresponding to this OPP @@ -63,6 +65,7 @@ extern struct mutex dev_opp_list_lock; * @dev_opp: points back to the device_opp struct this opp belongs to * @rcu_head: RCU callback head used for deferred freeing * @np: OPP's device node. + * @dentry: debugfs dentry pointer (per opp) * * This structure stores the OPP information for a given device. */ @@ -72,6 +75,7 @@ struct dev_pm_opp { bool available; bool dynamic; bool turbo; + bool suspend; unsigned long rate; unsigned long u_volt; @@ -84,6 +88,10 @@ struct dev_pm_opp { struct rcu_head rcu_head; struct device_node *np; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; +#endif }; /** @@ -91,6 +99,7 @@ struct dev_pm_opp { * @node: list node * @dev: device to which the struct object belongs * @rcu_head: RCU callback head used for deferred freeing + * @dentry: debugfs dentry pointer (per device) * * This is an internal data structure maintaining the list of devices that are * managed by 'struct device_opp'. @@ -99,6 +108,10 @@ struct device_list_opp { struct list_head node; const struct device *dev; struct rcu_head rcu_head; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; +#endif }; /** @@ -113,7 +126,14 @@ struct device_list_opp { * @dev_list: list of devices that share these OPPs * @opp_list: list of opps * @np: struct device_node pointer for opp's DT node. + * @clock_latency_ns_max: Max clock latency in nanoseconds. * @shared_opp: OPP is shared between multiple devices. + * @suspend_opp: Pointer to OPP to be used during device suspend. + * @supported_hw: Array of version number to support. + * @supported_hw_count: Number of elements in supported_hw array. + * @prop_name: A name to postfix to many DT properties, while parsing them. + * @dentry: debugfs dentry pointer of the real device directory (not links). + * @dentry_name: Name of the real dentry. * * This is an internal data structure maintaining the link to opps attached to * a device. This structure is not meant to be shared to users as it is @@ -135,6 +155,15 @@ struct device_opp { unsigned long clock_latency_ns_max; bool shared_opp; struct dev_pm_opp *suspend_opp; + + unsigned int *supported_hw; + unsigned int supported_hw_count; + const char *prop_name; + +#ifdef CONFIG_DEBUG_FS + struct dentry *dentry; + char dentry_name[NAME_MAX]; +#endif }; /* Routines internal to opp core */ @@ -143,4 +172,26 @@ struct device_list_opp *_add_list_dev(const struct device *dev, struct device_opp *dev_opp); struct device_node *_of_get_opp_desc_node(struct device *dev); +#ifdef CONFIG_DEBUG_FS +void opp_debug_remove_one(struct dev_pm_opp *opp); +int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp); +int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp); +void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp); +#else +static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {} + +static inline int opp_debug_create_one(struct dev_pm_opp *opp, + struct device_opp *dev_opp) +{ return 0; } +static inline int opp_debug_register(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ return 0; } + +static inline void opp_debug_unregister(struct device_list_opp *list_dev, + struct device_opp *dev_opp) +{ } +#endif /* DEBUG_FS */ + #endif /* __DRIVER_OPP_H__ */ diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index b1f8a73e5a94..0031069b64c9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -6,6 +6,8 @@ config ARM_BIG_LITTLE_CPUFREQ tristate "Generic ARM big LITTLE CPUfreq driver" depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK + # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y + depends on !CPU_THERMAL || THERMAL select PM_OPP help This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. @@ -217,6 +219,16 @@ config ARM_SPEAR_CPUFREQ help This adds the CPUFreq driver support for SPEAr SOCs. +config ARM_STI_CPUFREQ + tristate "STi CPUFreq support" + depends on SOC_STIH407 + help + This driver uses the generic OPP framework to match the running + platform with a predefined set of suitable values. If not provided + we will fall-back so safe-values contained in Device Tree. Enable + this config option if you wish to add CPUFreq support for STi based + SoCs. + config ARM_TEGRA20_CPUFREQ bool "Tegra20 CPUFreq support" depends on ARCH_TEGRA diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index c0af1a1281c8..9e63fb1b09f8 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -73,6 +73,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o +obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index c5d256caa664..c251247ae661 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -23,6 +23,7 @@ #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/cpumask.h> +#include <linux/cpu_cooling.h> #include <linux/export.h> #include <linux/module.h> #include <linux/mutex.h> @@ -55,6 +56,7 @@ static bool bL_switching_enabled; #define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq) #define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq) +static struct thermal_cooling_device *cdev[MAX_CLUSTERS]; static struct cpufreq_arm_bL_ops *arm_bL_ops; static struct clk *clk[MAX_CLUSTERS]; static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1]; @@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy) static int bL_cpufreq_exit(struct cpufreq_policy *policy) { struct device *cpu_dev; + int cur_cluster = cpu_to_cluster(policy->cpu); + + if (cur_cluster < MAX_CLUSTERS) { + cpufreq_cooling_unregister(cdev[cur_cluster]); + cdev[cur_cluster] = NULL; + } cpu_dev = get_cpu_device(policy->cpu); if (!cpu_dev) { @@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy) return 0; } +static void bL_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct device *cpu_dev = get_cpu_device(policy->cpu); + int cur_cluster = cpu_to_cluster(policy->cpu); + struct device_node *np; + + /* Do not register a cpu_cooling device if we are in IKS mode */ + if (cur_cluster >= MAX_CLUSTERS) + return; + + np = of_node_get(cpu_dev->of_node); + if (WARN_ON(!np)) + return; + + if (of_find_property(np, "#cooling-cells", NULL)) { + u32 power_coefficient = 0; + + of_property_read_u32(np, "dynamic-power-coefficient", + &power_coefficient); + + cdev[cur_cluster] = of_cpufreq_power_cooling_register(np, + policy->related_cpus, power_coefficient, NULL); + if (IS_ERR(cdev[cur_cluster])) { + dev_err(cpu_dev, + "running cpufreq without cooling device: %ld\n", + PTR_ERR(cdev[cur_cluster])); + cdev[cur_cluster] = NULL; + } + } + of_node_put(np); +} + static struct cpufreq_driver bL_cpufreq_driver = { .name = "arm-big-little", .flags = CPUFREQ_STICKY | @@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = { .get = bL_cpufreq_get_rate, .init = bL_cpufreq_init, .exit = bL_cpufreq_exit, + .ready = bL_cpufreq_ready, .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 90d64081ddb3..1ceece9d6711 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -407,8 +407,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy) * thermal DT code takes care of matching them. */ if (of_find_property(np, "#cooling-cells", NULL)) { - priv->cdev = of_cpufreq_cooling_register(np, - policy->related_cpus); + u32 power_coefficient = 0; + + of_property_read_u32(np, "dynamic-power-coefficient", + &power_coefficient); + + priv->cdev = of_cpufreq_power_cooling_register(np, + policy->related_cpus, power_coefficient, NULL); if (IS_ERR(priv->cdev)) { dev_err(priv->cpu_dev, "running cpufreq without cooling device: %ld\n", diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 1fa1deb6e91f..606ad74abe6e 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -115,13 +115,13 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, - struct dbs_data *dbs_data, bool modify_all) +static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all) { + struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; if (modify_all) - dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); + dbs_check_cpu(dbs_data, policy->cpu); return delay_for_sampling_rate(cs_tuners->sampling_rate); } diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b260576ddb12..4de12fd35b1f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -158,47 +158,55 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) } EXPORT_SYMBOL_GPL(dbs_check_cpu); -static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, - unsigned int delay) +void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay) { - struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - - mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); -} - -void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, - unsigned int delay, bool all_cpus) -{ - int i; + struct dbs_data *dbs_data = policy->governor_data; + struct cpu_dbs_info *cdbs; + int cpu; - if (!all_cpus) { - /* - * Use raw_smp_processor_id() to avoid preemptible warnings. - * We know that this is only called with all_cpus == false from - * works that have been queued with *_work_on() functions and - * those works are canceled during CPU_DOWN_PREPARE so they - * can't possibly run on any other CPU. - */ - __gov_queue_work(raw_smp_processor_id(), dbs_data, delay); - } else { - for_each_cpu(i, policy->cpus) - __gov_queue_work(i, dbs_data, delay); + for_each_cpu(cpu, policy->cpus) { + cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + cdbs->timer.expires = jiffies + delay; + add_timer_on(&cdbs->timer, cpu); } } -EXPORT_SYMBOL_GPL(gov_queue_work); +EXPORT_SYMBOL_GPL(gov_add_timers); -static inline void gov_cancel_work(struct dbs_data *dbs_data, - struct cpufreq_policy *policy) +static inline void gov_cancel_timers(struct cpufreq_policy *policy) { + struct dbs_data *dbs_data = policy->governor_data; struct cpu_dbs_info *cdbs; int i; for_each_cpu(i, policy->cpus) { cdbs = dbs_data->cdata->get_cpu_cdbs(i); - cancel_delayed_work_sync(&cdbs->dwork); + del_timer_sync(&cdbs->timer); } } +void gov_cancel_work(struct cpu_common_dbs_info *shared) +{ + /* Tell dbs_timer_handler() to skip queuing up work items. */ + atomic_inc(&shared->skip_work); + /* + * If dbs_timer_handler() is already running, it may not notice the + * incremented skip_work, so wait for it to complete to prevent its work + * item from being queued up after the cancel_work_sync() below. + */ + gov_cancel_timers(shared->policy); + /* + * In case dbs_timer_handler() managed to run and spawn a work item + * before the timers have been canceled, wait for that work item to + * complete and then cancel all of the timers set up by it. If + * dbs_timer_handler() runs again at that point, it will see the + * positive value of skip_work and won't spawn any more work items. + */ + cancel_work_sync(&shared->work); + gov_cancel_timers(shared->policy); + atomic_set(&shared->skip_work, 0); +} +EXPORT_SYMBOL_GPL(gov_cancel_work); + /* Will return if we need to evaluate cpu load again or not */ static bool need_load_eval(struct cpu_common_dbs_info *shared, unsigned int sampling_rate) @@ -217,29 +225,21 @@ static bool need_load_eval(struct cpu_common_dbs_info *shared, return true; } -static void dbs_timer(struct work_struct *work) +static void dbs_work_handler(struct work_struct *work) { - struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, - dwork.work); - struct cpu_common_dbs_info *shared = cdbs->shared; + struct cpu_common_dbs_info *shared = container_of(work, struct + cpu_common_dbs_info, work); struct cpufreq_policy *policy; struct dbs_data *dbs_data; unsigned int sampling_rate, delay; - bool modify_all = true; - - mutex_lock(&shared->timer_mutex); + bool eval_load; policy = shared->policy; - - /* - * Governor might already be disabled and there is no point continuing - * with the work-handler. - */ - if (!policy) - goto unlock; - dbs_data = policy->governor_data; + /* Kill all timers */ + gov_cancel_timers(policy); + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -250,14 +250,37 @@ static void dbs_timer(struct work_struct *work) sampling_rate = od_tuners->sampling_rate; } - if (!need_load_eval(cdbs->shared, sampling_rate)) - modify_all = false; + eval_load = need_load_eval(shared, sampling_rate); - delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); - gov_queue_work(dbs_data, policy, delay, modify_all); - -unlock: + /* + * Make sure cpufreq_governor_limits() isn't evaluating load in + * parallel. + */ + mutex_lock(&shared->timer_mutex); + delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load); mutex_unlock(&shared->timer_mutex); + + atomic_dec(&shared->skip_work); + + gov_add_timers(policy, delay); +} + +static void dbs_timer_handler(unsigned long data) +{ + struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data; + struct cpu_common_dbs_info *shared = cdbs->shared; + + /* + * Timer handler may not be allowed to queue the work at the moment, + * because: + * - Another timer handler has done that + * - We are stopping the governor + * - Or we are updating the sampling rate of the ondemand governor + */ + if (atomic_inc_return(&shared->skip_work) > 1) + atomic_dec(&shared->skip_work); + else + queue_work(system_wq, &shared->work); } static void set_sampling_rate(struct dbs_data *dbs_data, @@ -287,6 +310,9 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy, for_each_cpu(j, policy->related_cpus) cdata->get_cpu_cdbs(j)->shared = shared; + mutex_init(&shared->timer_mutex); + atomic_set(&shared->skip_work, 0); + INIT_WORK(&shared->work, dbs_work_handler); return 0; } @@ -297,6 +323,8 @@ static void free_common_dbs_info(struct cpufreq_policy *policy, struct cpu_common_dbs_info *shared = cdbs->shared; int j; + mutex_destroy(&shared->timer_mutex); + for_each_cpu(j, policy->cpus) cdata->get_cpu_cdbs(j)->shared = NULL; @@ -433,7 +461,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, shared->policy = policy; shared->time_stamp = ktime_get(); - mutex_init(&shared->timer_mutex); for_each_cpu(j, policy->cpus) { struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); @@ -450,7 +477,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); + __setup_timer(&j_cdbs->timer, dbs_timer_handler, + (unsigned long)j_cdbs, + TIMER_DEFERRABLE | TIMER_IRQSAFE); } if (cdata->governor == GOV_CONSERVATIVE) { @@ -468,8 +497,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), - true); + gov_add_timers(policy, delay_for_sampling_rate(sampling_rate)); return 0; } @@ -483,18 +511,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, if (!shared || !shared->policy) return -EBUSY; - /* - * Work-handler must see this updated, as it should not proceed any - * further after governor is disabled. And so timer_mutex is taken while - * updating this value. - */ - mutex_lock(&shared->timer_mutex); + gov_cancel_work(shared); shared->policy = NULL; - mutex_unlock(&shared->timer_mutex); - - gov_cancel_work(dbs_data, policy); - mutex_destroy(&shared->timer_mutex); return 0; } diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 5621bb03e874..91e767a058a7 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -17,6 +17,7 @@ #ifndef _CPUFREQ_GOVERNOR_H #define _CPUFREQ_GOVERNOR_H +#include <linux/atomic.h> #include <linux/cpufreq.h> #include <linux/kernel_stat.h> #include <linux/module.h> @@ -132,12 +133,14 @@ static void *get_cpu_dbs_info_s(int cpu) \ struct cpu_common_dbs_info { struct cpufreq_policy *policy; /* - * percpu mutex that serializes governor limit change with dbs_timer - * invocation. We do not want dbs_timer to run when user is changing - * the governor or limits. + * Per policy mutex that serializes load evaluation from limit-change + * and work-handler. */ struct mutex timer_mutex; + ktime_t time_stamp; + atomic_t skip_work; + struct work_struct work; }; /* Per cpu structures */ @@ -152,7 +155,7 @@ struct cpu_dbs_info { * wake-up from idle. */ unsigned int prev_load; - struct delayed_work dwork; + struct timer_list timer; struct cpu_common_dbs_info *shared; }; @@ -209,8 +212,7 @@ struct common_dbs_data { struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs, - struct dbs_data *dbs_data, + unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy, bool modify_all); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); @@ -269,11 +271,11 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; +void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay); +void gov_cancel_work(struct cpu_common_dbs_info *shared); void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); -void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, - unsigned int delay, bool all_cpus); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 03ac6ce54042..eae51070c034 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -191,10 +191,9 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, - struct dbs_data *dbs_data, bool modify_all) +static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all) { - struct cpufreq_policy *policy = cdbs->shared->policy; + struct dbs_data *dbs_data = policy->governor_data; unsigned int cpu = policy->cpu; struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); @@ -247,40 +246,66 @@ static void update_sampling_rate(struct dbs_data *dbs_data, unsigned int new_rate) { struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cpumask cpumask; int cpu; od_tuners->sampling_rate = new_rate = max(new_rate, dbs_data->min_sampling_rate); - for_each_online_cpu(cpu) { + /* + * Lock governor so that governor start/stop can't execute in parallel. + */ + mutex_lock(&od_dbs_cdata.mutex); + + cpumask_copy(&cpumask, cpu_online_mask); + + for_each_cpu(cpu, &cpumask) { struct cpufreq_policy *policy; struct od_cpu_dbs_info_s *dbs_info; + struct cpu_dbs_info *cdbs; + struct cpu_common_dbs_info *shared; unsigned long next_sampling, appointed_at; - policy = cpufreq_cpu_get(cpu); - if (!policy) - continue; - if (policy->governor != &cpufreq_gov_ondemand) { - cpufreq_cpu_put(policy); - continue; - } dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - cpufreq_cpu_put(policy); + cdbs = &dbs_info->cdbs; + shared = cdbs->shared; - if (!delayed_work_pending(&dbs_info->cdbs.dwork)) + /* + * A valid shared and shared->policy means governor hasn't + * stopped or exited yet. + */ + if (!shared || !shared->policy) + continue; + + policy = shared->policy; + + /* clear all CPUs of this policy */ + cpumask_andnot(&cpumask, &cpumask, policy->cpus); + + /* + * Update sampling rate for CPUs whose policy is governed by + * dbs_data. In case of governor_per_policy, only a single + * policy will be governed by dbs_data, otherwise there can be + * multiple policies that are governed by the same dbs_data. + */ + if (dbs_data != policy->governor_data) continue; + /* + * Checking this for any CPU should be fine, timers for all of + * them are scheduled together. + */ next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.dwork.timer.expires; + appointed_at = dbs_info->cdbs.timer.expires; if (time_before(next_sampling, appointed_at)) { - cancel_delayed_work_sync(&dbs_info->cdbs.dwork); - - gov_queue_work(dbs_data, policy, - usecs_to_jiffies(new_rate), true); + gov_cancel_work(shared); + gov_add_timers(policy, usecs_to_jiffies(new_rate)); } } + + mutex_unlock(&od_dbs_cdata.mutex); } static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 98fb8821382d..cd83d477e32d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -66,6 +66,7 @@ static inline int ceiling_fp(int32_t x) struct sample { int32_t core_pct_busy; + int32_t busy_scaled; u64 aperf; u64 mperf; u64 tsc; @@ -112,6 +113,7 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; + u64 prev_cummulative_iowait; struct sample sample; }; @@ -133,6 +135,7 @@ struct pstate_funcs { int (*get_scaling)(void); void (*set)(struct cpudata*, int pstate); void (*get_vid)(struct cpudata *); + int32_t (*get_target_pstate)(struct cpudata *); }; struct cpu_defaults { @@ -140,6 +143,9 @@ struct cpu_defaults { struct pstate_funcs funcs; }; +static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); +static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); + static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; @@ -738,6 +744,7 @@ static struct cpu_defaults core_params = { .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, .set = core_set_pstate, + .get_target_pstate = get_target_pstate_use_performance, }, }; @@ -758,6 +765,7 @@ static struct cpu_defaults silvermont_params = { .set = atom_set_pstate, .get_scaling = silvermont_get_scaling, .get_vid = atom_get_vid, + .get_target_pstate = get_target_pstate_use_cpu_load, }, }; @@ -778,6 +786,7 @@ static struct cpu_defaults airmont_params = { .set = atom_set_pstate, .get_scaling = airmont_get_scaling, .get_vid = atom_get_vid, + .get_target_pstate = get_target_pstate_use_cpu_load, }, }; @@ -797,6 +806,7 @@ static struct cpu_defaults knl_params = { .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, .set = core_set_pstate, + .get_target_pstate = get_target_pstate_use_performance, }, }; @@ -882,12 +892,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - if (cpu->prev_mperf == mperf) { + tsc = rdtsc(); + if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) { local_irq_restore(flags); return; } - - tsc = rdtsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; @@ -922,7 +931,43 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, jiffies + delay); } -static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) +static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) +{ + struct sample *sample = &cpu->sample; + u64 cummulative_iowait, delta_iowait_us; + u64 delta_iowait_mperf; + u64 mperf, now; + int32_t cpu_load; + + cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + + /* + * Convert iowait time into number of IO cycles spent at max_freq. + * IO is considered as busy only for the cpu_load algorithm. For + * performance this is not needed since we always try to reach the + * maximum P-State, so we are already boosting the IOs. + */ + delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; + delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * + cpu->pstate.max_pstate, MSEC_PER_SEC); + + mperf = cpu->sample.mperf + delta_iowait_mperf; + cpu->prev_cummulative_iowait = cummulative_iowait; + + + /* + * The load can be estimated as the ratio of the mperf counter + * running at a constant frequency during active periods + * (C0) and the time stamp counter running at the same frequency + * also during C-states. + */ + cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); + cpu->sample.busy_scaled = cpu_load; + + return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load); +} + +static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) { int32_t core_busy, max_pstate, current_pstate, sample_ratio; s64 duration_us; @@ -960,30 +1005,24 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) core_busy = mul_fp(core_busy, sample_ratio); } - return core_busy; + cpu->sample.busy_scaled = core_busy; + return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy); } static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { - int32_t busy_scaled; - struct _pid *pid; - signed int ctl; - int from; + int from, target_pstate; struct sample *sample; from = cpu->pstate.current_pstate; - pid = &cpu->pid; - busy_scaled = intel_pstate_get_scaled_busy(cpu); + target_pstate = pstate_funcs.get_target_pstate(cpu); - ctl = pid_calc(pid, busy_scaled); - - /* Negative values of ctl increase the pstate and vice versa */ - intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true); + intel_pstate_set_pstate(cpu, target_pstate, true); sample = &cpu->sample; trace_pstate_sample(fp_toint(sample->core_pct_busy), - fp_toint(busy_scaled), + fp_toint(sample->busy_scaled), from, cpu->pstate.current_pstate, sample->mperf, @@ -1237,6 +1276,8 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) pstate_funcs.get_scaling = funcs->get_scaling; pstate_funcs.set = funcs->set; pstate_funcs.get_vid = funcs->get_vid; + pstate_funcs.get_target_pstate = funcs->get_target_pstate; + } #if IS_ENABLED(CONFIG_ACPI) diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c index 83001dc5b646..fd601b92f5ec 100644 --- a/drivers/cpufreq/mt8173-cpufreq.c +++ b/drivers/cpufreq/mt8173-cpufreq.c @@ -41,16 +41,35 @@ * the original PLL becomes stable at target frequency. */ struct mtk_cpu_dvfs_info { + struct cpumask cpus; struct device *cpu_dev; struct regulator *proc_reg; struct regulator *sram_reg; struct clk *cpu_clk; struct clk *inter_clk; struct thermal_cooling_device *cdev; + struct list_head list_head; int intermediate_voltage; bool need_voltage_tracking; }; +static LIST_HEAD(dvfs_info_list); + +static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) +{ + struct mtk_cpu_dvfs_info *info; + struct list_head *list; + + list_for_each(list, &dvfs_info_list) { + info = list_entry(list, struct mtk_cpu_dvfs_info, list_head); + + if (cpumask_test_cpu(cpu, &info->cpus)) + return info; + } + + return NULL; +} + static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, int new_vproc) { @@ -59,7 +78,10 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, int old_vproc, old_vsram, new_vsram, vsram, vproc, ret; old_vproc = regulator_get_voltage(proc_reg); - old_vsram = regulator_get_voltage(sram_reg); + if (old_vproc < 0) { + pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); + return old_vproc; + } /* Vsram should not exceed the maximum allowed voltage of SoC. */ new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT); @@ -72,7 +94,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, */ do { old_vsram = regulator_get_voltage(sram_reg); + if (old_vsram < 0) { + pr_err("%s: invalid Vsram value: %d\n", + __func__, old_vsram); + return old_vsram; + } old_vproc = regulator_get_voltage(proc_reg); + if (old_vproc < 0) { + pr_err("%s: invalid Vproc value: %d\n", + __func__, old_vproc); + return old_vproc; + } vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT); @@ -117,7 +149,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info, */ do { old_vproc = regulator_get_voltage(proc_reg); + if (old_vproc < 0) { + pr_err("%s: invalid Vproc value: %d\n", + __func__, old_vproc); + return old_vproc; + } old_vsram = regulator_get_voltage(sram_reg); + if (old_vsram < 0) { + pr_err("%s: invalid Vsram value: %d\n", + __func__, old_vsram); + return old_vsram; + } vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT); ret = regulator_set_voltage(proc_reg, vproc, @@ -185,6 +227,10 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy, old_freq_hz = clk_get_rate(cpu_clk); old_vproc = regulator_get_voltage(info->proc_reg); + if (old_vproc < 0) { + pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc); + return old_vproc; + } freq_hz = freq_table[index].frequency * 1000; @@ -375,6 +421,9 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) */ info->need_voltage_tracking = !IS_ERR(sram_reg); + /* CPUs in the same cluster share a clock and power domain. */ + cpumask_copy(&info->cpus, &cpu_topology[cpu].core_sibling); + return 0; out_free_opp_table: @@ -413,22 +462,18 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) struct cpufreq_frequency_table *freq_table; int ret; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - ret = mtk_cpu_dvfs_info_init(info, policy->cpu); - if (ret) { - pr_err("%s failed to initialize dvfs info for cpu%d\n", - __func__, policy->cpu); - goto out_free_dvfs_info; + info = mtk_cpu_dvfs_info_lookup(policy->cpu); + if (!info) { + pr_err("dvfs info for cpu%d is not initialized.\n", + policy->cpu); + return -EINVAL; } ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table); if (ret) { pr_err("failed to init cpufreq table for cpu%d: %d\n", policy->cpu, ret); - goto out_release_dvfs_info; + return ret; } ret = cpufreq_table_validate_and_show(policy, freq_table); @@ -437,8 +482,7 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) goto out_free_cpufreq_table; } - /* CPUs in the same cluster share a clock and power domain. */ - cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling); + cpumask_copy(policy->cpus, &info->cpus); policy->driver_data = info; policy->clk = info->cpu_clk; @@ -446,13 +490,6 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy) out_free_cpufreq_table: dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table); - -out_release_dvfs_info: - mtk_cpu_dvfs_info_release(info); - -out_free_dvfs_info: - kfree(info); - return ret; } @@ -462,14 +499,13 @@ static int mtk_cpufreq_exit(struct cpufreq_policy *policy) cpufreq_cooling_unregister(info->cdev); dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table); - mtk_cpu_dvfs_info_release(info); - kfree(info); return 0; } static struct cpufreq_driver mt8173_cpufreq_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + CPUFREQ_HAVE_GOVERNOR_PER_POLICY, .verify = cpufreq_generic_frequency_table_verify, .target_index = mtk_cpufreq_set_target, .get = cpufreq_generic_get, @@ -482,11 +518,47 @@ static struct cpufreq_driver mt8173_cpufreq_driver = { static int mt8173_cpufreq_probe(struct platform_device *pdev) { - int ret; + struct mtk_cpu_dvfs_info *info; + struct list_head *list, *tmp; + int cpu, ret; + + for_each_possible_cpu(cpu) { + info = mtk_cpu_dvfs_info_lookup(cpu); + if (info) + continue; + + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto release_dvfs_info_list; + } + + ret = mtk_cpu_dvfs_info_init(info, cpu); + if (ret) { + dev_err(&pdev->dev, + "failed to initialize dvfs info for cpu%d\n", + cpu); + goto release_dvfs_info_list; + } + + list_add(&info->list_head, &dvfs_info_list); + } ret = cpufreq_register_driver(&mt8173_cpufreq_driver); - if (ret) - pr_err("failed to register mtk cpufreq driver\n"); + if (ret) { + dev_err(&pdev->dev, "failed to register mtk cpufreq driver\n"); + goto release_dvfs_info_list; + } + + return 0; + +release_dvfs_info_list: + list_for_each_safe(list, tmp, &dvfs_info_list) { + info = list_entry(list, struct mtk_cpu_dvfs_info, list_head); + + mtk_cpu_dvfs_info_release(info); + list_del(list); + } return ret; } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index 2a0d58959acf..808a320e9d5d 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -555,6 +555,8 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->min = policy->cpuinfo.min_freq = ioread32(&pcch_hdr->minimum_frequency) * 1000; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c index 358f0752c31e..b23e525a7af3 100644 --- a/drivers/cpufreq/qoriq-cpufreq.c +++ b/drivers/cpufreq/qoriq-cpufreq.c @@ -12,6 +12,7 @@ #include <linux/clk.h> #include <linux/cpufreq.h> +#include <linux/cpu_cooling.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> @@ -33,6 +34,7 @@ struct cpu_data { struct clk **pclk; struct cpufreq_frequency_table *table; + struct thermal_cooling_device *cdev; }; /** @@ -321,6 +323,27 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy, return clk_set_parent(policy->clk, parent); } + +static void qoriq_cpufreq_ready(struct cpufreq_policy *policy) +{ + struct cpu_data *cpud = policy->driver_data; + struct device_node *np = of_get_cpu_node(policy->cpu, NULL); + + if (of_find_property(np, "#cooling-cells", NULL)) { + cpud->cdev = of_cpufreq_cooling_register(np, + policy->related_cpus); + + if (IS_ERR(cpud->cdev)) { + pr_err("Failed to register cooling device cpu%d: %ld\n", + policy->cpu, PTR_ERR(cpud->cdev)); + + cpud->cdev = NULL; + } + } + + of_node_put(np); +} + static struct cpufreq_driver qoriq_cpufreq_driver = { .name = "qoriq_cpufreq", .flags = CPUFREQ_CONST_LOOPS, @@ -329,6 +352,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = { .verify = cpufreq_generic_frequency_table_verify, .target_index = qoriq_cpufreq_target, .get = cpufreq_generic_get, + .ready = qoriq_cpufreq_ready, .attr = cpufreq_generic_attr, }; diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c new file mode 100644 index 000000000000..a9c659f58974 --- /dev/null +++ b/drivers/cpufreq/sti-cpufreq.c @@ -0,0 +1,294 @@ +/* + * Match running platform with pre-defined OPP values for CPUFreq + * + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * Lee Jones <lee.jones@linaro.org> + * + * Copyright (C) 2015 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the version 2 of the GNU General Public License as + * published by the Free Software Foundation + */ + +#include <linux/cpu.h> +#include <linux/io.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/pm_opp.h> +#include <linux/regmap.h> + +#define VERSION_ELEMENTS 3 +#define MAX_PCODE_NAME_LEN 7 + +#define VERSION_SHIFT 28 +#define HW_INFO_INDEX 1 +#define MAJOR_ID_INDEX 1 +#define MINOR_ID_INDEX 2 + +/* + * Only match on "suitable for ALL versions" entries + * + * This will be used with the BIT() macro. It sets the + * top bit of a 32bit value and is equal to 0x80000000. + */ +#define DEFAULT_VERSION 31 + +enum { + PCODE = 0, + SUBSTRATE, + DVFS_MAX_REGFIELDS, +}; + +/** + * ST CPUFreq Driver Data + * + * @cpu_node CPU's OF node + * @syscfg_eng Engineering Syscon register map + * @regmap Syscon register map + */ +static struct sti_cpufreq_ddata { + struct device *cpu; + struct regmap *syscfg_eng; + struct regmap *syscfg; +} ddata; + +static int sti_cpufreq_fetch_major(void) { + struct device_node *np = ddata.cpu->of_node; + struct device *dev = ddata.cpu; + unsigned int major_offset; + unsigned int socid; + int ret; + + ret = of_property_read_u32_index(np, "st,syscfg", + MAJOR_ID_INDEX, &major_offset); + if (ret) { + dev_err(dev, "No major number offset provided in %s [%d]\n", + np->full_name, ret); + return ret; + } + + ret = regmap_read(ddata.syscfg, major_offset, &socid); + if (ret) { + dev_err(dev, "Failed to read major number from syscon [%d]\n", + ret); + return ret; + } + + return ((socid >> VERSION_SHIFT) & 0xf) + 1; +} + +static int sti_cpufreq_fetch_minor(void) +{ + struct device *dev = ddata.cpu; + struct device_node *np = dev->of_node; + unsigned int minor_offset; + unsigned int minid; + int ret; + + ret = of_property_read_u32_index(np, "st,syscfg-eng", + MINOR_ID_INDEX, &minor_offset); + if (ret) { + dev_err(dev, + "No minor number offset provided %s [%d]\n", + np->full_name, ret); + return ret; + } + + ret = regmap_read(ddata.syscfg_eng, minor_offset, &minid); + if (ret) { + dev_err(dev, + "Failed to read the minor number from syscon [%d]\n", + ret); + return ret; + } + + return minid & 0xf; +} + +static int sti_cpufreq_fetch_regmap_field(const struct reg_field *reg_fields, + int hw_info_offset, int field) +{ + struct regmap_field *regmap_field; + struct reg_field reg_field = reg_fields[field]; + struct device *dev = ddata.cpu; + unsigned int value; + int ret; + + reg_field.reg = hw_info_offset; + regmap_field = devm_regmap_field_alloc(dev, + ddata.syscfg_eng, + reg_field); + if (IS_ERR(regmap_field)) { + dev_err(dev, "Failed to allocate reg field\n"); + return PTR_ERR(regmap_field); + } + + ret = regmap_field_read(regmap_field, &value); + if (ret) { + dev_err(dev, "Failed to read %s code\n", + field ? "SUBSTRATE" : "PCODE"); + return ret; + } + + return value; +} + +static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = { + [PCODE] = REG_FIELD(0, 16, 19), + [SUBSTRATE] = REG_FIELD(0, 0, 2), +}; + +static const struct reg_field *sti_cpufreq_match(void) +{ + if (of_machine_is_compatible("st,stih407") || + of_machine_is_compatible("st,stih410")) + return sti_stih407_dvfs_regfields; + + return NULL; +} + +static int sti_cpufreq_set_opp_info(void) +{ + struct device *dev = ddata.cpu; + struct device_node *np = dev->of_node; + const struct reg_field *reg_fields; + unsigned int hw_info_offset; + unsigned int version[VERSION_ELEMENTS]; + int pcode, substrate, major, minor; + int ret; + char name[MAX_PCODE_NAME_LEN]; + + reg_fields = sti_cpufreq_match(); + if (!reg_fields) { + dev_err(dev, "This SoC doesn't support voltage scaling"); + return -ENODEV; + } + + ret = of_property_read_u32_index(np, "st,syscfg-eng", + HW_INFO_INDEX, &hw_info_offset); + if (ret) { + dev_warn(dev, "Failed to read HW info offset from DT\n"); + substrate = DEFAULT_VERSION; + pcode = 0; + goto use_defaults; + } + + pcode = sti_cpufreq_fetch_regmap_field(reg_fields, + hw_info_offset, + PCODE); + if (pcode < 0) { + dev_warn(dev, "Failed to obtain process code\n"); + /* Use default pcode */ + pcode = 0; + } + + substrate = sti_cpufreq_fetch_regmap_field(reg_fields, + hw_info_offset, + SUBSTRATE); + if (substrate) { + dev_warn(dev, "Failed to obtain substrate code\n"); + /* Use default substrate */ + substrate = DEFAULT_VERSION; + } + +use_defaults: + major = sti_cpufreq_fetch_major(); + if (major < 0) { + dev_err(dev, "Failed to obtain major version\n"); + /* Use default major number */ + major = DEFAULT_VERSION; + } + + minor = sti_cpufreq_fetch_minor(); + if (minor < 0) { + dev_err(dev, "Failed to obtain minor version\n"); + /* Use default minor number */ + minor = DEFAULT_VERSION; + } + + snprintf(name, MAX_PCODE_NAME_LEN, "pcode%d", pcode); + + ret = dev_pm_opp_set_prop_name(dev, name); + if (ret) { + dev_err(dev, "Failed to set prop name\n"); + return ret; + } + + version[0] = BIT(major); + version[1] = BIT(minor); + version[2] = BIT(substrate); + + ret = dev_pm_opp_set_supported_hw(dev, version, VERSION_ELEMENTS); + if (ret) { + dev_err(dev, "Failed to set supported hardware\n"); + return ret; + } + + dev_dbg(dev, "pcode: %d major: %d minor: %d substrate: %d\n", + pcode, major, minor, substrate); + dev_dbg(dev, "version[0]: %x version[1]: %x version[2]: %x\n", + version[0], version[1], version[2]); + + return 0; +} + +static int sti_cpufreq_fetch_syscon_regsiters(void) +{ + struct device *dev = ddata.cpu; + struct device_node *np = dev->of_node; + + ddata.syscfg = syscon_regmap_lookup_by_phandle(np, "st,syscfg"); + if (IS_ERR(ddata.syscfg)) { + dev_err(dev, "\"st,syscfg\" not supplied\n"); + return PTR_ERR(ddata.syscfg); + } + + ddata.syscfg_eng = syscon_regmap_lookup_by_phandle(np, "st,syscfg-eng"); + if (IS_ERR(ddata.syscfg_eng)) { + dev_err(dev, "\"st,syscfg-eng\" not supplied\n"); + return PTR_ERR(ddata.syscfg_eng); + } + + return 0; +} + +static int sti_cpufreq_init(void) +{ + int ret; + + ddata.cpu = get_cpu_device(0); + if (!ddata.cpu) { + dev_err(ddata.cpu, "Failed to get device for CPU0\n"); + goto skip_voltage_scaling; + } + + if (!of_get_property(ddata.cpu->of_node, "operating-points-v2", NULL)) { + dev_err(ddata.cpu, "OPP-v2 not supported\n"); + goto skip_voltage_scaling; + } + + ret = sti_cpufreq_fetch_syscon_regsiters(); + if (ret) + goto skip_voltage_scaling; + + ret = sti_cpufreq_set_opp_info(); + if (!ret) + goto register_cpufreq_dt; + +skip_voltage_scaling: + dev_err(ddata.cpu, "Not doing voltage scaling\n"); + +register_cpufreq_dt: + platform_device_register_simple("cpufreq-dt", -1, NULL, 0); + + return 0; +} +module_init(sti_cpufreq_init); + +MODULE_DESCRIPTION("STMicroelectronics CPUFreq/OPP driver"); +MODULE_AUTHOR("Ajitpal Singh <ajitpal.singh@st.com>"); +MODULE_AUTHOR("Lee Jones <lee.jones@linaro.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 9a2e50337af9..95403d2ccaf5 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -55,6 +55,11 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq); int dev_pm_opp_disable(struct device *dev, unsigned long freq); struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev); +int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions, + unsigned int count); +void dev_pm_opp_put_supported_hw(struct device *dev); +int dev_pm_opp_set_prop_name(struct device *dev, const char *name); +void dev_pm_opp_put_prop_name(struct device *dev); #else static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { @@ -129,6 +134,23 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( { return ERR_PTR(-EINVAL); } + +static inline int dev_pm_opp_set_supported_hw(struct device *dev, + const u32 *versions, + unsigned int count) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_supported_hw(struct device *dev) {} + +static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_put_prop_name(struct device *dev) {} + #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) |