Diffstat (limited to 'drivers')
-rw-r--r--  drivers/acpi/bus.c                        34
-rw-r--r--  drivers/acpi/cppc_acpi.c                  44
-rw-r--r--  drivers/base/power/common.c                8
-rw-r--r--  drivers/base/power/domain.c              278
-rw-r--r--  drivers/base/power/domain_governor.c      65
-rw-r--r--  drivers/base/power/runtime.c              53
-rw-r--r--  drivers/cpufreq/cppc_cpufreq.c           211
-rw-r--r--  drivers/cpufreq/cpufreq.c                112
-rw-r--r--  drivers/cpufreq/cpufreq_governor.c        20
-rw-r--r--  drivers/cpufreq/cpufreq_governor.h         1
-rw-r--r--  drivers/cpufreq/intel_pstate.c             2
-rw-r--r--  drivers/cpufreq/mediatek-cpufreq-hw.c      4
-rw-r--r--  drivers/cpufreq/pasemi-cpufreq.c           1
-rw-r--r--  drivers/cpufreq/pmac32-cpufreq.c           2
-rw-r--r--  drivers/cpufreq/pmac64-cpufreq.c           2
-rw-r--r--  drivers/cpufreq/ppc_cbe_cpufreq.c          1
-rw-r--r--  drivers/cpufreq/ppc_cbe_cpufreq_pmi.c      2
-rw-r--r--  drivers/cpufreq/scmi-cpufreq.c             4
-rw-r--r--  drivers/cpuidle/cpuidle-psci-domain.c      4
-rw-r--r--  drivers/cpuidle/cpuidle-psci.c            46
-rw-r--r--  drivers/cpuidle/cpuidle-riscv-sbi.c        4
-rw-r--r--  drivers/devfreq/devfreq.c                 20
-rw-r--r--  drivers/devfreq/governor.h                27
-rw-r--r--  drivers/devfreq/governor_passive.c       403
-rw-r--r--  drivers/devfreq/rk3399_dmc.c             312
-rw-r--r--  drivers/idle/intel_idle.c                133
-rw-r--r--  drivers/iio/chemical/scd30.h               5
-rw-r--r--  drivers/iio/chemical/scd30_core.c         10
-rw-r--r--  drivers/iio/chemical/scd30_i2c.c           3
-rw-r--r--  drivers/iio/chemical/scd30_serial.c        3
-rw-r--r--  drivers/opp/of.c                           6
-rw-r--r--  drivers/powercap/dtpm_cpu.c                2
-rw-r--r--  drivers/powercap/intel_rapl_common.c       4
-rw-r--r--  drivers/powercap/intel_rapl_msr.c          1
-rw-r--r--  drivers/soc/rockchip/pm_domains.c        118
-rw-r--r--  drivers/thermal/cpufreq_cooling.c          2
-rw-r--r--  drivers/thermal/devfreq_cooling.c          8
37 files changed, 1469 insertions, 486 deletions
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index fe0000eb7cae..b67d2ee77cd1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -279,6 +279,20 @@ bool osc_pc_lpi_support_confirmed;
EXPORT_SYMBOL_GPL(osc_pc_lpi_support_confirmed);
/*
+ * ACPI 6.2 Section 6.2.11.2 'Platform-Wide OSPM Capabilities':
+ * Starting with ACPI Specification 6.2, all _CPC registers can be in
+ * PCC, System Memory, System IO, or Functional Fixed Hardware address
+ * spaces. OSPM support for this more flexible register space scheme is
+ * indicated by the “Flexible Address Space for CPPC Registers” _OSC bit.
+ *
+ * Otherwise (cf ACPI 6.1, s8.4.7.1.1.X), _CPC registers must be in:
+ * - PCC or Functional Fixed Hardware address space if defined
+ * - SystemMemory address space (NULL register) if not defined
+ */
+bool osc_cpc_flexible_adr_space_confirmed;
+EXPORT_SYMBOL_GPL(osc_cpc_flexible_adr_space_confirmed);
+
+/*
* ACPI 6.4 Operating System Capabilities for USB.
*/
bool osc_sb_native_usb4_support_confirmed;
@@ -315,12 +329,15 @@ static void acpi_bus_osc_negotiate_platform_control(void)
#endif
#ifdef CONFIG_X86
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_GENERIC_INITIATOR_SUPPORT;
- if (boot_cpu_has(X86_FEATURE_HWP)) {
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
- capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
- }
#endif
+#ifdef CONFIG_ACPI_CPPC_LIB
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
+#endif
+
+ capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
+
if (IS_ENABLED(CONFIG_SCHED_MC_PRIO))
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
@@ -341,10 +358,9 @@ static void acpi_bus_osc_negotiate_platform_control(void)
return;
}
-#ifdef CONFIG_X86
- if (boot_cpu_has(X86_FEATURE_HWP))
- osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
- (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
+#ifdef CONFIG_ACPI_CPPC_LIB
+ osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
+ (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
#endif
/*
@@ -366,6 +382,8 @@ static void acpi_bus_osc_negotiate_platform_control(void)
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
osc_sb_native_usb4_support_confirmed =
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
+ osc_cpc_flexible_adr_space_confirmed =
+ capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPC_FLEXIBLE_ADR_SPACE;
}
kfree(context.ret.pointer);
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index bc1454789a06..3b299b28a8af 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
(cpc)->cpc_entry.reg.space_id == \
ACPI_ADR_SPACE_PLATFORM_COMM)
+/* Check if a CPC register is in SystemMemory */
+#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_SYSTEM_MEMORY)
+
+/* Check if a CPC register is in SystemIo */
+#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
+ (cpc)->cpc_entry.reg.space_id == \
+ ACPI_ADR_SPACE_SYSTEM_IO)
+
/* Evaluates to True if reg is a NULL register descriptor */
#define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \
(reg)->address == 0 && \
@@ -424,6 +434,24 @@ bool acpi_cpc_valid(void)
}
EXPORT_SYMBOL_GPL(acpi_cpc_valid);
+bool cppc_allow_fast_switch(void)
+{
+ struct cpc_register_resource *desired_reg;
+ struct cpc_desc *cpc_ptr;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+ desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF];
+ if (!CPC_IN_SYSTEM_MEMORY(desired_reg) &&
+ !CPC_IN_SYSTEM_IO(desired_reg))
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(cppc_allow_fast_switch);
+
/**
* acpi_get_psd_map - Map the CPUs in the freq domain of a given cpu
* @cpu: Find all CPUs that share a domain with cpu.
@@ -736,6 +764,11 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
if (gas_t->address) {
void __iomem *addr;
+ if (!osc_cpc_flexible_adr_space_confirmed) {
+ pr_debug("Flexible address space capability not supported\n");
+ goto out_free;
+ }
+
addr = ioremap(gas_t->address, gas_t->bit_width/8);
if (!addr)
goto out_free;
@@ -758,6 +791,10 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
gas_t->address);
goto out_free;
}
+ if (!osc_cpc_flexible_adr_space_confirmed) {
+ pr_debug("Flexible address space capability not supported\n");
+ goto out_free;
+ }
} else {
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
/* Support only PCC, SystemMemory, SystemIO, and FFH type regs. */
@@ -1447,6 +1484,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf);
* transition latency for performance change requests. The closest we have
* is the timing information from the PCCT tables which provides the info
* on the number and frequency of PCC commands the platform can handle.
+ *
+ * If desired_reg is in the SystemMemory or SystemIo ACPI address space,
+ * then assume there is no latency.
*/
unsigned int cppc_get_transition_latency(int cpu_num)
{
@@ -1472,7 +1512,9 @@ unsigned int cppc_get_transition_latency(int cpu_num)
return CPUFREQ_ETERNAL;
desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
- if (!CPC_IN_PCC(desired_reg))
+ if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg))
+ return 0;
+ else if (!CPC_IN_PCC(desired_reg))
return CPUFREQ_ETERNAL;
if (pcc_ss_id < 0)
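
As an illustration (not part of the patch), here is a minimal sketch of how a cpufreq-side consumer could use the new cppc_allow_fast_switch() helper together with cppc_get_transition_latency(); the init callback itself is made up, while acpi_cpc_valid(), cppc_allow_fast_switch() and cppc_get_transition_latency() are the real helpers added or used by this series.

#include <linux/cpufreq.h>
#include <acpi/cppc_acpi.h>

static int example_cppc_policy_init(struct cpufreq_policy *policy)
{
	if (!acpi_cpc_valid())
		return -ENODEV;

	/*
	 * True only when every CPU's Desired Performance register is in
	 * SystemMemory or SystemIO, i.e. writable without a PCC mailbox
	 * round-trip, so switching from scheduler context is reasonable.
	 */
	policy->fast_switch_possible = cppc_allow_fast_switch();

	/*
	 * Returns 0 for SystemMemory/SystemIO registers, otherwise the
	 * PCCT-derived latency (or CPUFREQ_ETERNAL).
	 */
	policy->cpuinfo.transition_latency = cppc_get_transition_latency(policy->cpu);

	return 0;
}
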
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index bbddb267c2e6..72115917e0bd 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -172,10 +172,10 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
* @dev: Device to detach.
* @power_off: Used to indicate whether we should power off the device.
*
- * This functions will reverse the actions from dev_pm_domain_attach() and
- * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain.
- * Typically it should be invoked during the remove phase, either from
- * subsystem level code or from drivers.
+ * This function will reverse the actions from dev_pm_domain_attach(),
+ * dev_pm_domain_attach_by_id() and dev_pm_domain_attach_by_name(), thus it
+ * detaches @dev from its PM domain. Typically it should be invoked during the
+ * remove phase, either from subsystem level code or from drivers.
*
* Callers must ensure proper synchronization of this function with power
* management callbacks.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 1ee878d126fd..739e52cd4aba 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -131,7 +131,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
#define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON)
-static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
+static inline bool irq_safe_dev_in_sleep_domain(struct device *dev,
const struct generic_pm_domain *genpd)
{
bool ret;
@@ -139,11 +139,14 @@ static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
ret = pm_runtime_is_irq_safe(dev) && !genpd_is_irq_safe(genpd);
/*
- * Warn once if an IRQ safe device is attached to a no sleep domain, as
- * to indicate a suboptimal configuration for PM. For an always on
- * domain this isn't case, thus don't warn.
+ * Warn once if an IRQ safe device is attached to a domain whose
+ * callbacks are allowed to sleep. This indicates a suboptimal
+ * configuration for PM, but it doesn't matter for an always on domain.
*/
- if (ret && !genpd_is_always_on(genpd))
+ if (genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd))
+ return ret;
+
+ if (ret)
dev_warn_once(dev, "PM domain %s will not be powered off\n",
genpd->name);
@@ -225,24 +228,23 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd)
static void genpd_update_accounting(struct generic_pm_domain *genpd)
{
- ktime_t delta, now;
+ u64 delta, now;
- now = ktime_get();
- delta = ktime_sub(now, genpd->accounting_time);
+ now = ktime_get_mono_fast_ns();
+ if (now <= genpd->accounting_time)
+ return;
+
+ delta = now - genpd->accounting_time;
/*
* If genpd->status is active, it means we are just
* out of off and so update the idle time and vice
* versa.
*/
- if (genpd->status == GENPD_STATE_ON) {
- int state_idx = genpd->state_idx;
-
- genpd->states[state_idx].idle_time =
- ktime_add(genpd->states[state_idx].idle_time, delta);
- } else {
- genpd->on_time = ktime_add(genpd->on_time, delta);
- }
+ if (genpd->status == GENPD_STATE_ON)
+ genpd->states[genpd->state_idx].idle_time += delta;
+ else
+ genpd->on_time += delta;
genpd->accounting_time = now;
}
@@ -476,15 +478,16 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state);
*/
void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next)
{
- struct generic_pm_domain_data *gpd_data;
struct generic_pm_domain *genpd;
+ struct gpd_timing_data *td;
genpd = dev_to_genpd_safe(dev);
if (!genpd)
return;
- gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
- gpd_data->next_wakeup = next;
+ td = to_gpd_data(dev->power.subsys_data->domain_data)->td;
+ if (td)
+ td->next_wakeup = next;
}
EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup);
@@ -506,6 +509,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
if (!genpd->power_on)
goto out;
+ timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
if (!timed) {
ret = genpd->power_on(genpd);
if (ret)
@@ -524,7 +528,7 @@ static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed)
goto out;
genpd->states[state_idx].power_on_latency_ns = elapsed_ns;
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "on", elapsed_ns);
@@ -555,6 +559,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
if (!genpd->power_off)
goto out;
+ timed = timed && genpd->gd && !genpd->states[state_idx].fwnode;
if (!timed) {
ret = genpd->power_off(genpd);
if (ret)
@@ -573,7 +578,7 @@ static int _genpd_power_off(struct generic_pm_domain *genpd, bool timed)
goto out;
genpd->states[state_idx].power_off_latency_ns = elapsed_ns;
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
pr_debug("%s: Power-%s latency exceeded, new value %lld ns\n",
genpd->name, "off", elapsed_ns);
@@ -649,18 +654,12 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
}
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
- enum pm_qos_flags_status stat;
-
- stat = dev_pm_qos_flags(pdd->dev, PM_QOS_FLAG_NO_POWER_OFF);
- if (stat > PM_QOS_FLAGS_NONE)
- return -EBUSY;
-
/*
* Do not allow PM domain to be powered off, when an IRQ safe
* device is part of a non-IRQ safe domain.
*/
if (!pm_runtime_suspended(pdd->dev) ||
- irq_safe_dev_in_no_sleep_domain(pdd->dev, genpd))
+ irq_safe_dev_in_sleep_domain(pdd->dev, genpd))
not_suspended++;
}
@@ -775,25 +774,27 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
dev = gpd_data->base.dev;
for (;;) {
- struct generic_pm_domain *genpd;
+ struct generic_pm_domain *genpd = ERR_PTR(-ENODATA);
struct pm_domain_data *pdd;
+ struct gpd_timing_data *td;
spin_lock_irq(&dev->power.lock);
pdd = dev->power.subsys_data ?
dev->power.subsys_data->domain_data : NULL;
if (pdd) {
- to_gpd_data(pdd)->td.constraint_changed = true;
- genpd = dev_to_genpd(dev);
- } else {
- genpd = ERR_PTR(-ENODATA);
+ td = to_gpd_data(pdd)->td;
+ if (td) {
+ td->constraint_changed = true;
+ genpd = dev_to_genpd(dev);
+ }
}
spin_unlock_irq(&dev->power.lock);
if (!IS_ERR(genpd)) {
genpd_lock(genpd);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
genpd_unlock(genpd);
}
@@ -879,9 +880,9 @@ static int genpd_runtime_suspend(struct device *dev)
struct generic_pm_domain *genpd;
bool (*suspend_ok)(struct device *__dev);
struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
- struct gpd_timing_data *td = &gpd_data->td;
+ struct gpd_timing_data *td = gpd_data->td;
bool runtime_pm = pm_runtime_enabled(dev);
- ktime_t time_start;
+ ktime_t time_start = 0;
s64 elapsed_ns;
int ret;
@@ -902,8 +903,7 @@ static int genpd_runtime_suspend(struct device *dev)
return -EBUSY;
/* Measure suspend latency. */
- time_start = 0;
- if (runtime_pm)
+ if (td && runtime_pm)
time_start = ktime_get();
ret = __genpd_runtime_suspend(dev);
@@ -917,13 +917,13 @@ static int genpd_runtime_suspend(struct device *dev)
}
/* Update suspend latency value if the measured time exceeds it. */
- if (runtime_pm) {
+ if (td && runtime_pm) {
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
if (elapsed_ns > td->suspend_latency_ns) {
td->suspend_latency_ns = elapsed_ns;
dev_dbg(dev, "suspend latency exceeded, %lld ns\n",
elapsed_ns);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
td->constraint_changed = true;
}
}
@@ -932,7 +932,7 @@ static int genpd_runtime_suspend(struct device *dev)
* If power.irq_safe is set, this routine may be run with
* IRQs disabled, so suspend only if the PM domain also is irq_safe.
*/
- if (irq_safe_dev_in_no_sleep_domain(dev, genpd))
+ if (irq_safe_dev_in_sleep_domain(dev, genpd))
return 0;
genpd_lock(genpd);
@@ -955,12 +955,11 @@ static int genpd_runtime_resume(struct device *dev)
{
struct generic_pm_domain *genpd;
struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
- struct gpd_timing_data *td = &gpd_data->td;
- bool runtime_pm = pm_runtime_enabled(dev);
- ktime_t time_start;
+ struct gpd_timing_data *td = gpd_data->td;
+ bool timed = td && pm_runtime_enabled(dev);
+ ktime_t time_start = 0;
s64 elapsed_ns;
int ret;
- bool timed = true;
dev_dbg(dev, "%s()\n", __func__);
@@ -972,10 +971,8 @@ static int genpd_runtime_resume(struct device *dev)
* As we don't power off a non IRQ safe domain, which holds
* an IRQ safe device, we don't need to restore power to it.
*/
- if (irq_safe_dev_in_no_sleep_domain(dev, genpd)) {
- timed = false;
+ if (irq_safe_dev_in_sleep_domain(dev, genpd))
goto out;
- }
genpd_lock(genpd);
ret = genpd_power_on(genpd, 0);
@@ -988,8 +985,7 @@ static int genpd_runtime_resume(struct device *dev)
out:
/* Measure resume latency. */
- time_start = 0;
- if (timed && runtime_pm)
+ if (timed)
time_start = ktime_get();
ret = genpd_start_dev(genpd, dev);
@@ -1001,13 +997,13 @@ static int genpd_runtime_resume(struct device *dev)
goto err_stop;
/* Update resume latency value if the measured time exceeds it. */
- if (timed && runtime_pm) {
+ if (timed) {
elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start));
if (elapsed_ns > td->resume_latency_ns) {
td->resume_latency_ns = elapsed_ns;
dev_dbg(dev, "resume latency exceeded, %lld ns\n",
elapsed_ns);
- genpd->max_off_time_changed = true;
+ genpd->gd->max_off_time_changed = true;
td->constraint_changed = true;
}
}
@@ -1500,9 +1496,11 @@ EXPORT_SYMBOL_GPL(dev_pm_genpd_resume);
#endif /* CONFIG_PM_SLEEP */
-static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
+static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
+ bool has_governor)
{
struct generic_pm_domain_data *gpd_data;
+ struct gpd_timing_data *td;
int ret;
ret = dev_pm_get_subsys_data(dev);
@@ -1516,26 +1514,38 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev)
}
gpd_data->base.dev = dev;
- gpd_data->td.constraint_changed = true;
- gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier;
- gpd_data->next_wakeup = KTIME_MAX;
- spin_lock_irq(&dev->power.lock);
+ /* Allocate data used by a governor. */
+ if (has_governor) {
+ td = kzalloc(sizeof(*td), GFP_KERNEL);
+ if (!td) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
- if (dev->power.subsys_data->domain_data) {
- ret = -EINVAL;
- goto err_free;
+ td->constraint_changed = true;
+ td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
+ td->next_wakeup = KTIME_MAX;
+ gpd_data->td = td;
}
- dev->power.subsys_data->domain_data = &gpd_data->base;
+ spin_lock_irq(&dev->power.lock);
+
+ if (dev->power.subsys_data->domain_data)
+ ret = -EINVAL;
+ else
+ dev->power.subsys_data->domain_data = &gpd_data->base;
spin_unlock_irq(&dev->power.lock);
+ if (ret)
+ goto err_free;
+
return gpd_data;
err_free:
- spin_unlock_irq(&dev->power.lock);
+ kfree(gpd_data->td);
kfree(gpd_data);
err_put:
dev_pm_put_subsys_data(dev);
@@ -1551,6 +1561,7 @@ static void genpd_free_dev_data(struct device *dev,
spin_unlock_irq(&dev->power.lock);
+ kfree(gpd_data->td);
kfree(gpd_data);
dev_pm_put_subsys_data(dev);
}
@@ -1607,6 +1618,7 @@ static int genpd_get_cpu(struct generic_pm_domain *genpd, struct device *dev)
static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
struct device *base_dev)
{
+ struct genpd_governor_data *gd = genpd->gd;
struct generic_pm_domain_data *gpd_data;
int ret;
@@ -1615,7 +1627,7 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
return -EINVAL;
- gpd_data = genpd_alloc_dev_data(dev);
+ gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
@@ -1631,7 +1643,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_pm_domain_set(dev, &genpd->domain);
genpd->device_count++;
- genpd->max_off_time_changed = true;
+ if (gd)
+ gd->max_off_time_changed = true;
list_add_tail(&gpd_data->base.list_node, &genpd->dev_list);
@@ -1685,7 +1698,8 @@ static int genpd_remove_device(struct generic_pm_domain *genpd,
}
genpd->device_count--;
- genpd->max_off_time_changed = true;
+ if (genpd->gd)
+ genpd->gd->max_off_time_changed = true;
genpd_clear_cpumask(genpd, gpd_data->cpu);
dev_pm_domain_set(dev, NULL);
@@ -1958,6 +1972,53 @@ static int genpd_set_default_power_state(struct generic_pm_domain *genpd)
return 0;
}
+static int genpd_alloc_data(struct generic_pm_domain *genpd)
+{
+ struct genpd_governor_data *gd = NULL;
+ int ret;
+
+ if (genpd_is_cpu_domain(genpd) &&
+ !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (genpd->gov) {
+ gd = kzalloc(sizeof(*gd), GFP_KERNEL);
+ if (!gd) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ gd->max_off_time_ns = -1;
+ gd->max_off_time_changed = true;
+ gd->next_wakeup = KTIME_MAX;
+ }
+
+ /* Use only one "off" state if there were no states declared */
+ if (genpd->state_count == 0) {
+ ret = genpd_set_default_power_state(genpd);
+ if (ret)
+ goto free;
+ }
+
+ genpd->gd = gd;
+ return 0;
+
+free:
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ kfree(gd);
+ return ret;
+}
+
+static void genpd_free_data(struct generic_pm_domain *genpd)
+{
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ if (genpd->free_states)
+ genpd->free_states(genpd->states, genpd->state_count);
+ kfree(genpd->gd);
+}
+
static void genpd_lock_init(struct generic_pm_domain *genpd)
{
if (genpd->flags & GENPD_FLAG_IRQ_SAFE) {
@@ -1995,11 +2056,9 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
atomic_set(&genpd->sd_count, 0);
genpd->status = is_off ? GENPD_STATE_OFF : GENPD_STATE_ON;
genpd->device_count = 0;
- genpd->max_off_time_ns = -1;
- genpd->max_off_time_changed = true;
genpd->provider = NULL;
genpd->has_provider = false;
- genpd->accounting_time = ktime_get();
+ genpd->accounting_time = ktime_get_mono_fast_ns();
genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
genpd->domain.ops.runtime_resume = genpd_runtime_resume;
genpd->domain.ops.prepare = genpd_prepare;
@@ -2017,26 +2076,22 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
genpd->dev_ops.start = pm_clk_resume;
}
+ /* The always-on governor works better with the corresponding flag. */
+ if (gov == &pm_domain_always_on_gov)
+ genpd->flags |= GENPD_FLAG_RPM_ALWAYS_ON;
+
/* Always-on domains must be powered on at initialization. */
if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) &&
!genpd_status_on(genpd))
return -EINVAL;
- if (genpd_is_cpu_domain(genpd) &&
- !zalloc_cpumask_var(&genpd->cpus, GFP_KERNEL))
- return -ENOMEM;
-
- /* Use only one "off" state if there were no states declared */
- if (genpd->state_count == 0) {
- ret = genpd_set_default_power_state(genpd);
- if (ret) {
- if (genpd_is_cpu_domain(genpd))
- free_cpumask_var(genpd->cpus);
- return ret;
- }
- } else if (!gov && genpd->state_count > 1) {
+ /* Multiple states but no governor doesn't make sense. */
+ if (!gov && genpd->state_count > 1)
pr_warn("%s: no governor for states\n", genpd->name);
- }
+
+ ret = genpd_alloc_data(genpd);
+ if (ret)
+ return ret;
device_initialize(&genpd->dev);
dev_set_name(&genpd->dev, "%s", genpd->name);
@@ -2081,10 +2136,7 @@ static int genpd_remove(struct generic_pm_domain *genpd)
genpd_unlock(genpd);
genpd_debug_remove(genpd);
cancel_work_sync(&genpd->power_off_work);
- if (genpd_is_cpu_domain(genpd))
- free_cpumask_var(genpd->cpus);
- if (genpd->free_states)
- genpd->free_states(genpd->states, genpd->state_count);
+ genpd_free_data(genpd);
pr_debug("%s: removed %s\n", __func__, genpd->name);
@@ -3163,6 +3215,7 @@ static int sub_domains_show(struct seq_file *s, void *data)
static int idle_states_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
+ u64 now, delta, idle_time = 0;
unsigned int i;
int ret = 0;
@@ -3173,17 +3226,19 @@ static int idle_states_show(struct seq_file *s, void *data)
seq_puts(s, "State Time Spent(ms) Usage Rejected\n");
for (i = 0; i < genpd->state_count; i++) {
- ktime_t delta = 0;
- s64 msecs;
+ idle_time += genpd->states[i].idle_time;
- if ((genpd->status == GENPD_STATE_OFF) &&
- (genpd->state_idx == i))
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
+ if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time) {
+ delta = now - genpd->accounting_time;
+ idle_time += delta;
+ }
+ }
- msecs = ktime_to_ms(
- ktime_add(genpd->states[i].idle_time, delta));
- seq_printf(s, "S%-13i %-14lld %-14llu %llu\n", i, msecs,
- genpd->states[i].usage, genpd->states[i].rejected);
+ do_div(idle_time, NSEC_PER_MSEC);
+ seq_printf(s, "S%-13i %-14llu %-14llu %llu\n", i, idle_time,
+ genpd->states[i].usage, genpd->states[i].rejected);
}
genpd_unlock(genpd);
@@ -3193,18 +3248,22 @@ static int idle_states_show(struct seq_file *s, void *data)
static int active_time_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
- ktime_t delta = 0;
+ u64 now, on_time, delta = 0;
int ret = 0;
ret = genpd_lock_interruptible(genpd);
if (ret)
return -ERESTARTSYS;
- if (genpd->status == GENPD_STATE_ON)
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
+ if (genpd->status == GENPD_STATE_ON) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time)
+ delta = now - genpd->accounting_time;
+ }
- seq_printf(s, "%lld ms\n", ktime_to_ms(
- ktime_add(genpd->on_time, delta)));
+ on_time = genpd->on_time + delta;
+ do_div(on_time, NSEC_PER_MSEC);
+ seq_printf(s, "%llu ms\n", on_time);
genpd_unlock(genpd);
return ret;
@@ -3213,7 +3272,7 @@ static int active_time_show(struct seq_file *s, void *data)
static int total_idle_time_show(struct seq_file *s, void *data)
{
struct generic_pm_domain *genpd = s->private;
- ktime_t delta = 0, total = 0;
+ u64 now, delta, total = 0;
unsigned int i;
int ret = 0;
@@ -3222,16 +3281,19 @@ static int total_idle_time_show(struct seq_file *s, void *data)
return -ERESTARTSYS;
for (i = 0; i < genpd->state_count; i++) {
+ total += genpd->states[i].idle_time;
- if ((genpd->status == GENPD_STATE_OFF) &&
- (genpd->state_idx == i))
- delta = ktime_sub(ktime_get(), genpd->accounting_time);
-
- total = ktime_add(total, genpd->states[i].idle_time);
+ if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) {
+ now = ktime_get_mono_fast_ns();
+ if (now > genpd->accounting_time) {
+ delta = now - genpd->accounting_time;
+ total += delta;
+ }
+ }
}
- total = ktime_add(total, delta);
- seq_printf(s, "%lld ms\n", ktime_to_ms(total));
+ do_div(total, NSEC_PER_MSEC);
+ seq_printf(s, "%llu ms\n", total);
genpd_unlock(genpd);
return ret;
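
As an aside (illustrative sketch, not part of the patch), the accounting rework above follows a simple pattern: keep the counters as plain u64 nanoseconds, sample ktime_get_mono_fast_ns(), guard against the fast clock appearing to run backwards, and only convert to milliseconds with do_div() when reporting. The structure and helper names below are made up.

#include <linux/ktime.h>
#include <linux/math64.h>

struct example_acct {
	u64 accounting_time;	/* last sample, in ns */
	u64 on_time;		/* accumulated, in ns */
};

static void example_update(struct example_acct *a)
{
	u64 now = ktime_get_mono_fast_ns();

	/* The fast clock may be observed to jump back slightly. */
	if (now <= a->accounting_time)
		return;

	a->on_time += now - a->accounting_time;
	a->accounting_time = now;
}

static u64 example_on_time_ms(const struct example_acct *a)
{
	u64 ms = a->on_time;

	do_div(ms, NSEC_PER_MSEC);	/* in-place 64-bit divide, ns -> ms */
	return ms;
}
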
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index cd08c5885190..282a3a135827 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -18,6 +18,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
s64 constraint_ns;
if (dev->power.subsys_data && dev->power.subsys_data->domain_data) {
+ struct gpd_timing_data *td = dev_gpd_data(dev)->td;
+
/*
* Only take suspend-time QoS constraints of devices into
* account, because constraints updated after the device has
@@ -25,7 +27,8 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
* anyway. In order for them to take effect, the device has to
* be resumed and suspended again.
*/
- constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns;
+ constraint_ns = td ? td->effective_constraint_ns :
+ PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS;
} else {
/*
* The child is not in a domain and there's no info on its
@@ -49,7 +52,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data)
*/
static bool default_suspend_ok(struct device *dev)
{
- struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+ struct gpd_timing_data *td = dev_gpd_data(dev)->td;
unsigned long flags;
s64 constraint_ns;
@@ -136,26 +139,28 @@ static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t n
* is able to enter its optimal idle state.
*/
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
- next_wakeup = to_gpd_data(pdd)->next_wakeup;
+ next_wakeup = to_gpd_data(pdd)->td->next_wakeup;
if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
if (ktime_before(next_wakeup, domain_wakeup))
domain_wakeup = next_wakeup;
}
list_for_each_entry(link, &genpd->parent_links, parent_node) {
- next_wakeup = link->child->next_wakeup;
+ struct genpd_governor_data *cgd = link->child->gd;
+
+ next_wakeup = cgd ? cgd->next_wakeup : KTIME_MAX;
if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now))
if (ktime_before(next_wakeup, domain_wakeup))
domain_wakeup = next_wakeup;
}
- genpd->next_wakeup = domain_wakeup;
+ genpd->gd->next_wakeup = domain_wakeup;
}
static bool next_wakeup_allows_state(struct generic_pm_domain *genpd,
unsigned int state, ktime_t now)
{
- ktime_t domain_wakeup = genpd->next_wakeup;
+ ktime_t domain_wakeup = genpd->gd->next_wakeup;
s64 idle_time_ns, min_sleep_ns;
min_sleep_ns = genpd->states[state].power_off_latency_ns +
@@ -185,8 +190,9 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* All subdomains have been powered off already at this point.
*/
list_for_each_entry(link, &genpd->parent_links, parent_node) {
- struct generic_pm_domain *sd = link->child;
- s64 sd_max_off_ns = sd->max_off_time_ns;
+ struct genpd_governor_data *cgd = link->child->gd;
+
+ s64 sd_max_off_ns = cgd ? cgd->max_off_time_ns : -1;
if (sd_max_off_ns < 0)
continue;
@@ -215,7 +221,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* domain to turn off and on (that's how much time it will
* have to wait worst case).
*/
- td = &to_gpd_data(pdd)->td;
+ td = to_gpd_data(pdd)->td;
constraint_ns = td->effective_constraint_ns;
/*
* Zero means "no suspend at all" and this runs only when all
@@ -244,7 +250,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
* time and the time needed to turn the domain on is the maximum
* theoretical time this domain can spend in the "off" state.
*/
- genpd->max_off_time_ns = min_off_time_ns -
+ genpd->gd->max_off_time_ns = min_off_time_ns -
genpd->states[state].power_on_latency_ns;
return true;
}
@@ -259,6 +265,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
{
struct generic_pm_domain *genpd = pd_to_genpd(pd);
+ struct genpd_governor_data *gd = genpd->gd;
int state_idx = genpd->state_count - 1;
struct gpd_link *link;
@@ -269,11 +276,11 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
* cannot be met.
*/
update_domain_next_wakeup(genpd, now);
- if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) {
+ if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (gd->next_wakeup != KTIME_MAX)) {
/* Let's find out the deepest domain idle state, the devices prefer */
while (state_idx >= 0) {
if (next_wakeup_allows_state(genpd, state_idx, now)) {
- genpd->max_off_time_changed = true;
+ gd->max_off_time_changed = true;
break;
}
state_idx--;
@@ -281,14 +288,14 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
if (state_idx < 0) {
state_idx = 0;
- genpd->cached_power_down_ok = false;
+ gd->cached_power_down_ok = false;
goto done;
}
}
- if (!genpd->max_off_time_changed) {
- genpd->state_idx = genpd->cached_power_down_state_idx;
- return genpd->cached_power_down_ok;
+ if (!gd->max_off_time_changed) {
+ genpd->state_idx = gd->cached_power_down_state_idx;
+ return gd->cached_power_down_ok;
}
/*
@@ -297,12 +304,16 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
* going to be called for any parent until this instance
* returns.
*/
- list_for_each_entry(link, &genpd->child_links, child_node)
- link->parent->max_off_time_changed = true;
+ list_for_each_entry(link, &genpd->child_links, child_node) {
+ struct genpd_governor_data *pgd = link->parent->gd;
+
+ if (pgd)
+ pgd->max_off_time_changed = true;
+ }
- genpd->max_off_time_ns = -1;
- genpd->max_off_time_changed = false;
- genpd->cached_power_down_ok = true;
+ gd->max_off_time_ns = -1;
+ gd->max_off_time_changed = false;
+ gd->cached_power_down_ok = true;
/*
* Find a state to power down to, starting from the state
@@ -310,7 +321,7 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
*/
while (!__default_power_down_ok(pd, state_idx)) {
if (state_idx == 0) {
- genpd->cached_power_down_ok = false;
+ gd->cached_power_down_ok = false;
break;
}
state_idx--;
@@ -318,8 +329,8 @@ static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now)
done:
genpd->state_idx = state_idx;
- genpd->cached_power_down_state_idx = genpd->state_idx;
- return genpd->cached_power_down_ok;
+ gd->cached_power_down_state_idx = genpd->state_idx;
+ return gd->cached_power_down_ok;
}
static bool default_power_down_ok(struct dev_pm_domain *pd)
@@ -327,11 +338,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd)
return _default_power_down_ok(pd, ktime_get());
}
-static bool always_on_power_down_ok(struct dev_pm_domain *domain)
-{
- return false;
-}
-
#ifdef CONFIG_CPU_IDLE
static bool cpu_power_down_ok(struct dev_pm_domain *pd)
{
@@ -401,6 +407,5 @@ struct dev_power_governor simple_qos_governor = {
* pm_genpd_gov_always_on - A governor implementing an always-on policy
*/
struct dev_power_governor pm_domain_always_on_gov = {
- .power_down_ok = always_on_power_down_ok,
.suspend_ok = default_suspend_ok,
};
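
For reference, the next-wakeup gate used by the min-residency path above boils down to one comparison; a simplified sketch (the helper name is made up, the latency and residency fields mirror the genpd idle-state fields):

/*
 * A state only pays off if the expected idle period covers both the
 * time needed to power the domain off and the state's minimum residency.
 */
static bool example_state_fits(s64 power_off_latency_ns, s64 residency_ns,
			       ktime_t domain_wakeup, ktime_t now)
{
	s64 idle_time_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));

	return idle_time_ns >= power_off_latency_ns + residency_ns;
}
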
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index d4059e6ffeae..676dc72d912d 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -263,7 +263,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
retval = -EINVAL;
else if (dev->power.disable_depth > 0)
retval = -EACCES;
- else if (atomic_read(&dev->power.usage_count) > 0)
+ else if (atomic_read(&dev->power.usage_count))
retval = -EAGAIN;
else if (!dev->power.ignore_children &&
atomic_read(&dev->power.child_count))
@@ -1039,13 +1039,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
}
EXPORT_SYMBOL_GPL(pm_schedule_suspend);
+static int rpm_drop_usage_count(struct device *dev)
+{
+ int ret;
+
+ ret = atomic_sub_return(1, &dev->power.usage_count);
+ if (ret >= 0)
+ return ret;
+
+ /*
+ * Because rpm_resume() does not check the usage counter, it will resume
+ * the device even if the usage counter is 0 or negative, so it is
+ * sufficient to increment the usage counter here to reverse the change
+ * made above.
+ */
+ atomic_inc(&dev->power.usage_count);
+ dev_warn(dev, "Runtime PM usage count underflow!\n");
+ return -EINVAL;
+}
+
/**
* __pm_runtime_idle - Entry point for runtime idle operations.
* @dev: Device to send idle notification for.
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero. Then carry out an idle
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error). Then carry out an idle
* notification, either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1057,7 +1077,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
+ retval = rpm_drop_usage_count(dev);
+ if (retval < 0) {
+ return retval;
+ } else if (retval > 0) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
@@ -1079,7 +1102,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle);
* @rpmflags: Flag bits.
*
* If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero. Then carry out a suspend,
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error). Then carry out a suspend,
* either synchronous or asynchronous.
*
* This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1091,7 +1115,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
+ retval = rpm_drop_usage_count(dev);
+ if (retval < 0) {
+ return retval;
+ } else if (retval > 0) {
trace_rpm_usage_rcuidle(dev, rpmflags);
return 0;
}
@@ -1210,12 +1237,13 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
{
struct device *parent = dev->parent;
bool notify_parent = false;
+ unsigned long flags;
int error = 0;
if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
return -EINVAL;
- spin_lock_irq(&dev->power.lock);
+ spin_lock_irqsave(&dev->power.lock, flags);
/*
* Prevent PM-runtime from being enabled for the device or return an
@@ -1226,7 +1254,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
else
error = -EAGAIN;
- spin_unlock_irq(&dev->power.lock);
+ spin_unlock_irqrestore(&dev->power.lock, flags);
if (error)
return error;
@@ -1247,7 +1275,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
device_links_read_unlock(idx);
}
- spin_lock_irq(&dev->power.lock);
+ spin_lock_irqsave(&dev->power.lock, flags);
if (dev->power.runtime_status == status || !parent)
goto out_set;
@@ -1288,7 +1316,7 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
dev->power.runtime_error = 0;
out:
- spin_unlock_irq(&dev->power.lock);
+ spin_unlock_irqrestore(&dev->power.lock, flags);
if (notify_parent)
pm_request_idle(parent);
@@ -1527,14 +1555,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
*/
void pm_runtime_allow(struct device *dev)
{
+ int ret;
+
spin_lock_irq(&dev->power.lock);
if (dev->power.runtime_auto)
goto out;
dev->power.runtime_auto = true;
- if (atomic_dec_and_test(&dev->power.usage_count))
+ ret = rpm_drop_usage_count(dev);
+ if (ret == 0)
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
- else
+ else if (ret > 0)
trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
out:
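
A stripped-down model (hypothetical names) of the usage-counter underflow guard introduced above: decrement with atomic_sub_return(), and if the counter went negative, put it back and fail instead of proceeding. Callers then treat a negative return as an error, a positive one as "still in use", and zero as permission to idle or suspend.

#include <linux/atomic.h>
#include <linux/errno.h>

static int example_drop_usage_count(atomic_t *count)
{
	int ret = atomic_sub_return(1, count);

	if (ret >= 0)
		return ret;

	/* Underflow: the counter was already 0, so undo and complain. */
	atomic_inc(count);
	return -EINVAL;
}
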
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 82d370ae6a4a..d092c9bb4ba3 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -389,6 +389,27 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
+static unsigned int cppc_cpufreq_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ struct cppc_cpudata *cpu_data = policy->driver_data;
+ unsigned int cpu = policy->cpu;
+ u32 desired_perf;
+ int ret;
+
+ desired_perf = cppc_cpufreq_khz_to_perf(cpu_data, target_freq);
+ cpu_data->perf_ctrls.desired_perf = desired_perf;
+ ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
+
+ if (ret) {
+ pr_debug("Failed to set target on CPU:%d. ret:%d\n",
+ cpu, ret);
+ return 0;
+ }
+
+ return target_freq;
+}
+
static int cppc_verify_policy(struct cpufreq_policy_data *policy)
{
cpufreq_verify_within_cpu_limits(policy);
@@ -420,12 +441,197 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
+static DEFINE_PER_CPU(unsigned int, efficiency_class);
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy);
+
+/* Create an artificial performance state every CPPC_EM_CAP_STEP capacity unit. */
+#define CPPC_EM_CAP_STEP (20)
+/* Increase the cost value by CPPC_EM_COST_STEP every performance state. */
+#define CPPC_EM_COST_STEP (1)
+/* Add a cost gap corresponding to the energy of 4 CPUs. */
+#define CPPC_EM_COST_GAP (4 * SCHED_CAPACITY_SCALE * CPPC_EM_COST_STEP \
+ / CPPC_EM_CAP_STEP)
+
+static unsigned int get_perf_level_count(struct cpufreq_policy *policy)
+{
+ struct cppc_perf_caps *perf_caps;
+ unsigned int min_cap, max_cap;
+ struct cppc_cpudata *cpu_data;
+ int cpu = policy->cpu;
+
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu);
+ min_cap = div_u64(max_cap * perf_caps->lowest_perf, perf_caps->highest_perf);
+ if ((min_cap == 0) || (max_cap < min_cap))
+ return 0;
+ return 1 + max_cap / CPPC_EM_CAP_STEP - min_cap / CPPC_EM_CAP_STEP;
+}
+
+/*
+ * The cost is defined as:
+ * cost = power * max_frequency / frequency
+ */
+static inline unsigned long compute_cost(int cpu, int step)
+{
+ return CPPC_EM_COST_GAP * per_cpu(efficiency_class, cpu) +
+ step * CPPC_EM_COST_STEP;
+}
+
+static int cppc_get_cpu_power(struct device *cpu_dev,
+ unsigned long *power, unsigned long *KHz)
+{
+ unsigned long perf_step, perf_prev, perf, perf_check;
+ unsigned int min_step, max_step, step, step_check;
+ unsigned long prev_freq = *KHz;
+ unsigned int min_cap, max_cap;
+ struct cpufreq_policy *policy;
+
+ struct cppc_perf_caps *perf_caps;
+ struct cppc_cpudata *cpu_data;
+
+ policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+ min_cap = div_u64(max_cap * perf_caps->lowest_perf,
+ perf_caps->highest_perf);
+
+ perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+ min_step = min_cap / CPPC_EM_CAP_STEP;
+ max_step = max_cap / CPPC_EM_CAP_STEP;
+
+ perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step = perf_prev / perf_step;
+
+ if (step > max_step)
+ return -EINVAL;
+
+ if (min_step == max_step) {
+ step = max_step;
+ perf = perf_caps->highest_perf;
+ } else if (step < min_step) {
+ step = min_step;
+ perf = perf_caps->lowest_perf;
+ } else {
+ step++;
+ if (step == max_step)
+ perf = perf_caps->highest_perf;
+ else
+ perf = step * perf_step;
+ }
+
+ *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+ perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step_check = perf_check / perf_step;
+
+ /*
+ * To avoid bad integer approximation, check that new frequency value
+ * increased and that the new frequency will be converted to the
+ * desired step value.
+ */
+ while ((*KHz == prev_freq) || (step_check != step)) {
+ perf++;
+ *KHz = cppc_cpufreq_perf_to_khz(cpu_data, perf);
+ perf_check = cppc_cpufreq_khz_to_perf(cpu_data, *KHz);
+ step_check = perf_check / perf_step;
+ }
+
+ /*
+ * With an artificial EM, only the cost value is used. Still the power
+ * is populated such as 0 < power < EM_MAX_POWER. This allows to add
+ * more sense to the artificial performance states.
+ */
+ *power = compute_cost(cpu_dev->id, step);
+
+ return 0;
+}
+
+static int cppc_get_cpu_cost(struct device *cpu_dev, unsigned long KHz,
+ unsigned long *cost)
+{
+ unsigned long perf_step, perf_prev;
+ struct cppc_perf_caps *perf_caps;
+ struct cpufreq_policy *policy;
+ struct cppc_cpudata *cpu_data;
+ unsigned int max_cap;
+ int step;
+
+ policy = cpufreq_cpu_get_raw(cpu_dev->id);
+ cpu_data = policy->driver_data;
+ perf_caps = &cpu_data->perf_caps;
+ max_cap = arch_scale_cpu_capacity(cpu_dev->id);
+
+ perf_prev = cppc_cpufreq_khz_to_perf(cpu_data, KHz);
+ perf_step = CPPC_EM_CAP_STEP * perf_caps->highest_perf / max_cap;
+ step = perf_prev / perf_step;
+
+ *cost = compute_cost(cpu_dev->id, step);
+
+ return 0;
+}
+
+static int populate_efficiency_class(void)
+{
+ struct acpi_madt_generic_interrupt *gicc;
+ DECLARE_BITMAP(used_classes, 256) = {};
+ int class, cpu, index;
+
+ for_each_possible_cpu(cpu) {
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ class = gicc->efficiency_class;
+ bitmap_set(used_classes, class, 1);
+ }
+
+ if (bitmap_weight(used_classes, 256) <= 1) {
+ pr_debug("Efficiency classes are all equal (=%d). "
+ "No EM registered", class);
+ return -EINVAL;
+ }
+
+ /*
+ * Squeeze efficiency class values on [0:#efficiency_class-1].
+ * Values are per spec in [0:255].
+ */
+ index = 0;
+ for_each_set_bit(class, used_classes, 256) {
+ for_each_possible_cpu(cpu) {
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->efficiency_class == class)
+ per_cpu(efficiency_class, cpu) = index;
+ }
+ index++;
+ }
+ cppc_cpufreq_driver.register_em = cppc_cpufreq_register_em;
+
+ return 0;
+}
+
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+ struct cppc_cpudata *cpu_data;
+ struct em_data_callback em_cb =
+ EM_ADV_DATA_CB(cppc_get_cpu_power, cppc_get_cpu_cost);
+
+ cpu_data = policy->driver_data;
+ em_dev_register_perf_domain(get_cpu_device(policy->cpu),
+ get_perf_level_count(policy), &em_cb,
+ cpu_data->shared_cpu_map, 0);
+}
+
#else
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
+static int populate_efficiency_class(void)
+{
+ return 0;
+}
+static void cppc_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+}
#endif
@@ -536,6 +742,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto out;
}
+ policy->fast_switch_possible = cppc_allow_fast_switch();
+ policy->dvfs_possible_from_any_cpu = true;
+
/*
* If 'highest_perf' is greater than 'nominal_perf', we assume CPU Boost
* is supported.
@@ -681,6 +890,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
.verify = cppc_verify_policy,
.target = cppc_cpufreq_set_target,
.get = cppc_cpufreq_get_rate,
+ .fast_switch = cppc_cpufreq_fast_switch,
.init = cppc_cpufreq_cpu_init,
.exit = cppc_cpufreq_cpu_exit,
.set_boost = cppc_cpufreq_set_boost,
@@ -742,6 +952,7 @@ static int __init cppc_cpufreq_init(void)
cppc_check_hisi_workaround();
cppc_freq_invariance_init();
+ populate_efficiency_class();
ret = cpufreq_register_driver(&cppc_cpufreq_driver);
if (ret)
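
To make the artificial energy model above concrete (assuming SCHED_CAPACITY_SCALE is 1024, its usual value): CPPC_EM_COST_GAP = 4 * 1024 * 1 / 20 = 204, and compute_cost() is simply 204 * efficiency_class + step. A class-0 CPU at its top step (1024 / 20 = 51) costs 51, while any class-1 CPU costs at least 204, roughly four times as much, which is what the "cost gap corresponding to the energy of 4 CPUs" comment refers to. A self-contained sketch of the same arithmetic, with illustrative names:

/* Illustration only; assumes SCHED_CAPACITY_SCALE == 1024. */
#define EX_CAP_STEP	20
#define EX_COST_STEP	1
#define EX_COST_GAP	(4 * 1024 * EX_COST_STEP / EX_CAP_STEP)	/* = 204 */

static unsigned long example_cost(unsigned int efficiency_class, unsigned int step)
{
	return EX_COST_GAP * efficiency_class + step * EX_COST_STEP;
}
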
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 80f535cc8a75..2cad42774164 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -28,6 +28,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/tick.h>
+#include <linux/units.h>
#include <trace/events/power.h>
static LIST_HEAD(cpufreq_policy_list);
@@ -947,13 +948,14 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret;
+ ssize_t ret = -EBUSY;
if (!fattr->show)
return -EIO;
down_read(&policy->rwsem);
- ret = fattr->show(policy, buf);
+ if (likely(!policy_is_inactive(policy)))
+ ret = fattr->show(policy, buf);
up_read(&policy->rwsem);
return ret;
@@ -964,7 +966,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
{
struct cpufreq_policy *policy = to_policy(kobj);
struct freq_attr *fattr = to_attr(attr);
- ssize_t ret = -EINVAL;
+ ssize_t ret = -EBUSY;
if (!fattr->store)
return -EIO;
@@ -978,7 +980,8 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
- ret = fattr->store(policy, buf, count);
+ if (likely(!policy_is_inactive(policy)))
+ ret = fattr->store(policy, buf, count);
up_write(&policy->rwsem);
}
@@ -1019,11 +1022,12 @@ static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
dev_err(dev, "cpufreq symlink creation failed\n");
}
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu,
struct device *dev)
{
dev_dbg(dev, "%s: Removing symlink\n", __func__);
sysfs_remove_link(&dev->kobj, "cpufreq");
+ cpumask_clear_cpu(cpu, policy->real_cpus);
}
static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -1337,12 +1341,12 @@ static int cpufreq_online(unsigned int cpu)
down_write(&policy->rwsem);
policy->cpu = cpu;
policy->governor = NULL;
- up_write(&policy->rwsem);
} else {
new_policy = true;
policy = cpufreq_policy_alloc(cpu);
if (!policy)
return -ENOMEM;
+ down_write(&policy->rwsem);
}
if (!new_policy && cpufreq_driver->online) {
@@ -1382,7 +1386,6 @@ static int cpufreq_online(unsigned int cpu)
cpumask_copy(policy->related_cpus, policy->cpus);
}
- down_write(&policy->rwsem);
/*
* affected cpus must always be the one, which are online. We aren't
* managing offline cpus here.
@@ -1531,9 +1534,9 @@ static int cpufreq_online(unsigned int cpu)
out_destroy_policy:
for_each_cpu(j, policy->real_cpus)
- remove_cpu_dev_symlink(policy, get_cpu_device(j));
+ remove_cpu_dev_symlink(policy, j, get_cpu_device(j));
- up_write(&policy->rwsem);
+ cpumask_clear(policy->cpus);
out_offline_policy:
if (cpufreq_driver->offline)
@@ -1544,6 +1547,8 @@ out_exit_policy:
cpufreq_driver->exit(policy);
out_free_policy:
+ up_write(&policy->rwsem);
+
cpufreq_policy_free(policy);
return ret;
}
@@ -1575,47 +1580,36 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
return 0;
}
-static int cpufreq_offline(unsigned int cpu)
+static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy)
{
- struct cpufreq_policy *policy;
int ret;
- pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
-
- policy = cpufreq_cpu_get_raw(cpu);
- if (!policy) {
- pr_debug("%s: No cpu_data found\n", __func__);
- return 0;
- }
-
- down_write(&policy->rwsem);
if (has_target())
cpufreq_stop_governor(policy);
cpumask_clear_cpu(cpu, policy->cpus);
- if (policy_is_inactive(policy)) {
- if (has_target())
- strncpy(policy->last_governor, policy->governor->name,
- CPUFREQ_NAME_LEN);
- else
- policy->last_policy = policy->policy;
- } else if (cpu == policy->cpu) {
- /* Nominate new CPU */
- policy->cpu = cpumask_any(policy->cpus);
- }
-
- /* Start governor again for active policy */
if (!policy_is_inactive(policy)) {
+ /* Nominate a new CPU if necessary. */
+ if (cpu == policy->cpu)
+ policy->cpu = cpumask_any(policy->cpus);
+
+ /* Start the governor again for the active policy. */
if (has_target()) {
ret = cpufreq_start_governor(policy);
if (ret)
pr_err("%s: Failed to start governor\n", __func__);
}
- goto unlock;
+ return;
}
+ if (has_target())
+ strncpy(policy->last_governor, policy->governor->name,
+ CPUFREQ_NAME_LEN);
+ else
+ policy->last_policy = policy->policy;
+
if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
cpufreq_cooling_unregister(policy->cdev);
policy->cdev = NULL;
@@ -1634,8 +1628,24 @@ static int cpufreq_offline(unsigned int cpu)
cpufreq_driver->exit(policy);
policy->freq_table = NULL;
}
+}
+
+static int cpufreq_offline(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+
+ pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
+
+ policy = cpufreq_cpu_get_raw(cpu);
+ if (!policy) {
+ pr_debug("%s: No cpu_data found\n", __func__);
+ return 0;
+ }
+
+ down_write(&policy->rwsem);
+
+ __cpufreq_offline(cpu, policy);
-unlock:
up_write(&policy->rwsem);
return 0;
}
@@ -1653,19 +1663,25 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
if (!policy)
return;
- if (cpu_online(cpu))
- cpufreq_offline(cpu);
+ down_write(&policy->rwsem);
- cpumask_clear_cpu(cpu, policy->real_cpus);
- remove_cpu_dev_symlink(policy, dev);
+ if (cpu_online(cpu))
+ __cpufreq_offline(cpu, policy);
- if (cpumask_empty(policy->real_cpus)) {
- /* We did light-weight exit earlier, do full tear down now */
- if (cpufreq_driver->offline)
- cpufreq_driver->exit(policy);
+ remove_cpu_dev_symlink(policy, cpu, dev);
- cpufreq_policy_free(policy);
+ if (!cpumask_empty(policy->real_cpus)) {
+ up_write(&policy->rwsem);
+ return;
}
+
+ /* We did light-weight exit earlier, do full tear down now */
+ if (cpufreq_driver->offline)
+ cpufreq_driver->exit(policy);
+
+ up_write(&policy->rwsem);
+
+ cpufreq_policy_free(policy);
}
/**
@@ -1707,6 +1723,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b
return new_freq;
if (policy->cur != new_freq) {
+ /*
+ * For some platforms, the frequency returned by hardware may be
+ * slightly different from what is provided in the frequency
+ * table, for example hardware may return 499 MHz instead of 500
+ * MHz. In such cases it is better to avoid getting into
+ * unnecessary frequency updates.
+ */
+ if (abs(policy->cur - new_freq) < HZ_PER_MHZ)
+ return policy->cur;
+
cpufreq_out_of_sync(policy, new_freq);
if (update)
schedule_work(&policy->update);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 0d42cf8b88d8..85da677c43d6 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
gov->free(policy_dbs);
}
+static void cpufreq_dbs_data_release(struct kobject *kobj)
+{
+ struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj));
+ struct dbs_governor *gov = dbs_data->gov;
+
+ gov->exit(dbs_data);
+ kfree(dbs_data);
+}
+
int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
{
struct dbs_governor *gov = dbs_governor_of(policy);
@@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
goto free_policy_dbs_info;
}
+ dbs_data->gov = gov;
gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
ret = gov->init(dbs_data);
@@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
policy->governor_data = policy_dbs;
gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
+ gov->kobj_type.release = cpufreq_dbs_data_release;
ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
get_governor_parent_kobj(policy),
"%s", gov->gov.name);
@@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
policy->governor_data = NULL;
- if (!count) {
- if (!have_governor_per_policy())
- gov->gdbs_data = NULL;
-
- gov->exit(dbs_data);
- kfree(dbs_data);
- }
+ if (!count && !have_governor_per_policy())
+ gov->gdbs_data = NULL;
free_policy_dbs_info(policy_dbs, gov);
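
The change above follows the standard kobject lifetime pattern: instead of freeing the tunables directly on governor exit, the final cleanup moves into the kobject ->release() callback so it cannot race with sysfs accesses that still hold a reference. A generic sketch of that pattern (names are illustrative, not the governor code itself):

#include <linux/kobject.h>
#include <linux/slab.h>

struct example_tunables {
	struct kobject kobj;
	unsigned int sampling_rate;
};

static void example_release(struct kobject *kobj)
{
	struct example_tunables *t = container_of(kobj, struct example_tunables, kobj);

	/*
	 * Runs only after the last kobject_put(), i.e. once no sysfs file
	 * can still reach the object.
	 */
	kfree(t);
}

static struct kobj_type example_ktype = {
	.release	= example_release,
	.sysfs_ops	= &kobj_sysfs_ops,
};
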
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index a5a0bc3cc23e..168c23fd7fca 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
/* Governor demand based switching data (per-policy or global). */
struct dbs_data {
struct gov_attr_set attr_set;
+ struct dbs_governor *gov;
void *tuners;
unsigned int ignore_nice_load;
unsigned int sampling_rate;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 846bb3a78788..57cdb3679885 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1322,6 +1322,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
mutex_unlock(&intel_pstate_limits_lock);
intel_pstate_update_policies();
+ arch_set_max_freq_ratio(global.no_turbo);
mutex_unlock(&intel_pstate_driver_lock);
@@ -2424,6 +2425,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
X86_MATCH(BROADWELL_X, core_funcs),
X86_MATCH(SKYLAKE_X, core_funcs),
X86_MATCH(ICELAKE_X, core_funcs),
+ X86_MATCH(SAPPHIRERAPIDS_X, core_funcs),
{}
};
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 0a94c56ddad2..813cccbfe934 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
-mtk_cpufreq_get_cpu_power(unsigned long *mW,
- unsigned long *KHz, struct device *cpu_dev)
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+ unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;
diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
index 815645170c4d..039a66bbe1be 100644
--- a/drivers/cpufreq/pasemi-cpufreq.c
+++ b/drivers/cpufreq/pasemi-cpufreq.c
@@ -18,7 +18,6 @@
#include <asm/hw_irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/smp.h>
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 4f20c6a9108d..20f64a8b0a35 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -24,7 +24,7 @@
#include <linux/device.h>
#include <linux/hardirq.h>
#include <linux/of_device.h>
-#include <asm/prom.h>
+
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/pmac_feature.h>
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index d7542a106e6b..ba9c31d98bd6 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -22,7 +22,7 @@
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/of_device.h>
-#include <asm/prom.h>
+
#include <asm/machdep.h>
#include <asm/irq.h>
#include <asm/sections.h>
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.c b/drivers/cpufreq/ppc_cbe_cpufreq.c
index c58abb4cca3a..e3313ce63b38 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.c
@@ -12,7 +12,6 @@
#include <linux/of_platform.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/cell-regs.h>
#include "ppc_cbe_cpufreq.h"
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 037fe23bc6ed..4fba3637b115 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -13,9 +13,9 @@
#include <linux/init.h>
#include <linux/of_platform.h>
#include <linux/pm_qos.h>
+#include <linux/slab.h>
#include <asm/processor.h>
-#include <asm/prom.h>
#include <asm/pmi.h>
#include <asm/cell-regs.h>
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 919fa6e3f462..6d2a4cf46db7 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}
static int __maybe_unused
-scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
- struct device *cpu_dev)
+scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
+ unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 755bbdfc5b82..3db4fca1172b 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
struct generic_pm_domain *pd;
struct psci_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node);
if (!pd)
@@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index b51b5df08450..540105ca0781 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -23,6 +23,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/syscore_ops.h>
#include <asm/cpuidle.h>
@@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu)
return 0;
}
+static void psci_idle_syscore_switch(bool suspend)
+{
+ bool cleared = false;
+ struct device *dev;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev;
+
+ if (dev && suspend) {
+ dev_pm_genpd_suspend(dev);
+ } else if (dev) {
+ dev_pm_genpd_resume(dev);
+
+ /* Account for userspace having offlined a CPU. */
+ if (pm_runtime_status_suspended(dev))
+ pm_runtime_set_active(dev);
+
+ /* Clear domain state to re-start fresh. */
+ if (!cleared) {
+ psci_set_domain_state(0);
+ cleared = true;
+ }
+ }
+ }
+}
+
+static int psci_idle_syscore_suspend(void)
+{
+ psci_idle_syscore_switch(true);
+ return 0;
+}
+
+static void psci_idle_syscore_resume(void)
+{
+ psci_idle_syscore_switch(false);
+}
+
+static struct syscore_ops psci_idle_syscore_ops = {
+ .suspend = psci_idle_syscore_suspend,
+ .resume = psci_idle_syscore_resume,
+};
+
static void psci_idle_init_cpuhp(void)
{
int err;
@@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void)
if (!psci_cpuidle_use_cpuhp)
return;
+ register_syscore_ops(&psci_idle_syscore_ops);
+
err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING,
"cpuidle/psci:online",
psci_idle_cpuhp_up,
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 5c852e671992..1151e5e2ba82 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np)
struct generic_pm_domain *pd;
struct sbi_pd_provider *pd_provider;
struct dev_power_governor *pd_gov;
- int ret = -ENOMEM, state_count = 0;
+ int ret = -ENOMEM;
pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node);
if (!pd)
@@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np)
pd->flags |= GENPD_FLAG_ALWAYS_ON;
/* Use governor for CPU PM domains if it has some states to manage. */
- pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL;
+ pd_gov = pd->states ? &pm_domain_cpu_gov : NULL;
ret = pm_genpd_init(pd, pd_gov, false);
if (ret)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index a525a609dfc6..01474daf4548 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -112,16 +112,16 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
}
/**
- * get_freq_range() - Get the current freq range
+ * devfreq_get_freq_range() - Get the current freq range
* @devfreq: the devfreq instance
* @min_freq: the min frequency
* @max_freq: the max frequency
*
* This takes into consideration all constraints.
*/
-static void get_freq_range(struct devfreq *devfreq,
- unsigned long *min_freq,
- unsigned long *max_freq)
+void devfreq_get_freq_range(struct devfreq *devfreq,
+ unsigned long *min_freq,
+ unsigned long *max_freq)
{
unsigned long *freq_table = devfreq->profile->freq_table;
s32 qos_min_freq, qos_max_freq;
@@ -158,6 +158,7 @@ static void get_freq_range(struct devfreq *devfreq,
if (*min_freq > *max_freq)
*min_freq = *max_freq;
}
+EXPORT_SYMBOL(devfreq_get_freq_range);
/**
* devfreq_get_freq_level() - Lookup freq_table for the frequency
@@ -418,7 +419,7 @@ int devfreq_update_target(struct devfreq *devfreq, unsigned long freq)
err = devfreq->governor->get_target_freq(devfreq, &freq);
if (err)
return err;
- get_freq_range(devfreq, &min_freq, &max_freq);
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
if (freq < min_freq) {
freq = min_freq;
@@ -785,6 +786,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
{
struct devfreq *devfreq;
struct devfreq_governor *governor;
+ unsigned long min_freq, max_freq;
int err = 0;
if (!dev || !profile || !governor_name) {
@@ -849,6 +851,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
goto err_dev;
}
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
+
devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
devfreq->opp_table = dev_pm_opp_get_opp_table(dev);
if (IS_ERR(devfreq->opp_table))
@@ -1587,7 +1591,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
unsigned long min_freq, max_freq;
mutex_lock(&df->lock);
- get_freq_range(df, &min_freq, &max_freq);
+ devfreq_get_freq_range(df, &min_freq, &max_freq);
mutex_unlock(&df->lock);
return sprintf(buf, "%lu\n", min_freq);
@@ -1641,7 +1645,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
unsigned long min_freq, max_freq;
mutex_lock(&df->lock);
- get_freq_range(df, &min_freq, &max_freq);
+ devfreq_get_freq_range(df, &min_freq, &max_freq);
mutex_unlock(&df->lock);
return sprintf(buf, "%lu\n", max_freq);
@@ -1955,7 +1959,7 @@ static int devfreq_summary_show(struct seq_file *s, void *data)
mutex_lock(&devfreq->lock);
cur_freq = devfreq->previous_freq;
- get_freq_range(devfreq, &min_freq, &max_freq);
+ devfreq_get_freq_range(devfreq, &min_freq, &max_freq);
timer = devfreq->profile->timer;
if (IS_SUPPORTED_ATTR(devfreq->governor->attrs, POLLING_INTERVAL))
diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index 002a7d67e39d..0adfebc0467a 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -48,6 +48,31 @@
#define DEVFREQ_GOV_ATTR_TIMER BIT(1)
/**
+ * struct devfreq_cpu_data - Hold the per-cpu data
+ * @node: list node
+ * @dev: reference to cpu device.
+ * @first_cpu: the cpumask of the first cpu of a policy.
+ * @opp_table: reference to cpu opp table.
+ * @cur_freq: the current frequency of the cpu.
+ * @min_freq: the min frequency of the cpu.
+ * @max_freq: the max frequency of the cpu.
+ *
+ * This structure holds the per-cpu data required by the passive governor.
+ * It is populated automatically by the governor.
+ */
+struct devfreq_cpu_data {
+ struct list_head node;
+
+ struct device *dev;
+ unsigned int first_cpu;
+
+ struct opp_table *opp_table;
+ unsigned int cur_freq;
+ unsigned int min_freq;
+ unsigned int max_freq;
+};
+
+/**
* struct devfreq_governor - Devfreq policy governor
* @node: list node - contains registered devfreq governors
* @name: Governor's name
@@ -89,6 +114,8 @@ int devm_devfreq_add_governor(struct device *dev,
int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
int devfreq_update_target(struct devfreq *devfreq, unsigned long freq);
+void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq,
+ unsigned long *max_freq);
static inline int devfreq_update_stats(struct devfreq *df)
{
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index fc09324a03e0..72c67979ebe1 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/devfreq/governor_passive.c
*
@@ -8,76 +8,129 @@
*/
#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
#include <linux/device.h>
#include <linux/devfreq.h>
#include "governor.h"
-static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
- unsigned long *freq)
+#define HZ_PER_KHZ 1000
+
+static struct devfreq_cpu_data *
+get_parent_cpu_data(struct devfreq_passive_data *p_data,
+ struct cpufreq_policy *policy)
{
- struct devfreq_passive_data *p_data
- = (struct devfreq_passive_data *)devfreq->data;
- struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
- unsigned long child_freq = ULONG_MAX;
- struct dev_pm_opp *opp, *p_opp;
- int i, count;
+ struct devfreq_cpu_data *parent_cpu_data;
- /*
- * If the devfreq device with passive governor has the specific method
- * to determine the next frequency, should use the get_target_freq()
- * of struct devfreq_passive_data.
- */
- if (p_data->get_target_freq)
- return p_data->get_target_freq(devfreq, freq);
+ if (!p_data || !policy)
+ return NULL;
- /*
- * If the parent and passive devfreq device uses the OPP table,
- * get the next frequency by using the OPP table.
- */
+ list_for_each_entry(parent_cpu_data, &p_data->cpu_data_list, node)
+ if (parent_cpu_data->first_cpu == cpumask_first(policy->related_cpus))
+ return parent_cpu_data;
- /*
- * - parent devfreq device uses the governors except for passive.
- * - passive devfreq device uses the passive governor.
- *
- * Each devfreq has the OPP table. After deciding the new frequency
- * from the governor of parent devfreq device, the passive governor
- * need to get the index of new frequency on OPP table of parent
- * device. And then the index is used for getting the suitable
- * new frequency for passive devfreq device.
- */
- if (!devfreq->profile || !devfreq->profile->freq_table
- || devfreq->profile->max_state <= 0)
- return -EINVAL;
+ return NULL;
+}
- /*
- * The passive governor have to get the correct frequency from OPP
- * list of parent device. Because in this case, *freq is temporary
- * value which is decided by ondemand governor.
- */
- if (devfreq->opp_table && parent_devfreq->opp_table) {
- p_opp = devfreq_recommended_opp(parent_devfreq->dev.parent,
- freq, 0);
- if (IS_ERR(p_opp))
- return PTR_ERR(p_opp);
+static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
+ struct opp_table *p_opp_table,
+ struct opp_table *opp_table,
+ unsigned long *freq)
+{
+ struct dev_pm_opp *opp = NULL, *p_opp = NULL;
+ unsigned long target_freq;
- opp = dev_pm_opp_xlate_required_opp(parent_devfreq->opp_table,
- devfreq->opp_table, p_opp);
- dev_pm_opp_put(p_opp);
+ if (!p_dev || !p_opp_table || !opp_table || !freq)
+ return 0;
- if (IS_ERR(opp))
- goto no_required_opp;
+ p_opp = devfreq_recommended_opp(p_dev, freq, 0);
+ if (IS_ERR(p_opp))
+ return 0;
- *freq = dev_pm_opp_get_freq(opp);
- dev_pm_opp_put(opp);
+ opp = dev_pm_opp_xlate_required_opp(p_opp_table, opp_table, p_opp);
+ dev_pm_opp_put(p_opp);
+ if (IS_ERR(opp))
return 0;
+
+ target_freq = dev_pm_opp_get_freq(opp);
+ dev_pm_opp_put(opp);
+
+ return target_freq;
+}
+
+static int get_target_freq_with_cpufreq(struct devfreq *devfreq,
+ unsigned long *target_freq)
+{
+ struct devfreq_passive_data *p_data =
+ (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_policy *policy;
+ unsigned long cpu, cpu_cur, cpu_min, cpu_max, cpu_percent;
+ unsigned long dev_min, dev_max;
+ unsigned long freq = 0;
+ int ret = 0;
+
+ for_each_online_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EINVAL;
+ continue;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ /* Get target freq via required opps */
+ cpu_cur = parent_cpu_data->cur_freq * HZ_PER_KHZ;
+ freq = get_target_freq_by_required_opp(parent_cpu_data->dev,
+ parent_cpu_data->opp_table,
+ devfreq->opp_table, &cpu_cur);
+ if (freq) {
+ *target_freq = max(freq, *target_freq);
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ /* Use interpolation if required opps is not available */
+ devfreq_get_freq_range(devfreq, &dev_min, &dev_max);
+
+ cpu_min = parent_cpu_data->min_freq;
+ cpu_max = parent_cpu_data->max_freq;
+ cpu_cur = parent_cpu_data->cur_freq;
+
+ cpu_percent = ((cpu_cur - cpu_min) * 100) / (cpu_max - cpu_min);
+ freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100);
+
+ *target_freq = max(freq, *target_freq);
+ cpufreq_cpu_put(policy);
}
-no_required_opp:
- /*
- * Get the OPP table's index of decided frequency by governor
- * of parent device.
- */
+ return ret;
+}
+
+static int get_target_freq_with_devfreq(struct devfreq *devfreq,
+ unsigned long *freq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+ unsigned long child_freq = ULONG_MAX;
+ int i, count;
+
+ /* Get target freq via required opps */
+ child_freq = get_target_freq_by_required_opp(parent_devfreq->dev.parent,
+ parent_devfreq->opp_table,
+ devfreq->opp_table, freq);
+ if (child_freq)
+ goto out;
+
+ /* Use interpolation if required opps is not available */
for (i = 0; i < parent_devfreq->profile->max_state; i++)
if (parent_devfreq->profile->freq_table[i] == *freq)
break;
@@ -85,7 +138,6 @@ no_required_opp:
if (i == parent_devfreq->profile->max_state)
return -EINVAL;
- /* Get the suitable frequency by using index of parent device. */
if (i < devfreq->profile->max_state) {
child_freq = devfreq->profile->freq_table[i];
} else {
@@ -93,12 +145,202 @@ no_required_opp:
child_freq = devfreq->profile->freq_table[count - 1];
}
- /* Return the suitable frequency for passive device. */
+out:
*freq = child_freq;
return 0;
}
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+ unsigned long *freq)
+{
+ struct devfreq_passive_data *p_data =
+ (struct devfreq_passive_data *)devfreq->data;
+ int ret;
+
+ if (!p_data)
+ return -EINVAL;
+
+ /*
+ * If the devfreq device using the passive governor provides its own
+ * method to determine the next frequency, use the get_target_freq()
+ * callback of struct devfreq_passive_data.
+ */
+ if (p_data->get_target_freq)
+ return p_data->get_target_freq(devfreq, freq);
+
+ switch (p_data->parent_type) {
+ case DEVFREQ_PARENT_DEV:
+ ret = get_target_freq_with_devfreq(devfreq, freq);
+ break;
+ case CPUFREQ_PARENT_DEV:
+ ret = get_target_freq_with_cpufreq(devfreq, freq);
+ break;
+ default:
+ ret = -EINVAL;
+ dev_err(&devfreq->dev, "Invalid parent type\n");
+ break;
+ }
+
+ return ret;
+}
+
+static int cpufreq_passive_notifier_call(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct devfreq_passive_data *p_data =
+ container_of(nb, struct devfreq_passive_data, nb);
+ struct devfreq *devfreq = (struct devfreq *)p_data->this;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_freqs *freqs = ptr;
+ unsigned int cur_freq;
+ int ret;
+
+ if (event != CPUFREQ_POSTCHANGE || !freqs)
+ return 0;
+
+ parent_cpu_data = get_parent_cpu_data(p_data, freqs->policy);
+ if (!parent_cpu_data || parent_cpu_data->cur_freq == freqs->new)
+ return 0;
+
+ cur_freq = parent_cpu_data->cur_freq;
+ parent_cpu_data->cur_freq = freqs->new;
+
+ mutex_lock(&devfreq->lock);
+ ret = devfreq_update_target(devfreq, freqs->new);
+ mutex_unlock(&devfreq->lock);
+ if (ret) {
+ parent_cpu_data->cur_freq = cur_freq;
+ dev_err(&devfreq->dev, "failed to update the frequency.\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq_cpu_data *parent_cpu_data;
+ int cpu, ret = 0;
+
+ if (p_data->nb.notifier_call) {
+ ret = cpufreq_unregister_notifier(&p_data->nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret < 0)
+ return ret;
+ }
+
+ for_each_possible_cpu(cpu) {
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EINVAL;
+ continue;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ list_del(&parent_cpu_data->node);
+ if (parent_cpu_data->opp_table)
+ dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
+ kfree(parent_cpu_data);
+ cpufreq_cpu_put(policy);
+ }
+
+ return ret;
+}
+
+static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct device *dev = devfreq->dev.parent;
+ struct opp_table *opp_table = NULL;
+ struct devfreq_cpu_data *parent_cpu_data;
+ struct cpufreq_policy *policy;
+ struct device *cpu_dev;
+ unsigned int cpu;
+ int ret;
+
+ p_data->cpu_data_list
+ = (struct list_head)LIST_HEAD_INIT(p_data->cpu_data_list);
+
+ p_data->nb.notifier_call = cpufreq_passive_notifier_call;
+ ret = cpufreq_register_notifier(&p_data->nb, CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret) {
+ dev_err(dev, "failed to register cpufreq notifier\n");
+ p_data->nb.notifier_call = NULL;
+ goto err;
+ }
+
+ for_each_possible_cpu(cpu) {
+ policy = cpufreq_cpu_get(cpu);
+ if (!policy) {
+ ret = -EPROBE_DEFER;
+ goto err;
+ }
+
+ parent_cpu_data = get_parent_cpu_data(p_data, policy);
+ if (parent_cpu_data) {
+ cpufreq_cpu_put(policy);
+ continue;
+ }
+
+ parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
+ GFP_KERNEL);
+ if (!parent_cpu_data) {
+ ret = -ENOMEM;
+ goto err_put_policy;
+ }
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev) {
+ dev_err(dev, "failed to get cpu device\n");
+ ret = -ENODEV;
+ goto err_free_cpu_data;
+ }
+
+ opp_table = dev_pm_opp_get_opp_table(cpu_dev);
+ if (IS_ERR(opp_table)) {
+ dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
+ ret = PTR_ERR(opp_table);
+ goto err_free_cpu_data;
+ }
+
+ parent_cpu_data->dev = cpu_dev;
+ parent_cpu_data->opp_table = opp_table;
+ parent_cpu_data->first_cpu = cpumask_first(policy->related_cpus);
+ parent_cpu_data->cur_freq = policy->cur;
+ parent_cpu_data->min_freq = policy->cpuinfo.min_freq;
+ parent_cpu_data->max_freq = policy->cpuinfo.max_freq;
+
+ list_add_tail(&parent_cpu_data->node, &p_data->cpu_data_list);
+ cpufreq_cpu_put(policy);
+ }
+
+ mutex_lock(&devfreq->lock);
+ ret = devfreq_update_target(devfreq, 0L);
+ mutex_unlock(&devfreq->lock);
+ if (ret)
+ dev_err(dev, "failed to update the frequency\n");
+
+ return ret;
+
+err_free_cpu_data:
+ kfree(parent_cpu_data);
+err_put_policy:
+ cpufreq_cpu_put(policy);
+err:
+ WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
+
+ return ret;
+}
+
static int devfreq_passive_notifier_call(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -131,30 +373,55 @@ static int devfreq_passive_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
-static int devfreq_passive_event_handler(struct devfreq *devfreq,
- unsigned int event, void *data)
+static int devfreq_passive_unregister_notifier(struct devfreq *devfreq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent = (struct devfreq *)p_data->parent;
+ struct notifier_block *nb = &p_data->nb;
+
+ return devfreq_unregister_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_register_notifier(struct devfreq *devfreq)
{
struct devfreq_passive_data *p_data
= (struct devfreq_passive_data *)devfreq->data;
struct devfreq *parent = (struct devfreq *)p_data->parent;
struct notifier_block *nb = &p_data->nb;
- int ret = 0;
if (!parent)
return -EPROBE_DEFER;
+ nb->notifier_call = devfreq_passive_notifier_call;
+ return devfreq_register_notifier(parent, nb, DEVFREQ_TRANSITION_NOTIFIER);
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+ unsigned int event, void *data)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ int ret = 0;
+
+ if (!p_data)
+ return -EINVAL;
+
+ if (!p_data->this)
+ p_data->this = devfreq;
+
switch (event) {
case DEVFREQ_GOV_START:
- if (!p_data->this)
- p_data->this = devfreq;
-
- nb->notifier_call = devfreq_passive_notifier_call;
- ret = devfreq_register_notifier(parent, nb,
- DEVFREQ_TRANSITION_NOTIFIER);
+ if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+ ret = devfreq_passive_register_notifier(devfreq);
+ else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+ ret = cpufreq_passive_register_notifier(devfreq);
break;
case DEVFREQ_GOV_STOP:
- WARN_ON(devfreq_unregister_notifier(parent, nb,
- DEVFREQ_TRANSITION_NOTIFIER));
+ if (p_data->parent_type == DEVFREQ_PARENT_DEV)
+ WARN_ON(devfreq_passive_unregister_notifier(devfreq));
+ else if (p_data->parent_type == CPUFREQ_PARENT_DEV)
+ WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
break;
default:
break;
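Note on the interpolation fallback in get_target_freq_with_cpufreq() above: when the required-opps translation yields nothing, the governor maps the parent CPU's relative position within [cpuinfo.min_freq, cpuinfo.max_freq] onto the passive device's own frequency range. A minimal standalone sketch of that arithmetic (plain C; the frequency values are made up and mult_frac() is open-coded here rather than the kernel macro):

#include <stdio.h>

/* Same idea as the kernel's mult_frac(): scale without intermediate overflow. */
static unsigned long mult_frac(unsigned long x, unsigned long num, unsigned long den)
{
	unsigned long q = x / den;
	unsigned long r = x % den;

	return q * num + r * num / den;
}

int main(void)
{
	/* Hypothetical parent CPU policy (kHz) and passive device range (Hz). */
	unsigned long cpu_min = 408000, cpu_max = 1800000, cpu_cur = 1200000;
	unsigned long dev_min = 200000000, dev_max = 800000000;

	unsigned long cpu_percent = ((cpu_cur - cpu_min) * 100) / (cpu_max - cpu_min);
	unsigned long freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100);

	printf("cpu at %lu%% of its range -> device freq %lu Hz\n", cpu_percent, freq);
	return 0;
}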
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 293857ebfd75..daff40702615 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -5,6 +5,7 @@
*/
#include <linux/arm-smccc.h>
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/devfreq.h>
@@ -20,55 +21,49 @@
#include <linux/rwsem.h>
#include <linux/suspend.h>
+#include <soc/rockchip/pm_domains.h>
#include <soc/rockchip/rk3399_grf.h>
#include <soc/rockchip/rockchip_sip.h>
-struct dram_timing {
- unsigned int ddr3_speed_bin;
- unsigned int pd_idle;
- unsigned int sr_idle;
- unsigned int sr_mc_gate_idle;
- unsigned int srpd_lite_idle;
- unsigned int standby_idle;
- unsigned int auto_pd_dis_freq;
- unsigned int dram_dll_dis_freq;
- unsigned int phy_dll_dis_freq;
- unsigned int ddr3_odt_dis_freq;
- unsigned int ddr3_drv;
- unsigned int ddr3_odt;
- unsigned int phy_ddr3_ca_drv;
- unsigned int phy_ddr3_dq_drv;
- unsigned int phy_ddr3_odt;
- unsigned int lpddr3_odt_dis_freq;
- unsigned int lpddr3_drv;
- unsigned int lpddr3_odt;
- unsigned int phy_lpddr3_ca_drv;
- unsigned int phy_lpddr3_dq_drv;
- unsigned int phy_lpddr3_odt;
- unsigned int lpddr4_odt_dis_freq;
- unsigned int lpddr4_drv;
- unsigned int lpddr4_dq_odt;
- unsigned int lpddr4_ca_odt;
- unsigned int phy_lpddr4_ca_drv;
- unsigned int phy_lpddr4_ck_cs_drv;
- unsigned int phy_lpddr4_dq_drv;
- unsigned int phy_lpddr4_odt;
-};
+#define NS_TO_CYCLE(NS, MHz) (((NS) * (MHz)) / NSEC_PER_USEC)
+
+#define RK3399_SET_ODT_PD_0_SR_IDLE GENMASK(7, 0)
+#define RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE GENMASK(15, 8)
+#define RK3399_SET_ODT_PD_0_STANDBY_IDLE GENMASK(31, 16)
+
+#define RK3399_SET_ODT_PD_1_PD_IDLE GENMASK(11, 0)
+#define RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE GENMASK(27, 16)
+
+#define RK3399_SET_ODT_PD_2_ODT_ENABLE BIT(0)
struct rk3399_dmcfreq {
struct device *dev;
struct devfreq *devfreq;
+ struct devfreq_dev_profile profile;
struct devfreq_simple_ondemand_data ondemand_data;
struct clk *dmc_clk;
struct devfreq_event_dev *edev;
struct mutex lock;
- struct dram_timing timing;
struct regulator *vdd_center;
struct regmap *regmap_pmu;
unsigned long rate, target_rate;
unsigned long volt, target_volt;
unsigned int odt_dis_freq;
- int odt_pd_arg0, odt_pd_arg1;
+
+ unsigned int pd_idle_ns;
+ unsigned int sr_idle_ns;
+ unsigned int sr_mc_gate_idle_ns;
+ unsigned int srpd_lite_idle_ns;
+ unsigned int standby_idle_ns;
+ unsigned int ddr3_odt_dis_freq;
+ unsigned int lpddr3_odt_dis_freq;
+ unsigned int lpddr4_odt_dis_freq;
+
+ unsigned int pd_idle_dis_freq;
+ unsigned int sr_idle_dis_freq;
+ unsigned int sr_mc_gate_idle_dis_freq;
+ unsigned int srpd_lite_idle_dis_freq;
+ unsigned int standby_idle_dis_freq;
};
static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
@@ -78,10 +73,14 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
struct dev_pm_opp *opp;
unsigned long old_clk_rate = dmcfreq->rate;
unsigned long target_volt, target_rate;
+ unsigned int ddrcon_mhz;
struct arm_smccc_res res;
- bool odt_enable = false;
int err;
+ u32 odt_pd_arg0 = 0;
+ u32 odt_pd_arg1 = 0;
+ u32 odt_pd_arg2 = 0;
+
opp = devfreq_recommended_opp(dev, freq, flags);
if (IS_ERR(opp))
return PTR_ERR(opp);
@@ -95,19 +94,71 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
mutex_lock(&dmcfreq->lock);
+ /*
+ * Ensure power-domain transitions don't interfere with ARM Trusted
+ * Firmware power-domain idling.
+ */
+ err = rockchip_pmu_block();
+ if (err) {
+ dev_err(dev, "Failed to block PMU: %d\n", err);
+ goto out_unlock;
+ }
+
+ /*
+ * Some idle parameters may be based on the DDR controller clock, which
+ * is half of the DDR frequency.
+ * pd_idle and standby_idle are based on the controller clock cycle.
+ * sr_idle_cycle, sr_mc_gate_idle_cycle, and srpd_lite_idle_cycle
+ * are based on the 1024 controller clock cycle
+ */
+ ddrcon_mhz = target_rate / USEC_PER_SEC / 2;
+
+ u32p_replace_bits(&odt_pd_arg1,
+ NS_TO_CYCLE(dmcfreq->pd_idle_ns, ddrcon_mhz),
+ RK3399_SET_ODT_PD_1_PD_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ NS_TO_CYCLE(dmcfreq->standby_idle_ns, ddrcon_mhz),
+ RK3399_SET_ODT_PD_0_STANDBY_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_0_SR_IDLE);
+ u32p_replace_bits(&odt_pd_arg0,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->sr_mc_gate_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE);
+ u32p_replace_bits(&odt_pd_arg1,
+ DIV_ROUND_UP(NS_TO_CYCLE(dmcfreq->srpd_lite_idle_ns,
+ ddrcon_mhz), 1024),
+ RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE);
+
if (dmcfreq->regmap_pmu) {
+ if (target_rate >= dmcfreq->sr_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_IDLE;
+
+ if (target_rate >= dmcfreq->sr_mc_gate_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_SR_MC_GATE_IDLE;
+
+ if (target_rate >= dmcfreq->standby_idle_dis_freq)
+ odt_pd_arg0 &= ~RK3399_SET_ODT_PD_0_STANDBY_IDLE;
+
+ if (target_rate >= dmcfreq->pd_idle_dis_freq)
+ odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_PD_IDLE;
+
+ if (target_rate >= dmcfreq->srpd_lite_idle_dis_freq)
+ odt_pd_arg1 &= ~RK3399_SET_ODT_PD_1_SRPD_LITE_IDLE;
+
if (target_rate >= dmcfreq->odt_dis_freq)
- odt_enable = true;
+ odt_pd_arg2 |= RK3399_SET_ODT_PD_2_ODT_ENABLE;
/*
* This makes a SMC call to the TF-A to set the DDR PD
* (power-down) timings and to enable or disable the
* ODT (on-die termination) resistors.
*/
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, dmcfreq->odt_pd_arg0,
- dmcfreq->odt_pd_arg1,
- ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD,
- odt_enable, 0, 0, 0, &res);
+ arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, odt_pd_arg0, odt_pd_arg1,
+ ROCKCHIP_SIP_CONFIG_DRAM_SET_ODT_PD, odt_pd_arg2,
+ 0, 0, 0, &res);
}
/*
@@ -158,6 +209,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
dmcfreq->volt = target_volt;
out:
+ rockchip_pmu_unblock();
+out_unlock:
mutex_unlock(&dmcfreq->lock);
return err;
}
@@ -189,13 +242,6 @@ static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq)
return 0;
}
-static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = {
- .polling_ms = 200,
- .target = rk3399_dmcfreq_target,
- .get_dev_status = rk3399_dmcfreq_get_dev_status,
- .get_cur_freq = rk3399_dmcfreq_get_cur_freq,
-};
-
static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev)
{
struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
@@ -238,69 +284,48 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
rk3399_dmcfreq_resume);
-static int of_get_ddr_timings(struct dram_timing *timing,
- struct device_node *np)
+static int rk3399_dmcfreq_of_props(struct rk3399_dmcfreq *data,
+ struct device_node *np)
{
int ret = 0;
- ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin",
- &timing->ddr3_speed_bin);
- ret |= of_property_read_u32(np, "rockchip,pd_idle",
- &timing->pd_idle);
- ret |= of_property_read_u32(np, "rockchip,sr_idle",
- &timing->sr_idle);
- ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle",
- &timing->sr_mc_gate_idle);
- ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle",
- &timing->srpd_lite_idle);
- ret |= of_property_read_u32(np, "rockchip,standby_idle",
- &timing->standby_idle);
- ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq",
- &timing->auto_pd_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq",
- &timing->dram_dll_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq",
- &timing->phy_dll_dis_freq);
+ /*
+ * These are all optional, and serve as minimum bounds. Give them large
+ * (i.e., never "disabled") values if the DT doesn't specify one.
+ */
+ data->pd_idle_dis_freq =
+ data->sr_idle_dis_freq =
+ data->sr_mc_gate_idle_dis_freq =
+ data->srpd_lite_idle_dis_freq =
+ data->standby_idle_dis_freq = UINT_MAX;
+
+ ret |= of_property_read_u32(np, "rockchip,pd-idle-ns",
+ &data->pd_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,sr-idle-ns",
+ &data->sr_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-ns",
+ &data->sr_mc_gate_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-ns",
+ &data->srpd_lite_idle_ns);
+ ret |= of_property_read_u32(np, "rockchip,standby-idle-ns",
+ &data->standby_idle_ns);
ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq",
- &timing->ddr3_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,ddr3_drv",
- &timing->ddr3_drv);
- ret |= of_property_read_u32(np, "rockchip,ddr3_odt",
- &timing->ddr3_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv",
- &timing->phy_ddr3_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv",
- &timing->phy_ddr3_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt",
- &timing->phy_ddr3_odt);
+ &data->ddr3_odt_dis_freq);
ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq",
- &timing->lpddr3_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,lpddr3_drv",
- &timing->lpddr3_drv);
- ret |= of_property_read_u32(np, "rockchip,lpddr3_odt",
- &timing->lpddr3_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv",
- &timing->phy_lpddr3_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv",
- &timing->phy_lpddr3_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt",
- &timing->phy_lpddr3_odt);
+ &data->lpddr3_odt_dis_freq);
ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq",
- &timing->lpddr4_odt_dis_freq);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_drv",
- &timing->lpddr4_drv);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt",
- &timing->lpddr4_dq_odt);
- ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt",
- &timing->lpddr4_ca_odt);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv",
- &timing->phy_lpddr4_ca_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv",
- &timing->phy_lpddr4_ck_cs_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv",
- &timing->phy_lpddr4_dq_drv);
- ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt",
- &timing->phy_lpddr4_odt);
+ &data->lpddr4_odt_dis_freq);
+
+ ret |= of_property_read_u32(np, "rockchip,pd-idle-dis-freq-hz",
+ &data->pd_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,sr-idle-dis-freq-hz",
+ &data->sr_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,sr-mc-gate-idle-dis-freq-hz",
+ &data->sr_mc_gate_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,srpd-lite-idle-dis-freq-hz",
+ &data->srpd_lite_idle_dis_freq);
+ ret |= of_property_read_u32(np, "rockchip,standby-idle-dis-freq-hz",
+ &data->standby_idle_dis_freq);
return ret;
}
@@ -311,8 +336,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = pdev->dev.of_node, *node;
struct rk3399_dmcfreq *data;
- int ret, index, size;
- uint32_t *timing;
+ int ret;
struct dev_pm_opp *opp;
u32 ddr_type;
u32 val;
@@ -343,26 +367,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
return ret;
}
- /*
- * Get dram timing and pass it to arm trust firmware,
- * the dram driver in arm trust firmware will get these
- * timing and to do dram initial.
- */
- if (!of_get_ddr_timings(&data->timing, np)) {
- timing = &data->timing.ddr3_speed_bin;
- size = sizeof(struct dram_timing) / 4;
- for (index = 0; index < size; index++) {
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index,
- ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM,
- 0, 0, 0, 0, &res);
- if (res.a0) {
- dev_err(dev, "Failed to set dram param: %ld\n",
- res.a0);
- ret = -EINVAL;
- goto err_edev;
- }
- }
- }
+ rk3399_dmcfreq_of_props(data, np);
node = of_parse_phandle(np, "rockchip,pmu", 0);
if (!node)
@@ -381,13 +386,13 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
switch (ddr_type) {
case RK3399_PMUGRF_DDRTYPE_DDR3:
- data->odt_dis_freq = data->timing.ddr3_odt_dis_freq;
+ data->odt_dis_freq = data->ddr3_odt_dis_freq;
break;
case RK3399_PMUGRF_DDRTYPE_LPDDR3:
- data->odt_dis_freq = data->timing.lpddr3_odt_dis_freq;
+ data->odt_dis_freq = data->lpddr3_odt_dis_freq;
break;
case RK3399_PMUGRF_DDRTYPE_LPDDR4:
- data->odt_dis_freq = data->timing.lpddr4_odt_dis_freq;
+ data->odt_dis_freq = data->lpddr4_odt_dis_freq;
break;
default:
ret = -EINVAL;
@@ -400,62 +405,45 @@ no_pmu:
0, 0, 0, 0, &res);
/*
- * In TF-A there is a platform SIP call to set the PD (power-down)
- * timings and to enable or disable the ODT (on-die termination).
- * This call needs three arguments as follows:
- *
- * arg0:
- * bit[0-7] : sr_idle
- * bit[8-15] : sr_mc_gate_idle
- * bit[16-31] : standby idle
- * arg1:
- * bit[0-11] : pd_idle
- * bit[16-27] : srpd_lite_idle
- * arg2:
- * bit[0] : odt enable
- */
- data->odt_pd_arg0 = (data->timing.sr_idle & 0xff) |
- ((data->timing.sr_mc_gate_idle & 0xff) << 8) |
- ((data->timing.standby_idle & 0xffff) << 16);
- data->odt_pd_arg1 = (data->timing.pd_idle & 0xfff) |
- ((data->timing.srpd_lite_idle & 0xfff) << 16);
-
- /*
* We add a devfreq driver to our parent since it has a device tree node
* with operating points.
*/
- if (dev_pm_opp_of_add_table(dev)) {
+ if (devm_pm_opp_of_add_table(dev)) {
dev_err(dev, "Invalid operating-points in device tree.\n");
ret = -EINVAL;
goto err_edev;
}
- of_property_read_u32(np, "upthreshold",
- &data->ondemand_data.upthreshold);
- of_property_read_u32(np, "downdifferential",
- &data->ondemand_data.downdifferential);
+ data->ondemand_data.upthreshold = 25;
+ data->ondemand_data.downdifferential = 15;
data->rate = clk_get_rate(data->dmc_clk);
opp = devfreq_recommended_opp(dev, &data->rate, 0);
if (IS_ERR(opp)) {
ret = PTR_ERR(opp);
- goto err_free_opp;
+ goto err_edev;
}
data->rate = dev_pm_opp_get_freq(opp);
data->volt = dev_pm_opp_get_voltage(opp);
dev_pm_opp_put(opp);
- rk3399_devfreq_dmc_profile.initial_freq = data->rate;
+ data->profile = (struct devfreq_dev_profile) {
+ .polling_ms = 200,
+ .target = rk3399_dmcfreq_target,
+ .get_dev_status = rk3399_dmcfreq_get_dev_status,
+ .get_cur_freq = rk3399_dmcfreq_get_cur_freq,
+ .initial_freq = data->rate,
+ };
data->devfreq = devm_devfreq_add_device(dev,
- &rk3399_devfreq_dmc_profile,
+ &data->profile,
DEVFREQ_GOV_SIMPLE_ONDEMAND,
&data->ondemand_data);
if (IS_ERR(data->devfreq)) {
ret = PTR_ERR(data->devfreq);
- goto err_free_opp;
+ goto err_edev;
}
devm_devfreq_register_opp_notifier(dev, data->devfreq);
@@ -465,8 +453,6 @@ no_pmu:
return 0;
-err_free_opp:
- dev_pm_opp_of_remove_table(&pdev->dev);
err_edev:
devfreq_event_disable_edev(data->edev);
@@ -477,11 +463,7 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev)
{
struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
- /*
- * Before remove the opp table we need to unregister the opp notifier.
- */
- devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq);
- dev_pm_opp_of_remove_table(dmcfreq->dev);
+ devfreq_event_disable_edev(dmcfreq->edev);
return 0;
}
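Note on the new SMC argument assembly in rk3399_dmcfreq_target() above: each nanosecond property read from DT is converted to DDR-controller clock cycles (the controller runs at half the DDR rate) and packed into the RK3399_SET_ODT_PD_* bit fields; the sr/sr_mc_gate/srpd_lite values are additionally expressed in units of 1024 cycles. A self-contained sketch of that conversion with example values (plain C; the u32p_replace_bits()/GENMASK() helpers are replaced by open-coded shifts and masks):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC	1000UL
#define USEC_PER_SEC	1000000UL

/* ns -> controller clock cycles, as in the driver's NS_TO_CYCLE() macro */
#define NS_TO_CYCLE(ns, mhz)	(((ns) * (mhz)) / NSEC_PER_USEC)

int main(void)
{
	/* Hypothetical target: 800 MHz DDR rate -> 400 MHz controller clock. */
	unsigned long target_rate = 800000000;
	unsigned long ddrcon_mhz = target_rate / USEC_PER_SEC / 2;

	unsigned long sr_idle_ns = 10240;	/* example DT value */
	unsigned long pd_idle_ns = 160;		/* example DT value */

	/* arg0 bits [7:0] hold sr_idle in units of 1024 controller cycles. */
	uint32_t sr_idle_1024 = (NS_TO_CYCLE(sr_idle_ns, ddrcon_mhz) + 1023) / 1024;
	/* arg1 bits [11:0] hold pd_idle in controller cycles. */
	uint32_t pd_idle = NS_TO_CYCLE(pd_idle_ns, ddrcon_mhz) & 0xfff;

	uint32_t odt_pd_arg0 = sr_idle_1024 & 0xff;
	uint32_t odt_pd_arg1 = pd_idle;

	printf("odt_pd_arg0=0x%08x odt_pd_arg1=0x%08x\n", odt_pd_arg0, odt_pd_arg1);
	return 0;
}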
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 47551ab73ca8..b9bb94bd0f67 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -765,6 +765,106 @@ static struct cpuidle_state icx_cstates[] __initdata = {
};
/*
+ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
+ *
+ * By default we enable C1E and disable C1 by marking it with
+ * 'CPUIDLE_FLAG_UNUSABLE'.
+ */
+static struct cpuidle_state adl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 220,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 280,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 680,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 200,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 230,
+ .target_residency = 700,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+/*
* On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
* versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
* MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
@@ -1147,6 +1247,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_adl __initconst = {
+ .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+ .state_table = adl_l_cstates,
+};
+
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
@@ -1215,6 +1323,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
@@ -1574,6 +1684,25 @@ static void __init skx_idle_state_table_update(void)
}
/**
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
+ */
+static void __init adl_idle_state_table_update(void)
+{
+ /* Check if user prefers C1 over C1E. */
+ if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+ cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
+
+ /* Disable C1E by clearing the "C1E promotion" bit. */
+ c1e_promotion = C1E_PROMOTION_DISABLE;
+ return;
+ }
+
+ /* Make sure C1E is enabled by default */
+ c1e_promotion = C1E_PROMOTION_ENABLE;
+}
+
+/**
* spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
*/
static void __init spr_idle_state_table_update(void)
@@ -1642,6 +1771,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
spr_idle_state_table_update();
break;
+ case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
+ adl_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
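Note on adl_idle_state_table_update() above: the preferred_states_mask module parameter selects between C1 and C1E, with bit 1 standing for the first state (C1) and bit 2 for the second (C1E), mirroring the existing Sapphire Rapids handling. A small sketch of that selection logic (plain C; the cpuidle flag and MSR handling are reduced to booleans):

#include <stdio.h>
#include <stdbool.h>

#define BIT(n)	(1U << (n))

int main(void)
{
	/* Hypothetical module parameter: bit 1 ~ C1, bit 2 ~ C1E. */
	unsigned int preferred_states_mask = BIT(1);

	bool c1_usable, c1e_promotion;

	if ((preferred_states_mask & BIT(1)) && !(preferred_states_mask & BIT(2))) {
		/* User asked for C1 only: expose C1, disable C1E promotion. */
		c1_usable = true;
		c1e_promotion = false;
	} else {
		/* Default: keep C1E promotion on, leave C1 marked unusable. */
		c1_usable = false;
		c1e_promotion = true;
	}

	printf("C1 usable: %d, C1E promotion: %d\n", c1_usable, c1e_promotion);
	return 0;
}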
diff --git a/drivers/iio/chemical/scd30.h b/drivers/iio/chemical/scd30.h
index f60127bfe0f4..1ac9f3f79271 100644
--- a/drivers/iio/chemical/scd30.h
+++ b/drivers/iio/chemical/scd30.h
@@ -68,10 +68,7 @@ struct scd30_state {
scd30_command_t command;
};
-int scd30_suspend(struct device *dev);
-int scd30_resume(struct device *dev);
-
-static __maybe_unused SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume);
+extern const struct dev_pm_ops scd30_pm_ops;
int scd30_probe(struct device *dev, int irq, const char *name, void *priv, scd30_command_t command);
diff --git a/drivers/iio/chemical/scd30_core.c b/drivers/iio/chemical/scd30_core.c
index 9fe6bbe9ee04..682fca39d14d 100644
--- a/drivers/iio/chemical/scd30_core.c
+++ b/drivers/iio/chemical/scd30_core.c
@@ -517,7 +517,7 @@ static const struct iio_chan_spec scd30_channels[] = {
IIO_CHAN_SOFT_TIMESTAMP(3),
};
-int __maybe_unused scd30_suspend(struct device *dev)
+static int scd30_suspend(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct scd30_state *state = iio_priv(indio_dev);
@@ -529,9 +529,8 @@ int __maybe_unused scd30_suspend(struct device *dev)
return regulator_disable(state->vdd);
}
-EXPORT_SYMBOL(scd30_suspend);
-int __maybe_unused scd30_resume(struct device *dev)
+static int scd30_resume(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct scd30_state *state = iio_priv(indio_dev);
@@ -543,7 +542,8 @@ int __maybe_unused scd30_resume(struct device *dev)
return scd30_command_write(state, CMD_START_MEAS, state->pressure_comp);
}
-EXPORT_SYMBOL(scd30_resume);
+
+EXPORT_NS_SIMPLE_DEV_PM_OPS(scd30_pm_ops, scd30_suspend, scd30_resume, IIO_SCD30);
static void scd30_stop_meas(void *data)
{
@@ -759,7 +759,7 @@ int scd30_probe(struct device *dev, int irq, const char *name, void *priv,
return devm_iio_device_register(dev, indio_dev);
}
-EXPORT_SYMBOL(scd30_probe);
+EXPORT_SYMBOL_NS(scd30_probe, IIO_SCD30);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor core driver");
diff --git a/drivers/iio/chemical/scd30_i2c.c b/drivers/iio/chemical/scd30_i2c.c
index 875892a070ee..bae479a4721f 100644
--- a/drivers/iio/chemical/scd30_i2c.c
+++ b/drivers/iio/chemical/scd30_i2c.c
@@ -128,7 +128,7 @@ static struct i2c_driver scd30_i2c_driver = {
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = scd30_i2c_of_match,
- .pm = &scd30_pm_ops,
+ .pm = pm_sleep_ptr(&scd30_pm_ops),
},
.probe_new = scd30_i2c_probe,
};
@@ -137,3 +137,4 @@ module_i2c_driver(scd30_i2c_driver);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor i2c driver");
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
diff --git a/drivers/iio/chemical/scd30_serial.c b/drivers/iio/chemical/scd30_serial.c
index 568b34486c44..3c519103d30b 100644
--- a/drivers/iio/chemical/scd30_serial.c
+++ b/drivers/iio/chemical/scd30_serial.c
@@ -252,7 +252,7 @@ static struct serdev_device_driver scd30_serdev_driver = {
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = scd30_serdev_of_match,
- .pm = &scd30_pm_ops,
+ .pm = pm_sleep_ptr(&scd30_pm_ops),
},
.probe = scd30_serdev_probe,
};
@@ -261,3 +261,4 @@ module_serdev_device_driver(scd30_serdev_driver);
MODULE_AUTHOR("Tomasz Duszynski <tomasz.duszynski@octakon.com>");
MODULE_DESCRIPTION("Sensirion SCD30 carbon dioxide sensor serial driver");
MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(IIO_SCD30);
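Note on the namespace change: scd30_probe() and scd30_pm_ops are now exported into the IIO_SCD30 symbol namespace, so every module linking against them must import it, as the i2c and serdev glue drivers above now do. A minimal sketch of a hypothetical additional consumer (the "scd30-demo" driver name is illustrative only, not part of the patch):

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm.h>

/* Exported from scd30_core.c, now in the IIO_SCD30 namespace. */
extern const struct dev_pm_ops scd30_pm_ops;

static struct platform_driver scd30_demo_driver = {
	.driver = {
		.name	= "scd30-demo",
		/* Reference the PM ops only when CONFIG_PM_SLEEP is enabled. */
		.pm	= pm_sleep_ptr(&scd30_pm_ops),
	},
};
module_platform_driver(scd30_demo_driver);

/* Without this, modpost rejects the use of the namespaced symbols. */
MODULE_IMPORT_NS(IIO_SCD30);
MODULE_DESCRIPTION("Illustrative consumer of the IIO_SCD30 namespace");
MODULE_LICENSE("GPL");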
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 440ab5a03df9..485ea980bde7 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
-_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
+_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
-static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
- struct device *dev)
+static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+ unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index bca2f912d349..f5eced0842b3 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;
pd = em_cpu_get(cpu);
- if (!pd)
+ if (!pd || em_is_artificial(pd))
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 07611a00b78f..a9c99d9e8b42 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1010,7 +1010,7 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
* where time_unit is default to 1 sec. Never 0.
*/
if (!to_raw)
- return (value) ? value *= rp->time_unit : rp->time_unit;
+ return (value) ? value * rp->time_unit : rp->time_unit;
value = div64_u64(value, rp->time_unit);
@@ -1107,6 +1107,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 1be45f36ab6c..9d23984d8931 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -140,6 +140,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
{}
};
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 0868b7d406fb..b1cf7d29dafd 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -8,6 +8,7 @@
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/err.h>
+#include <linux/mutex.h>
#include <linux/pm_clock.h>
#include <linux/pm_domain.h>
#include <linux/of_address.h>
@@ -16,6 +17,7 @@
#include <linux/clk.h>
#include <linux/regmap.h>
#include <linux/mfd/syscon.h>
+#include <soc/rockchip/pm_domains.h>
#include <dt-bindings/power/px30-power.h>
#include <dt-bindings/power/rk3036-power.h>
#include <dt-bindings/power/rk3066-power.h>
@@ -139,6 +141,109 @@ struct rockchip_pmu {
#define DOMAIN_RK3568(name, pwr, req, wakeup) \
DOMAIN_M(name, pwr, pwr, req, req, req, wakeup)
+/*
+ * Dynamic Memory Controller may need to coordinate with us -- see
+ * rockchip_pmu_block().
+ *
+ * dmc_pmu_mutex protects registration-time races, so the DMC driver doesn't
+ * try to block() while we're initializing the PMU.
+ */
+static DEFINE_MUTEX(dmc_pmu_mutex);
+static struct rockchip_pmu *dmc_pmu;
+
+/*
+ * Block PMU transitions and make sure they don't interfere with ARM Trusted
+ * Firmware operations. There are two conflicts, noted in the comments below.
+ *
+ * Caller must unblock PMU transitions via rockchip_pmu_unblock().
+ */
+int rockchip_pmu_block(void)
+{
+ struct rockchip_pmu *pmu;
+ struct generic_pm_domain *genpd;
+ struct rockchip_pm_domain *pd;
+ int i, ret;
+
+ mutex_lock(&dmc_pmu_mutex);
+
+ /* No PMU (yet)? Then we just block rockchip_pmu_probe(). */
+ if (!dmc_pmu)
+ return 0;
+ pmu = dmc_pmu;
+
+ /*
+ * mutex blocks all idle transitions: we can't touch the
+ * PMU_BUS_IDLE_REQ (our ".idle_offset") register while ARM Trusted
+ * Firmware might be using it.
+ */
+ mutex_lock(&pmu->mutex);
+
+ /*
+ * Power domain clocks: Per Rockchip, we *must* keep certain clocks
+ * enabled for the duration of power-domain transitions. Most
+ * transitions are handled by this driver, but some cases (in
+ * particular, DRAM DVFS / memory-controller idle) must be handled by
+ * firmware. Firmware can handle most clock management via a special
+ * "ungate" register (PMU_CRU_GATEDIS_CON0), but unfortunately, this
+ * doesn't handle PLLs. We can assist this transition by doing the
+ * clock management on behalf of firmware.
+ */
+ for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ ret = clk_bulk_enable(pd->num_clks, pd->clks);
+ if (ret < 0) {
+ dev_err(pmu->dev,
+ "failed to enable clks for domain '%s': %d\n",
+ genpd->name, ret);
+ goto err;
+ }
+ }
+ }
+
+ return 0;
+
+err:
+ for (i = i - 1; i >= 0; i--) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ clk_bulk_disable(pd->num_clks, pd->clks);
+ }
+ }
+ mutex_unlock(&pmu->mutex);
+ mutex_unlock(&dmc_pmu_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_block);
+
+/* Unblock PMU transitions. */
+void rockchip_pmu_unblock(void)
+{
+ struct rockchip_pmu *pmu;
+ struct generic_pm_domain *genpd;
+ struct rockchip_pm_domain *pd;
+ int i;
+
+ if (dmc_pmu) {
+ pmu = dmc_pmu;
+ for (i = 0; i < pmu->genpd_data.num_domains; i++) {
+ genpd = pmu->genpd_data.domains[i];
+ if (genpd) {
+ pd = to_rockchip_pd(genpd);
+ clk_bulk_disable(pd->num_clks, pd->clks);
+ }
+ }
+
+ mutex_unlock(&pmu->mutex);
+ }
+
+ mutex_unlock(&dmc_pmu_mutex);
+}
+EXPORT_SYMBOL_GPL(rockchip_pmu_unblock);
+
static bool rockchip_pmu_domain_is_idle(struct rockchip_pm_domain *pd)
{
struct rockchip_pmu *pmu = pd->pmu;
@@ -690,6 +795,12 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
error = -ENODEV;
+ /*
+ * Prevent any rockchip_pmu_block() from racing with the remainder of
+ * setup (clocks, register initialization).
+ */
+ mutex_lock(&dmc_pmu_mutex);
+
for_each_available_child_of_node(np, node) {
error = rockchip_pm_add_one_domain(pmu, node);
if (error) {
@@ -719,10 +830,17 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
goto err_out;
}
+ /* We only expect one PMU. */
+ if (!WARN_ON_ONCE(dmc_pmu))
+ dmc_pmu = pmu;
+
+ mutex_unlock(&dmc_pmu_mutex);
+
return 0;
err_out:
rockchip_pm_domain_cleanup(pmu);
+ mutex_unlock(&dmc_pmu_mutex);
return error;
}
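Note on the intended pairing of rockchip_pmu_block()/rockchip_pmu_unblock(): the rk3399 DMC driver above is the expected caller, wrapping the DRAM-frequency SMC so ARM Trusted Firmware never races with PMU idle requests. A condensed sketch of that calling pattern (the dmc_switch_rate() helper is hypothetical; the SMC and voltage handling are elided):

#include <linux/clk.h>
#include <soc/rockchip/pm_domains.h>

static int dmc_switch_rate(struct clk *dmc_clk, unsigned long target_rate)
{
	int err;

	/* Keep ARM Trusted Firmware and the PMU driver from racing on idle requests. */
	err = rockchip_pmu_block();
	if (err)
		return err;

	/* ...assemble odt_pd_arg0/1/2 and issue the ROCKCHIP_SIP_DRAM_FREQ SMC here... */

	err = clk_set_rate(dmc_clk, target_rate);

	/* Always paired with the block above, on both success and failure. */
	rockchip_pmu_unblock();
	return err;
}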
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 0bfb8eebd126..b8151d95a806 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;
- if (!em)
+ if (!em || em_is_artificial(em))
return false;
policy = cpufreq_cdev->policy;
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4310cb342a9f..b04dcbbf721a 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
+ struct em_perf_domain *em;
char *name;
int err, num_opps;
@@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
dfc->devfreq = df;
- dfc->em_pd = em_pd_get(dev);
- if (dfc->em_pd) {
+ em = em_pd_get(dev);
+ if (em && !em_is_artificial(em)) {
+ dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
- dev_dbg(dev, "missing EM for cooling device\n");
+ dev_dbg(dev, "missing proper EM for cooling device\n");
num_opps = dev_pm_opp_get_opp_count(dev);