diff options
-rw-r--r-- | drivers/thermal/x86_pkg_temp_thermal.c | 222 |
1 files changed, 110 insertions, 112 deletions
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 39303bcf09e3..2c8b7e718060 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -65,6 +65,7 @@ struct pkg_device { u32 msr_pkg_therm_low; u32 msr_pkg_therm_high; struct thermal_zone_device *tzone; + struct cpumask cpumask; }; static struct thermal_zone_params pkg_temp_tz_params = { @@ -73,7 +74,10 @@ static struct thermal_zone_params pkg_temp_tz_params = { /* List maintaining number of package instances */ static LIST_HEAD(phy_dev_list); -static DEFINE_MUTEX(phy_dev_list_mutex); +/* Serializes interrupt notification, work and hotplug */ +static DEFINE_SPINLOCK(pkg_temp_lock); +/* Protects zone operation in the work function against hotplug removal */ +static DEFINE_MUTEX(thermal_zone_mutex); /* Interrupt to work function schedule queue */ static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work); @@ -81,8 +85,6 @@ static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work); /* To track if the work is already scheduled on a package */ static u8 *pkg_work_scheduled; -/* Spin lock to prevent races with pkg_work_scheduled */ -static spinlock_t pkg_work_lock; static u16 max_phy_id; /* Debug counters to show using debugfs */ @@ -115,21 +117,23 @@ err_out: return -ENOENT; } +/* + * Protection: + * + * - cpu hotplug: Read serialized by cpu hotplug lock + * Write must hold pkg_temp_lock + * + * - Other callsites: Must hold pkg_temp_lock + */ static struct pkg_device *pkg_temp_thermal_get_dev(unsigned int cpu) { u16 phys_proc_id = topology_physical_package_id(cpu); struct pkg_device *pkgdev; - mutex_lock(&phy_dev_list_mutex); - - list_for_each_entry(pkgdev, &phy_dev_list, list) - if (pkgdev->phys_proc_id == phys_proc_id) { - mutex_unlock(&phy_dev_list_mutex); + list_for_each_entry(pkgdev, &phy_dev_list, list) { + if (pkgdev->phys_proc_id == phys_proc_id) return pkgdev; - } - - mutex_unlock(&phy_dev_list_mutex); - + } return NULL; } @@ -289,67 +293,66 @@ static inline void disable_pkg_thres_interrupt(void) static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) { - int cpu = smp_processor_id(); - struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu); - int phy_id = topology_physical_package_id(cpu); - bool notify = false; - unsigned long flags; + struct thermal_zone_device *tzone = NULL; + int phy_id, cpu = smp_processor_id(); + struct pkg_device *pkgdev; u64 msr_val, wr_val; - if (!pkgdev) - return; - - spin_lock_irqsave(&pkg_work_lock, flags); + mutex_lock(&thermal_zone_mutex); + spin_lock_irq(&pkg_temp_lock); ++pkg_work_cnt; - if (unlikely(phy_id > max_phy_id)) { - spin_unlock_irqrestore(&pkg_work_lock, flags); + + pkgdev = pkg_temp_thermal_get_dev(cpu); + if (!pkgdev) { + spin_unlock_irq(&pkg_temp_lock); + mutex_unlock(&thermal_zone_mutex); return; } + pkg_work_scheduled[phy_id] = 0; - spin_unlock_irqrestore(&pkg_work_lock, flags); rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); wr_val = msr_val & ~(THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1); if (wr_val != msr_val) { wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, wr_val); - notify = true; + tzone = pkgdev->tzone; } enable_pkg_thres_interrupt(); + spin_unlock_irq(&pkg_temp_lock); - if (notify) { - pr_debug("thermal_zone_device_update\n"); - thermal_zone_device_update(pkgdev->tzone, - THERMAL_EVENT_UNSPECIFIED); - } + /* + * If tzone is not NULL, then thermal_zone_mutex will prevent the + * concurrent removal in the cpu offline callback. + */ + if (tzone) + thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED); + + mutex_unlock(&thermal_zone_mutex); } static int pkg_thermal_notify(u64 msr_val) { int cpu = smp_processor_id(); int phy_id = topology_physical_package_id(cpu); + struct pkg_device *pkgdev; unsigned long flags; - /* - * When a package is in interrupted state, all CPU's in that package - * are in the same interrupt state. So scheduling on any one CPU in - * the package is enough and simply return for others. - */ - spin_lock_irqsave(&pkg_work_lock, flags); + spin_lock_irqsave(&pkg_temp_lock, flags); ++pkg_interrupt_cnt; - if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) || - pkg_work_scheduled[phy_id]) { - disable_pkg_thres_interrupt(); - spin_unlock_irqrestore(&pkg_work_lock, flags); - return -EINVAL; - } - pkg_work_scheduled[phy_id] = 1; - spin_unlock_irqrestore(&pkg_work_lock, flags); disable_pkg_thres_interrupt(); - schedule_delayed_work_on(cpu, + + /* Work is per package, so scheduling it once is enough. */ + pkgdev = pkg_temp_thermal_get_dev(cpu); + if (pkgdev && pkg_work_scheduled && !pkg_work_scheduled[phy_id]) { + pkg_work_scheduled[phy_id] = 1; + schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); + } + + spin_unlock_irqrestore(&pkg_temp_lock, flags); return 0; } @@ -373,29 +376,25 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) err = get_tj_max(cpu, &tj_max); if (err) - goto err_ret; - - mutex_lock(&phy_dev_list_mutex); + return err; pkgdev = kzalloc(sizeof(*pkgdev), GFP_KERNEL); - if (!pkgdev) { - err = -ENOMEM; - goto err_ret_unlock; - } + if (!pkgdev) + return -ENOMEM; - spin_lock_irqsave(&pkg_work_lock, flags); + spin_lock_irqsave(&pkg_temp_lock, flags); if (topology_physical_package_id(cpu) > max_phy_id) max_phy_id = topology_physical_package_id(cpu); temp = krealloc(pkg_work_scheduled, (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); if (!temp) { - spin_unlock_irqrestore(&pkg_work_lock, flags); - err = -ENOMEM; - goto err_ret_free; + spin_unlock_irqrestore(&pkg_temp_lock, flags); + kfree(pkgdev); + return -ENOMEM; } pkg_work_scheduled = temp; pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; - spin_unlock_irqrestore(&pkg_work_lock, flags); + spin_unlock_irqrestore(&pkg_temp_lock, flags); pkgdev->phys_proc_id = topology_physical_package_id(cpu); pkgdev->cpu = cpu; @@ -406,60 +405,81 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) pkgdev, &tzone_ops, &pkg_temp_tz_params, 0, 0); if (IS_ERR(pkgdev->tzone)) { err = PTR_ERR(pkgdev->tzone); - goto err_ret_free; + kfree(pkgdev); + return err; } /* Store MSR value for package thermal interrupt, to restore at exit */ rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &pkgdev->msr_pkg_therm_low, &pkgdev->msr_pkg_therm_high); + cpumask_set_cpu(cpu, &pkgdev->cpumask); + spin_lock_irq(&pkg_temp_lock); list_add_tail(&pkgdev->list, &phy_dev_list); - pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n", - pkgdev->phys_proc_id, cpu); - - mutex_unlock(&phy_dev_list_mutex); - + spin_unlock_irq(&pkg_temp_lock); return 0; - -err_ret_free: - kfree(pkgdev); -err_ret_unlock: - mutex_unlock(&phy_dev_list_mutex); - -err_ret: - return err; } -static int pkg_temp_thermal_device_remove(unsigned int cpu) +static void put_core_offline(unsigned int cpu) { struct pkg_device *pkgdev = pkg_temp_thermal_get_dev(cpu); + bool lastcpu; int target; if (!pkgdev) - return -ENODEV; + return; + + target = cpumask_any_but(&pkgdev->cpumask, cpu); + cpumask_clear_cpu(cpu, &pkgdev->cpumask); + lastcpu = target >= nr_cpu_ids; + /* + * Remove the sysfs files, if this is the last cpu in the package + * before doing further cleanups. + */ + if (lastcpu) { + struct thermal_zone_device *tzone = pkgdev->tzone; - mutex_lock(&phy_dev_list_mutex); + /* + * We must protect against a work function calling + * thermal_zone_update, after/while unregister. We null out + * the pointer under the zone mutex, so the worker function + * won't try to call. + */ + mutex_lock(&thermal_zone_mutex); + pkgdev->tzone = NULL; + mutex_unlock(&thermal_zone_mutex); - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); - /* This might be the last cpu in this package */ - if (target >= nr_cpu_ids) { - thermal_zone_device_unregister(pkgdev->tzone); + thermal_zone_device_unregister(tzone); + } + + /* + * If this is the last CPU in the package, restore the interrupt + * MSR and remove the package reference from the array. + */ + if (lastcpu) { + /* Protect against work and interrupts */ + spin_lock_irq(&pkg_temp_lock); + list_del(&pkgdev->list); /* - * Restore original MSR value for package thermal - * interrupt. + * After this point nothing touches the MSR anymore. We + * must drop the lock to make the cross cpu call. This goes + * away once we move that code to the hotplug state machine. */ + spin_unlock_irq(&pkg_temp_lock); wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, pkgdev->msr_pkg_therm_low, pkgdev->msr_pkg_therm_high); - list_del(&pkgdev->list); kfree(pkgdev); - } else if (pkgdev->cpu == cpu) { - pkgdev->cpu = target; } - mutex_unlock(&phy_dev_list_mutex); - - return 0; + /* + * Note, this is broken when work was really scheduled on the + * outgoing cpu because this will leave the work_scheduled flag set + * and the thermal interrupts disabled. Will be fixed in the next + * step as there is no way to fix it in a sane way with the per cpu + * work nonsense. + */ + cancel_delayed_work_sync(&per_cpu(pkg_temp_thermal_threshold_work, cpu)); } static int get_core_online(unsigned int cpu) @@ -475,20 +495,13 @@ static int get_core_online(unsigned int cpu) pkg_temp_thermal_threshold_work_fn); /* If the package exists, nothing to do */ - if (pkgdev) + if (pkgdev) { + cpumask_set_cpu(cpu, &pkgdev->cpumask); return 0; + } return pkg_temp_thermal_device_add(cpu); } -static void put_core_offline(unsigned int cpu) -{ - if (!pkg_temp_thermal_device_remove(cpu)) - cancel_delayed_work_sync( - &per_cpu(pkg_temp_thermal_threshold_work, cpu)); - - pr_debug("put_core_offline: cpu %d\n", cpu); -} - static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -523,8 +536,6 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; - spin_lock_init(&pkg_work_lock); - cpu_notifier_register_begin(); for_each_online_cpu(i) if (get_core_online(i)) @@ -550,7 +561,6 @@ module_init(pkg_temp_thermal_init) static void __exit pkg_temp_thermal_exit(void) { - struct pkg_device *pkgdev, *n; int i; platform_thermal_package_notify = NULL; @@ -558,20 +568,8 @@ static void __exit pkg_temp_thermal_exit(void) cpu_notifier_register_begin(); __unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); - mutex_lock(&phy_dev_list_mutex); - list_for_each_entry_safe(pkgdev, n, &phy_dev_list, list) { - /* Retore old MSR value for package thermal interrupt */ - wrmsr_on_cpu(pkgdev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, - pkgdev->msr_pkg_therm_low, - pkgdev->msr_pkg_therm_high); - thermal_zone_device_unregister(pkgdev->tzone); - list_del(&pkgdev->list); - kfree(pkgdev); - } - mutex_unlock(&phy_dev_list_mutex); for_each_online_cpu(i) - cancel_delayed_work_sync( - &per_cpu(pkg_temp_thermal_threshold_work, i)); + put_core_offline(i); cpu_notifier_register_done(); kfree(pkg_work_scheduled); |