From 3b9c10e98021e1f92e6f8c7ce1778b86ba68db10 Mon Sep 17 00:00:00 2001
From: Daniel Fu <danifu@nvidia.com>
Date: Fri, 30 Aug 2013 19:48:22 +0800
Subject: cpuidle: Check the result of cpuidle_get_driver() against NULL

If the current CPU has no cpuidle driver, drv will be NULL in
cpuidle_driver_ref().  Check if that is the case before trying
to bump up the driver's refcount to prevent the kernel from
crashing.

[rjw: Subject and changelog]
Signed-off-by: Daniel Fu <danifu@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/driver.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 3ac499d5a207..6e11701f0fca 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -331,7 +331,8 @@ struct cpuidle_driver *cpuidle_driver_ref(void)
 	spin_lock(&cpuidle_driver_lock);
 
 	drv = cpuidle_get_driver();
-	drv->refcnt++;
+	if (drv)
+		drv->refcnt++;
 
 	spin_unlock(&cpuidle_driver_lock);
 	return drv;
-- 
cgit v1.2.3


From e0ae8fee0e11c1a8e9b45ab14ab5fe58d87f031d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 30 Aug 2013 14:19:29 +0200
Subject: ACPI / scan: Change ordering of locks for device hotplug

Change the ordering of device hotplug locks in scan.c so that
acpi_scan_lock is always acquired after device_hotplug_lock.

This will make it possible to use device_hotplug_lock around some
code paths that acquire acpi_scan_lock safely (most importantly
system suspend and hibernation).  Apart from that, acpi_scan_lock
is platform-specific and device_hotplug_lock is general, so the
new ordering appears to be more appropriate from the overall
design viewpoint.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
---
 drivers/acpi/scan.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e2f6d9dbdf0d..42982b522b36 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -207,8 +207,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
 		return -EINVAL;
 	}
 
-	lock_device_hotplug();
-
 	/*
 	 * Carry out two passes here and ignore errors in the first pass,
 	 * because if the devices in question are memory blocks and
@@ -239,9 +237,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
 					    ACPI_UINT32_MAX,
 					    acpi_bus_online_companions, NULL,
 					    NULL, NULL);
-
-			unlock_device_hotplug();
-
 			put_device(&device->dev);
 			return -EBUSY;
 		}
@@ -252,8 +247,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
 
 	acpi_bus_trim(device);
 
-	unlock_device_hotplug();
-
 	/* Device node has been unregistered. */
 	put_device(&device->dev);
 	device = NULL;
@@ -309,6 +302,7 @@ static void acpi_bus_device_eject(void *context)
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error;
 
+	lock_device_hotplug();
 	mutex_lock(&acpi_scan_lock);
 
 	acpi_bus_get_device(handle, &device);
@@ -332,6 +326,7 @@ static void acpi_bus_device_eject(void *context)
 
  out:
 	mutex_unlock(&acpi_scan_lock);
+	unlock_device_hotplug();
 	return;
 
  err_out:
@@ -346,8 +341,8 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error;
 
-	mutex_lock(&acpi_scan_lock);
 	lock_device_hotplug();
+	mutex_lock(&acpi_scan_lock);
 
 	if (ost_source != ACPI_NOTIFY_BUS_CHECK) {
 		acpi_bus_get_device(handle, &device);
@@ -373,9 +368,9 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
 		kobject_uevent(&device->dev.kobj, KOBJ_ONLINE);
 
  out:
-	unlock_device_hotplug();
 	acpi_evaluate_hotplug_ost(handle, ost_source, ost_code, NULL);
 	mutex_unlock(&acpi_scan_lock);
+	unlock_device_hotplug();
 }
 
 static void acpi_scan_bus_check(void *context)
@@ -466,6 +461,7 @@ void acpi_bus_hot_remove_device(void *context)
 	acpi_handle handle = device->handle;
 	int error;
 
+	lock_device_hotplug();
 	mutex_lock(&acpi_scan_lock);
 
 	error = acpi_scan_hot_remove(device);
@@ -475,6 +471,7 @@ void acpi_bus_hot_remove_device(void *context)
 					  NULL);
 
 	mutex_unlock(&acpi_scan_lock);
+	unlock_device_hotplug();
 	kfree(context);
 }
 EXPORT_SYMBOL(acpi_bus_hot_remove_device);
-- 
cgit v1.2.3


From af65cfe9aeae03e0682bebdf4db94582d75562dd Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 2 Sep 2013 13:30:25 +0300
Subject: ACPI / LPSS: don't crash if a device has no MMIO resources

Intel LPSS devices that are enumerated from ACPI have both MMIO and IRQ
resources returned in their _CRS method. However, Apple Macbook Air with
Haswell has LPSS devices enumerated from PCI bus instead and _CRS method
returns only an interrupt number (but the device has _HID set that causes
the scan handler to match it).

The current ACPI / LPSS code sets pdata->dev_desc only when MMIO resource
is found for the device and in case of Macbook Air it is never found. That
leads to a NULL pointer dereference in register_device_clock().

Correct this by always setting the pdata->dev_desc.

Reported-and-tested-by: Imre Kaloz <kaloz@openwrt.org>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: 3.10+ <stable@vger.kernel.org> # 3.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 6a382188fa20..fb78bb9ad8f6 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -257,12 +257,13 @@ static int acpi_lpss_create_device(struct acpi_device *adev,
 				pdata->mmio_size = resource_size(&rentry->res);
 			pdata->mmio_base = ioremap(rentry->res.start,
 						   pdata->mmio_size);
-			pdata->dev_desc = dev_desc;
 			break;
 		}
 
 	acpi_dev_free_resource_list(&resource_list);
 
+	pdata->dev_desc = dev_desc;
+
 	if (dev_desc->clk_required) {
 		ret = register_device_clock(adev, pdata);
 		if (ret) {
-- 
cgit v1.2.3


From 89ec2f2ee104970329139e6526a075113c92f650 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 5 Sep 2013 23:39:20 +0200
Subject: ACPI / hotplug / PCI: Don't trim devices before scanning the
 namespace

In acpiphp_bus_add() we first remove device objects corresponding to
the given handle and the ACPI namespace branch below it, which are
then re-created by acpi_bus_scan().  This used to be done to clean
up after surprise removals, but now we do the cleanup through
trim_stale_devices() which checks if the devices in question are
actually gone before removing them, so the device hierarchy trimming
in acpiphp_bus_add() is not necessary any more and, moreover, it may
lead to problems if it removes device objects corresponding to
devices that are actually present.

For this reason, remove the leftover acpiphp_bus_trim() from
acpiphp_bus_add().

Reported-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 8054ddcdaed0..3f78212f4eee 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -487,7 +487,6 @@ static void acpiphp_bus_add(acpi_handle handle)
 {
 	struct acpi_device *adev = NULL;
 
-	acpiphp_bus_trim(handle);
 	acpi_bus_scan(handle);
 	acpi_bus_get_device(handle, &adev);
 	if (adev)
-- 
cgit v1.2.3


From 4be4be8fee2ee99a52f94f90d03d2f287ee1db86 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 6 Sep 2013 14:27:15 +0800
Subject: ACPICA: Fix for a Store->ArgX when ArgX contains a reference to a
 field.

This change fixes a problem where a Store operation to an ArgX object
that contained a reference to a field object did not complete the
automatic dereference and then write to the actual field object.
Instead, the object type of the field object was inadvertently changed
to match the type of the source operand. The new behavior will actually
write to the field object (buffer field or field unit), thus matching
the correct ACPI-defined behavior.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/exstore.c | 166 ++++++++++++++++++++++++++----------------
 1 file changed, 102 insertions(+), 64 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index 2bdba6f7d762..f0b09bf9887d 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -57,6 +57,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *val_desc,
 			      union acpi_operand_object *dest_desc,
 			      struct acpi_walk_state *walk_state);
 
+static acpi_status
+acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc,
+			     struct acpi_namespace_node *node,
+			     struct acpi_walk_state *walk_state);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ex_store
@@ -375,7 +380,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *source_desc,
  *              When storing into an object the data is converted to the
  *              target object type then stored in the object. This means
  *              that the target object type (for an initialized target) will
- *              not be changed by a store operation.
+ *              not be changed by a store operation. A copy_object can change
+ *              the target type, however.
+ *
+ *              The implicit_conversion flag is set to NO/FALSE only when
+ *              storing to an arg_x -- as per the rules of the ACPI spec.
  *
  *              Assumes parameters are already validated.
  *
@@ -399,7 +408,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 	target_type = acpi_ns_get_type(node);
 	target_desc = acpi_ns_get_attached_object(node);
 
-	ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p(%s) into node %p(%s)\n",
+	ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p (%s) to node %p (%s)\n",
 			  source_desc,
 			  acpi_ut_get_object_type_name(source_desc), node,
 			  acpi_ut_get_type_name(target_type)));
@@ -413,45 +422,30 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 		return_ACPI_STATUS(status);
 	}
 
-	/* If no implicit conversion, drop into the default case below */
-
-	if ((!implicit_conversion) ||
-	    ((walk_state->opcode == AML_COPY_OP) &&
-	     (target_type != ACPI_TYPE_LOCAL_REGION_FIELD) &&
-	     (target_type != ACPI_TYPE_LOCAL_BANK_FIELD) &&
-	     (target_type != ACPI_TYPE_LOCAL_INDEX_FIELD))) {
-		/*
-		 * Force execution of default (no implicit conversion). Note:
-		 * copy_object does not perform an implicit conversion, as per the ACPI
-		 * spec -- except in case of region/bank/index fields -- because these
-		 * objects must retain their original type permanently.
-		 */
-		target_type = ACPI_TYPE_ANY;
-	}
-
 	/* Do the actual store operation */
 
 	switch (target_type) {
-	case ACPI_TYPE_BUFFER_FIELD:
-	case ACPI_TYPE_LOCAL_REGION_FIELD:
-	case ACPI_TYPE_LOCAL_BANK_FIELD:
-	case ACPI_TYPE_LOCAL_INDEX_FIELD:
-
-		/* For fields, copy the source data to the target field. */
-
-		status = acpi_ex_write_data_to_field(source_desc, target_desc,
-						     &walk_state->result_obj);
-		break;
-
 	case ACPI_TYPE_INTEGER:
 	case ACPI_TYPE_STRING:
 	case ACPI_TYPE_BUFFER:
 		/*
-		 * These target types are all of type Integer/String/Buffer, and
-		 * therefore support implicit conversion before the store.
-		 *
-		 * Copy and/or convert the source object to a new target object
+		 * The simple data types all support implicit source operand
+		 * conversion before the store.
 		 */
+
+		if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) {
+			/*
+			 * However, copy_object and Stores to arg_x do not perform
+			 * an implicit conversion, as per the ACPI specification.
+			 * A direct store is performed instead.
+			 */
+			status = acpi_ex_store_direct_to_node(source_desc, node,
+							      walk_state);
+			break;
+		}
+
+		/* Store with implicit source operand conversion support */
+
 		status =
 		    acpi_ex_store_object_to_object(source_desc, target_desc,
 						   &new_desc, walk_state);
@@ -465,13 +459,12 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 			 * the Name's type to that of the value being stored in it.
 			 * source_desc reference count is incremented by attach_object.
 			 *
-			 * Note: This may change the type of the node if an explicit store
-			 * has been performed such that the node/object type has been
-			 * changed.
+			 * Note: This may change the type of the node if an explicit
+			 * store has been performed such that the node/object type
+			 * has been changed.
 			 */
-			status =
-			    acpi_ns_attach_object(node, new_desc,
-						  new_desc->common.type);
+			status = acpi_ns_attach_object(node, new_desc,
+						       new_desc->common.type);
 
 			ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
 					  "Store %s into %s via Convert/Attach\n",
@@ -482,38 +475,83 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
 		}
 		break;
 
-	default:
-
-		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-				  "Storing [%s] (%p) directly into node [%s] (%p)"
-				  " with no implicit conversion\n",
-				  acpi_ut_get_object_type_name(source_desc),
-				  source_desc,
-				  acpi_ut_get_object_type_name(target_desc),
-				  node));
+	case ACPI_TYPE_BUFFER_FIELD:
+	case ACPI_TYPE_LOCAL_REGION_FIELD:
+	case ACPI_TYPE_LOCAL_BANK_FIELD:
+	case ACPI_TYPE_LOCAL_INDEX_FIELD:
+		/*
+		 * For all fields, always write the source data to the target
+		 * field. Any required implicit source operand conversion is
+		 * performed in the function below as necessary. Note, field
+		 * objects must retain their original type permanently.
+		 */
+		status = acpi_ex_write_data_to_field(source_desc, target_desc,
+						     &walk_state->result_obj);
+		break;
 
+	default:
 		/*
 		 * No conversions for all other types. Directly store a copy of
-		 * the source object. NOTE: This is a departure from the ACPI
-		 * spec, which states "If conversion is impossible, abort the
-		 * running control method".
+		 * the source object. This is the ACPI spec-defined behavior for
+		 * the copy_object operator.
 		 *
-		 * This code implements "If conversion is impossible, treat the
-		 * Store operation as a CopyObject".
+		 * NOTE: For the Store operator, this is a departure from the
+		 * ACPI spec, which states "If conversion is impossible, abort
+		 * the running control method". Instead, this code implements
+		 * "If conversion is impossible, treat the Store operation as
+		 * a CopyObject".
 		 */
-		status =
-		    acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc,
-						    walk_state);
-		if (ACPI_FAILURE(status)) {
-			return_ACPI_STATUS(status);
-		}
-
-		status =
-		    acpi_ns_attach_object(node, new_desc,
-					  new_desc->common.type);
-		acpi_ut_remove_reference(new_desc);
+		status = acpi_ex_store_direct_to_node(source_desc, node,
+						      walk_state);
 		break;
 	}
 
 	return_ACPI_STATUS(status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_store_direct_to_node
+ *
+ * PARAMETERS:  source_desc             - Value to be stored
+ *              node                    - Named object to receive the value
+ *              walk_state              - Current walk state
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: "Store" an object directly to a node. This involves a copy
+ *              and an attach.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc,
+			     struct acpi_namespace_node *node,
+			     struct acpi_walk_state *walk_state)
+{
+	acpi_status status;
+	union acpi_operand_object *new_desc;
+
+	ACPI_FUNCTION_TRACE(ex_store_direct_to_node);
+
+	ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+			  "Storing [%s] (%p) directly into node [%s] (%p)"
+			  " with no implicit conversion\n",
+			  acpi_ut_get_object_type_name(source_desc),
+			  source_desc, acpi_ut_get_type_name(node->type),
+			  node));
+
+	/* Copy the source object to a new object */
+
+	status =
+	    acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, walk_state);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Attach the new object to the node */
+
+	status = acpi_ns_attach_object(node, new_desc, new_desc->common.type);
+	acpi_ut_remove_reference(new_desc);
+	return_ACPI_STATUS(status);
+}
-- 
cgit v1.2.3


From 2dc41281b1d1178befe4b76adf817570a7f45ec1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 6 Sep 2013 15:41:32 +0200
Subject: ACPI / hotplug / PCI: Avoid doing too much for spurious notifies

Sometimes we may get a spurious device check or bus check notify for
a hotplug device and in those cases we should avoid doing all of the
configuration work needed when something actually changes.  To that
end, check the return value of pci_scan_slot() in enable_slot() and
bail out early if it is 0.

This turns out to help reduce the amount of diagnostic output from
the ACPIPHP subsystem and speed up boot on at least one system that
generates multiple device check notifies for PCIe devices on the root
bus during boot.

Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3f78212f4eee..65290226e5dd 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -542,12 +542,12 @@ static void __ref enable_slot(struct acpiphp_slot *slot)
 	struct acpiphp_func *func;
 	int max, pass;
 	LIST_HEAD(add_list);
+	int nr_found;
 
 	list_for_each_entry(func, &slot->funcs, sibling)
 		acpiphp_bus_add(func_to_handle(func));
 
-	pci_scan_slot(bus, PCI_DEVFN(slot->device, 0));
-
+	nr_found = pci_scan_slot(bus, PCI_DEVFN(slot->device, 0));
 	max = acpiphp_max_busnr(bus);
 	for (pass = 0; pass < 2; pass++) {
 		list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -566,8 +566,11 @@ static void __ref enable_slot(struct acpiphp_slot *slot)
 			}
 		}
 	}
-
 	__pci_bus_assign_resources(bus, &add_list, NULL);
+	/* Nothing more to do here if there are no new devices on this bus. */
+	if (!nr_found && (slot->flags & SLOT_ENABLED))
+		return;
+
 	acpiphp_sanitize_bus(bus);
 	acpiphp_set_hpp_values(bus);
 	acpiphp_set_acpi_region(slot);
-- 
cgit v1.2.3


From e532e84ea11399a6066f31641425a76dd012ce77 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 6 Sep 2013 15:41:41 +0200
Subject: ACPI / hotplug / PCI: Use _OST to notify firmware about notify status

The spec suggests that we should use _OST to notify the platform
about the status of notifications it sends us, for example so that
it doesn't repeate a notification that has been handled already.

This turns out to help reduce the amount of diagnostic output from
the ACPIPHP subsystem and speed up boot on at least one system that
generates multiple device check notifies for PCIe devices on the root
bus during boot.

Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 65290226e5dd..1971d2943de4 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -870,6 +870,8 @@ static void hotplug_event_work(struct work_struct *work)
 	hotplug_event(hp_work->handle, hp_work->type, context);
 
 	acpi_scan_lock_release();
+	acpi_evaluate_hotplug_ost(hp_work->handle, hp_work->type,
+				  ACPI_OST_SC_SUCCESS, NULL);
 	kfree(hp_work); /* allocated in handle_hotplug_event() */
 	put_bridge(context->func.parent);
 }
@@ -885,11 +887,15 @@ static void hotplug_event_work(struct work_struct *work)
 static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
 {
 	struct acpiphp_context *context;
+	u32 ost_code = ACPI_OST_SC_SUCCESS;
 
 	switch (type) {
 	case ACPI_NOTIFY_BUS_CHECK:
 	case ACPI_NOTIFY_DEVICE_CHECK:
+		break;
 	case ACPI_NOTIFY_EJECT_REQUEST:
+		ost_code = ACPI_OST_SC_EJECT_IN_PROGRESS;
+		acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
 		break;
 
 	case ACPI_NOTIFY_DEVICE_WAKE:
@@ -898,20 +904,21 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
 	case ACPI_NOTIFY_FREQUENCY_MISMATCH:
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a frequency mismatch\n");
-		return;
+		goto out;
 
 	case ACPI_NOTIFY_BUS_MODE_MISMATCH:
 		acpi_handle_err(handle, "Device cannot be configured due "
 				"to a bus mode mismatch\n");
-		return;
+		goto out;
 
 	case ACPI_NOTIFY_POWER_FAULT:
 		acpi_handle_err(handle, "Device has suffered a power fault\n");
-		return;
+		goto out;
 
 	default:
 		acpi_handle_warn(handle, "Unsupported event type 0x%x\n", type);
-		return;
+		ost_code = ACPI_OST_SC_UNRECOGNIZED_NOTIFY;
+		goto out;
 	}
 
 	mutex_lock(&acpiphp_context_lock);
@@ -920,8 +927,14 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
 		get_bridge(context->func.parent);
 		acpiphp_put_context(context);
 		alloc_acpi_hp_work(handle, type, context, hotplug_event_work);
+		mutex_unlock(&acpiphp_context_lock);
+		return;
 	}
 	mutex_unlock(&acpiphp_context_lock);
+	ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+
+ out:
+	acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
 }
 
 /*
-- 
cgit v1.2.3


From a47d8c8e72a5fa2e69117674c4b0b6cc79c5bc53 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 8 Sep 2013 00:07:28 +0200
Subject: ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device
 checks

In the current ACPIPHP notify handler we always go directly for a
rescan of the parent bus if we get a device check notification for
a device that is not a bridge.  However, this obviously is
overzealous if nothing really changes, because this way we may rescan
the whole PCI hierarchy pretty much in vain.

That happens on Alex Williamson's machine whose ACPI tables contain
device objects that are supposed to coresspond to PCIe root ports,
but those ports aren't physically present (or at least they aren't
visible in the PCI config space to us).  The BIOS generates multiple
device check notifies for those objects during boot and for each of
them we go straight for the parent bus rescan, but the parent bus is
the root bus in this particular case.  In consequence, we rescan the
whole PCI bus from the top several times in a row, which is
completely unnecessary, increases boot time by 50% (after previous
fixes) and generates excess dmesg output from the PCI subsystem.

Fix the problem by checking if we can find anything new in the
slot corresponding to the device we've got a device check notify
for and doing nothig if that's not the case.

The spec (ACPI 5.0, Section 5.6.6) appears to mandate this behavior,
as it says:

  Device Check. Used to notify OSPM that the device either appeared
  or disappeared. If the device has appeared, OSPM will re-enumerate
  from the parent. If the device has disappeared, OSPM will
  invalidate the state of the device. OSPM may optimize out
  re-enumeration.

Therefore, according to the spec, we are free to do nothing if
nothing changes.

References: https://bugzilla.kernel.org/show_bug.cgi?id=60865
Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/hotplug/acpiphp_glue.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 1971d2943de4..9d6e535e74a1 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -528,6 +528,16 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
 	}
 }
 
+static int acpiphp_rescan_slot(struct acpiphp_slot *slot)
+{
+	struct acpiphp_func *func;
+
+	list_for_each_entry(func, &slot->funcs, sibling)
+		acpiphp_bus_add(func_to_handle(func));
+
+	return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0));
+}
+
 /**
  * enable_slot - enable, configure a slot
  * @slot: slot to be enabled
@@ -544,10 +554,7 @@ static void __ref enable_slot(struct acpiphp_slot *slot)
 	LIST_HEAD(add_list);
 	int nr_found;
 
-	list_for_each_entry(func, &slot->funcs, sibling)
-		acpiphp_bus_add(func_to_handle(func));
-
-	nr_found = pci_scan_slot(bus, PCI_DEVFN(slot->device, 0));
+	nr_found = acpiphp_rescan_slot(slot);
 	max = acpiphp_max_busnr(bus);
 	for (pass = 0; pass < 2; pass++) {
 		list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -840,11 +847,22 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data)
 	case ACPI_NOTIFY_DEVICE_CHECK:
 		/* device check */
 		dbg("%s: Device check notify on %s\n", __func__, objname);
-		if (bridge)
+		if (bridge) {
 			acpiphp_check_bridge(bridge);
-		else
-			acpiphp_check_bridge(func->parent);
+		} else {
+			struct acpiphp_slot *slot = func->slot;
+			int ret;
 
+			/*
+			 * Check if anything has changed in the slot and rescan
+			 * from the parent if that's the case.
+			 */
+			mutex_lock(&slot->crit_sect);
+			ret = acpiphp_rescan_slot(slot);
+			mutex_unlock(&slot->crit_sect);
+			if (ret)
+				acpiphp_check_bridge(func->parent);
+		}
 		break;
 
 	case ACPI_NOTIFY_EJECT_REQUEST:
-- 
cgit v1.2.3


From 11b88ee275ec8590a373396888c2460ee89364d6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 9 Sep 2013 23:07:47 +0200
Subject: ACPI / bind: Prefer device objects with _STA to those without it

As reported at https://bugzilla.kernel.org/show_bug.cgi?id=60829,
there still are cases in which do_find_child() doesn't choose the
ACPI device object it is "expected" to choose if there are more such
objects matching one PCI device present.  This particular problem may
be worked around by making do_find_child() return device obejcts witn
_STA whose result indicates that the device is enabled before device
objects without _STA if there's more than one device object to choose
from.

This change doesn't affect the case in which there's only one
matching ACPI device object per PCI device.

References: https://bugzilla.kernel.org/show_bug.cgi?id=60829
Reported-by: Peter Wu <lekensteyn@gmail.com>
Tested-by: Felix Lisczyk <felix.lisczyk@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/glue.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

(limited to 'drivers')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 94672297e1b1..10f0f40587bb 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -79,6 +79,9 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
 	return ret;
 }
 
+#define FIND_CHILD_MIN_SCORE	1
+#define FIND_CHILD_MAX_SCORE	2
+
 static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
 				  void *not_used, void **ret_p)
 {
@@ -92,14 +95,17 @@ static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
 	return AE_OK;
 }
 
-static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
+static int do_find_child_checks(acpi_handle handle, bool is_bridge)
 {
+	bool sta_present = true;
 	unsigned long long sta;
 	acpi_status status;
 
-	status = acpi_bus_get_status_handle(handle, &sta);
-	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
-		return false;
+	status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
+	if (status == AE_NOT_FOUND)
+		sta_present = false;
+	else if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
+		return -ENODEV;
 
 	if (is_bridge) {
 		void *test = NULL;
@@ -107,16 +113,17 @@ static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
 		/* Check if this object has at least one child device. */
 		acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
 				    acpi_dev_present, NULL, NULL, &test);
-		return !!test;
+		if (!test)
+			return -ENODEV;
 	}
-	return true;
+	return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
 }
 
 struct find_child_context {
 	u64 addr;
 	bool is_bridge;
 	acpi_handle ret;
-	bool ret_checked;
+	int ret_score;
 };
 
 static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
@@ -125,6 +132,7 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
 	struct find_child_context *context = data;
 	unsigned long long addr;
 	acpi_status status;
+	int score;
 
 	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
 	if (ACPI_FAILURE(status) || addr != context->addr)
@@ -144,15 +152,20 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
 	 * its handle if so.  Second, check the same for the object that we've
 	 * just found.
 	 */
-	if (!context->ret_checked) {
-		if (acpi_extra_checks_passed(context->ret, context->is_bridge))
+	if (!context->ret_score) {
+		score = do_find_child_checks(context->ret, context->is_bridge);
+		if (score == FIND_CHILD_MAX_SCORE)
 			return AE_CTRL_TERMINATE;
 		else
-			context->ret_checked = true;
+			context->ret_score = score;
 	}
-	if (acpi_extra_checks_passed(handle, context->is_bridge)) {
+	score = do_find_child_checks(handle, context->is_bridge);
+	if (score == FIND_CHILD_MAX_SCORE) {
 		context->ret = handle;
 		return AE_CTRL_TERMINATE;
+	} else if (score > context->ret_score) {
+		context->ret = handle;
+		context->ret_score = score;
 	}
 	return AE_OK;
 }
-- 
cgit v1.2.3


From f73d39338444d9915c746403bd98b145ff9d2ba4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 31 Aug 2013 17:53:40 +0530
Subject: cpufreq: don't allow governor limits to be changed when it is
 disabled

__cpufreq_governor() returns with -EBUSY when governor is already
stopped and we try to stop it again, but when it is stopped we must
not allow calls to CPUFREQ_GOV_LIMITS event as well.

This patch adds this check in __cpufreq_governor().

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5c75e3147a60..06a2496d2075 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1692,8 +1692,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 						policy->cpu, event);
 
 	mutex_lock(&cpufreq_governor_lock);
-	if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) ||
-	    (policy->governor_enabled && (event == CPUFREQ_GOV_START))) {
+	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
+	    || (!policy->governor_enabled
+	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
 		mutex_unlock(&cpufreq_governor_lock);
 		return -EBUSY;
 	}
-- 
cgit v1.2.3


From 19c763031acb831a5ab9c1a701b7fedda073eb3f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Sat, 31 Aug 2013 17:48:23 +0530
Subject: cpufreq: serialize calls to __cpufreq_governor()

We can't take a big lock around __cpufreq_governor() as this causes
recursive locking for some cases. But calls to this routine must be
serialized for every policy. Otherwise we can see some unpredictable
events.

For example, consider following scenario:

__cpufreq_remove_dev()
 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
   policy->governor->governor(policy, CPUFREQ_GOV_STOP);
    cpufreq_governor_dbs()
     case CPUFREQ_GOV_STOP:
      mutex_destroy(&cpu_cdbs->timer_mutex)
      cpu_cdbs->cur_policy = NULL;
  <PREEMPT>
store()
 __cpufreq_set_policy()
  __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
    policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
     case CPUFREQ_GOV_LIMITS:
      mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
       if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL

And so store() will eventually result in a crash if cur_policy is
NULL at this point.

Introduce an additional variable which would guarantee serialization
here.

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 7 ++++++-
 include/linux/cpufreq.h   | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 06a2496d2075..7e6baa58a7f2 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1692,13 +1692,15 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 						policy->cpu, event);
 
 	mutex_lock(&cpufreq_governor_lock);
-	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
+	if (policy->governor_busy
+	    || (policy->governor_enabled && event == CPUFREQ_GOV_START)
 	    || (!policy->governor_enabled
 	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
 		mutex_unlock(&cpufreq_governor_lock);
 		return -EBUSY;
 	}
 
+	policy->governor_busy = true;
 	if (event == CPUFREQ_GOV_STOP)
 		policy->governor_enabled = false;
 	else if (event == CPUFREQ_GOV_START)
@@ -1727,6 +1729,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 			((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
 		module_put(policy->governor->owner);
 
+	mutex_lock(&cpufreq_governor_lock);
+	policy->governor_busy = false;
+	mutex_unlock(&cpufreq_governor_lock);
 	return ret;
 }
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d568f3975eeb..cca885dac1d3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -76,6 +76,7 @@ struct cpufreq_policy {
 	struct cpufreq_governor	*governor; /* see below */
 	void			*governor_data;
 	bool			governor_enabled; /* governor start/stop flag */
+	bool			governor_busy;
 
 	struct work_struct	update; /* if update_policy() needs to be
 					 * called, but you're in IRQ context */
-- 
cgit v1.2.3


From a857c0b9e24e39fe5be82451b65377795f9538d8 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@linux-m68k.org>
Date: Sat, 7 Sep 2013 18:35:08 +0200
Subject: cpufreq: Fix wrong time unit conversion

The time spent by a CPU under a given frequency is stored in jiffies unit
in the cpu var cpufreq_stats_table->time_in_state[i], i being the index of
the frequency.

This is what is displayed in the following file on the right column:

     cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state
     2301000 19835820
     2300000 3172
     [...]

Now cpufreq converts this jiffies unit delta to clock_t before returning it
to the user as in the above file. And that conversion is achieved using the API
cputime64_to_clock_t().

Although it accidentally works on traditional tick based cputime accounting, where
cputime_t maps directly to jiffies, it doesn't work with other types of cputime
accounting such as CONFIG_VIRT_CPU_ACCOUNTING_* where cputime_t can map to nsecs
or any granularity preffered by the architecture.

For example we get a buggy zero delta on full dyntick configurations:

     cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state
     2301000 0
     2300000 0
     [...]

Fix this with using the proper jiffies_64_t to clock_t conversion.

Reported-and-tested-by: Carsten Emde <C.Emde@osadl.org>
Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 04452f026ed0..4cf0d2805cb2 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -74,7 +74,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 	for (i = 0; i < stat->state_num; i++) {
 		len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
 			(unsigned long long)
-			cputime64_to_clock_t(stat->time_in_state[i]));
+			jiffies_64_to_clock_t(stat->time_in_state[i]));
 	}
 	return len;
 }
-- 
cgit v1.2.3


From cedb70afd077b00bff7379042fdbf7eef32606c9 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 7 Sep 2013 01:23:09 +0530
Subject: cpufreq: Split __cpufreq_remove_dev() into two parts

During CPU offline, the cpufreq core invokes __cpufreq_remove_dev()
to perform work such as stopping the cpufreq governor, clearing the
CPU from the policy structure etc, and finally cleaning up the
kobject.

There are certain subtle issues related to the kobject cleanup, and
it would be much easier to deal with them if we separate that part
from the rest of the cleanup-work in the CPU offline phase. So split
the __cpufreq_remove_dev() function into 2 parts: one that handles
the kobject cleanup, and the other that handles the rest of the work.

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 65 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 12 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7e6baa58a7f2..a33174e324d1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1141,22 +1141,14 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 	return cpu_dev->id;
 }
 
-/**
- * __cpufreq_remove_dev - remove a CPU device
- *
- * Removes the cpufreq interface for a CPU device.
- * Caller should already have policy_rwsem in write mode for this CPU.
- * This routine frees the rwsem before returning.
- */
-static int __cpufreq_remove_dev(struct device *dev,
-				struct subsys_interface *sif, bool frozen)
+static int __cpufreq_remove_dev_prepare(struct device *dev,
+					struct subsys_interface *sif,
+					bool frozen)
 {
 	unsigned int cpu = dev->id, cpus;
 	int new_cpu, ret;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
-	struct kobject *kobj;
-	struct completion *cmp;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
@@ -1213,6 +1205,33 @@ static int __cpufreq_remove_dev(struct device *dev,
 		}
 	}
 
+	return 0;
+}
+
+static int __cpufreq_remove_dev_finish(struct device *dev,
+				       struct subsys_interface *sif,
+				       bool frozen)
+{
+	unsigned int cpu = dev->id, cpus;
+	int ret;
+	unsigned long flags;
+	struct cpufreq_policy *policy;
+	struct kobject *kobj;
+	struct completion *cmp;
+
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
+	policy = per_cpu(cpufreq_cpu_data, cpu);
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+	if (!policy) {
+		pr_debug("%s: No cpu_data found\n", __func__);
+		return -EINVAL;
+	}
+
+	lock_policy_rwsem_read(cpu);
+	cpus = cpumask_weight(policy->cpus);
+	unlock_policy_rwsem_read(cpu);
+
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
 		if (cpufreq_driver->target) {
@@ -1272,6 +1291,27 @@ static int __cpufreq_remove_dev(struct device *dev,
 	return 0;
 }
 
+/**
+ * __cpufreq_remove_dev - remove a CPU device
+ *
+ * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
+ */
+static inline int __cpufreq_remove_dev(struct device *dev,
+				       struct subsys_interface *sif,
+				       bool frozen)
+{
+	int ret;
+
+	ret = __cpufreq_remove_dev_prepare(dev, sif, frozen);
+
+	if (!ret)
+		ret = __cpufreq_remove_dev_finish(dev, sif, frozen);
+
+	return ret;
+}
+
 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id;
@@ -2000,7 +2040,8 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 			break;
 
 		case CPU_DOWN_PREPARE:
-			__cpufreq_remove_dev(dev, NULL, frozen);
+			__cpufreq_remove_dev_prepare(dev, NULL, frozen);
+			__cpufreq_remove_dev_finish(dev, NULL, frozen);
 			break;
 
 		case CPU_DOWN_FAILED:
-- 
cgit v1.2.3


From 1aee40ac9c86759c05f2ceb4523642b22fc8ea36 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 7 Sep 2013 01:23:27 +0530
Subject: cpufreq: Invoke __cpufreq_remove_dev_finish() after releasing
 cpu_hotplug.lock

__cpufreq_remove_dev_finish() handles the kobject cleanup for a CPU going
offline. But because we destroy the kobject towards the end of the CPU offline
phase, there are certain race windows where a task can try to write to a
cpufreq sysfs file (eg: using store_scaling_max_freq()) while we are taking
that CPU offline, and this can bump up the kobject refcount, which in turn might
hinder the CPU offline task from running to completion. (It can also cause
other more serious problems such as trying to acquire a destroyed timer-mutex
etc., depending on the exact stage of the cleanup at which the task managed to
take a new refcount).

To fix the race window, we will need to synchronize those store_*() call-sites
with CPU hotplug, using get_online_cpus()/put_online_cpus(). However, that
in turn can cause a total deadlock because it can end up waiting for the
CPU offline task to complete, with incremented refcount!

Write to sysfs                            CPU offline task
--------------                            ----------------
kobj_refcnt++

                                          Acquire cpu_hotplug.lock

get_online_cpus();

					  Wait for kobj_refcnt to drop to zero

                     **DEADLOCK**

A simple way to avoid this problem is to perform the kobject cleanup in the
CPU offline path, with the cpu_hotplug.lock *released*. That is, we can
perform the wait-for-kobj-refcnt-to-drop as well as the subsequent cleanup
in the CPU_POST_DEAD stage of CPU offline, which is run with cpu_hotplug.lock
released. Doing this helps us avoid deadlocks due to holding kobject refcounts
and waiting on each other on the cpu_hotplug.lock.

(Note: We can't move all of the cpufreq CPU offline steps to the
CPU_POST_DEAD stage, because certain things such as stopping the governors
have to be done before the outgoing CPU is marked offline. So retain those
parts in the CPU_DOWN_PREPARE stage itself).

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a33174e324d1..34999fc3216f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2041,6 +2041,9 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
 
 		case CPU_DOWN_PREPARE:
 			__cpufreq_remove_dev_prepare(dev, NULL, frozen);
+			break;
+
+		case CPU_POST_DEAD:
 			__cpufreq_remove_dev_finish(dev, NULL, frozen);
 			break;
 
-- 
cgit v1.2.3


From 4f750c930822b92df74327a4d1364eff87701360 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 7 Sep 2013 01:23:43 +0530
Subject: cpufreq: Synchronize the cpufreq store_*() routines with CPU hotplug

The functions that are used to write to cpufreq sysfs files (such as
store_scaling_max_freq()) are not hotplug safe. They can race with CPU
hotplug tasks and lead to problems such as trying to acquire an already
destroyed timer-mutex etc.

Eg:

    __cpufreq_remove_dev()
     __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
       policy->governor->governor(policy, CPUFREQ_GOV_STOP);
        cpufreq_governor_dbs()
         case CPUFREQ_GOV_STOP:
          mutex_destroy(&cpu_cdbs->timer_mutex)
          cpu_cdbs->cur_policy = NULL;
      <PREEMPT>
    store()
     __cpufreq_set_policy()
      __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
        policy->governor->governor(policy, CPUFREQ_GOV_LIMITS);
         case CPUFREQ_GOV_LIMITS:
          mutex_lock(&cpu_cdbs->timer_mutex); <-- Warning (destroyed mutex)
           if (policy->max < cpu_cdbs->cur_policy->cur) <- cur_policy == NULL

So use get_online_cpus()/put_online_cpus() in the store_*() functions, to
synchronize with CPU hotplug. However, there is an additional point to note
here: some parts of the CPU teardown in the cpufreq subsystem are done in
the CPU_POST_DEAD stage, with cpu_hotplug.lock *released*. So, using the
get/put_online_cpus() functions alone is insufficient; we should also ensure
that we don't race with those latter steps in the hotplug sequence. We can
easily achieve this by checking if the CPU is online before proceeding with
the store, since the CPU would have been marked offline by the time the
CPU_POST_DEAD notifiers are executed.

Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 34999fc3216f..cf016584b4ac 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -694,8 +694,13 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 	struct freq_attr *fattr = to_attr(attr);
 	ssize_t ret = -EINVAL;
 
+	get_online_cpus();
+
+	if (!cpu_online(policy->cpu))
+		goto unlock;
+
 	if (!down_read_trylock(&cpufreq_rwsem))
-		goto exit;
+		goto unlock;
 
 	if (lock_policy_rwsem_write(policy->cpu) < 0)
 		goto up_read;
@@ -709,7 +714,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
 up_read:
 	up_read(&cpufreq_rwsem);
-exit:
+unlock:
+	put_online_cpus();
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 56d07db274b7b15ca38b60ea4a762d40de093000 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 7 Sep 2013 01:23:55 +0530
Subject: cpufreq: Remove temporary fix for race between CPU hotplug and
 sysfs-writes

Commit "cpufreq: serialize calls to __cpufreq_governor()" had been a temporary
and partial solution to the race condition between writing to a cpufreq sysfs
file and taking a CPU offline. Now that we have a proper and complete solution
to that problem, remove the temporary fix.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 7 +------
 include/linux/cpufreq.h   | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index cf016584b4ac..2863214c5381 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1739,15 +1739,13 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 						policy->cpu, event);
 
 	mutex_lock(&cpufreq_governor_lock);
-	if (policy->governor_busy
-	    || (policy->governor_enabled && event == CPUFREQ_GOV_START)
+	if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
 	    || (!policy->governor_enabled
 	    && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
 		mutex_unlock(&cpufreq_governor_lock);
 		return -EBUSY;
 	}
 
-	policy->governor_busy = true;
 	if (event == CPUFREQ_GOV_STOP)
 		policy->governor_enabled = false;
 	else if (event == CPUFREQ_GOV_START)
@@ -1776,9 +1774,6 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
 			((event == CPUFREQ_GOV_POLICY_EXIT) && !ret))
 		module_put(policy->governor->owner);
 
-	mutex_lock(&cpufreq_governor_lock);
-	policy->governor_busy = false;
-	mutex_unlock(&cpufreq_governor_lock);
 	return ret;
 }
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index cca885dac1d3..d568f3975eeb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -76,7 +76,6 @@ struct cpufreq_policy {
 	struct cpufreq_governor	*governor; /* see below */
 	void			*governor_data;
 	bool			governor_enabled; /* governor start/stop flag */
-	bool			governor_busy;
 
 	struct work_struct	update; /* if update_policy() needs to be
 					 * called, but you're in IRQ context */
-- 
cgit v1.2.3


From 5136fa56582beadb7fa71eb30bc79148bfcba5c1 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 7 Sep 2013 01:24:06 +0530
Subject: cpufreq: Use signed type for 'ret' variable, to store negative error
 values

There are places where the variable 'ret' is declared as unsigned int
and then used to store negative return values such as -EINVAL. Fix them
by declaring the variable as a signed quantity.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2863214c5381..73d53d5a16ee 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -437,7 +437,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *policy,
 static ssize_t store_##file_name					\
 (struct cpufreq_policy *policy, const char *buf, size_t count)		\
 {									\
-	unsigned int ret;						\
+	int ret;							\
 	struct cpufreq_policy new_policy;				\
 									\
 	ret = cpufreq_get_policy(&new_policy, policy->cpu);		\
@@ -490,7 +490,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
 					const char *buf, size_t count)
 {
-	unsigned int ret;
+	int ret;
 	char	str_governor[16];
 	struct cpufreq_policy new_policy;
 
-- 
cgit v1.2.3


From 798282a8718347b04a2f0a4bae7d775c48c6bcb9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 10 Sep 2013 02:54:50 +0200
Subject: Revert "cpufreq: make sure frequency transitions are serialized"

Commit 7c30ed5 (cpufreq: make sure frequency transitions are
serialized) attempted to serialize frequency transitions by
adding checks to the CPUFREQ_PRECHANGE and CPUFREQ_POSTCHANGE
notifications.  However, it assumed that the notifications will
always originate from the driver's .target() callback, but they
also can be triggered by cpufreq_out_of_sync() and that leads to
warnings like this on some systems:

 WARNING: CPU: 0 PID: 14543 at drivers/cpufreq/cpufreq.c:317
 __cpufreq_notify_transition+0x238/0x260()
 In middle of another frequency transition

accompanied by a call trace similar to this one:

 [<ffffffff81720daa>] dump_stack+0x46/0x58
 [<ffffffff8106534c>] warn_slowpath_common+0x8c/0xc0
 [<ffffffff815b8560>] ? acpi_cpufreq_target+0x320/0x320
 [<ffffffff81065436>] warn_slowpath_fmt+0x46/0x50
 [<ffffffff815b1ec8>] __cpufreq_notify_transition+0x238/0x260
 [<ffffffff815b33be>] cpufreq_notify_transition+0x3e/0x70
 [<ffffffff815b345d>] cpufreq_out_of_sync+0x6d/0xb0
 [<ffffffff815b370c>] cpufreq_update_policy+0x10c/0x160
 [<ffffffff815b3760>] ? cpufreq_update_policy+0x160/0x160
 [<ffffffff81413813>] cpufreq_set_cur_state+0x8c/0xb5
 [<ffffffff814138df>] processor_set_cur_state+0xa3/0xcf
 [<ffffffff8158e13c>] thermal_cdev_update+0x9c/0xb0
 [<ffffffff8159046a>] step_wise_throttle+0x5a/0x90
 [<ffffffff8158e21f>] handle_thermal_trip+0x4f/0x140
 [<ffffffff8158e377>] thermal_zone_device_update+0x57/0xa0
 [<ffffffff81415b36>] acpi_thermal_check+0x2e/0x30
 [<ffffffff81415ca0>] acpi_thermal_notify+0x40/0xdc
 [<ffffffff813e7dbd>] acpi_device_notify+0x19/0x1b
 [<ffffffff813f8241>] acpi_ev_notify_dispatch+0x41/0x5c
 [<ffffffff813e3fbe>] acpi_os_execute_deferred+0x25/0x32
 [<ffffffff81081060>] process_one_work+0x170/0x4a0
 [<ffffffff81082121>] worker_thread+0x121/0x390
 [<ffffffff81082000>] ? manage_workers.isra.20+0x170/0x170
 [<ffffffff81088fe0>] kthread+0xc0/0xd0
 [<ffffffff81088f20>] ? flush_kthread_worker+0xb0/0xb0
 [<ffffffff8173582c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81088f20>] ? flush_kthread_worker+0xb0/0xb0

For this reason, revert commit 7c30ed5 along with the fix 266c13d
(cpufreq: Fix serialization of frequency transitions) on top of it
and we will revisit the serialization problem later.

Reported-by: Alessandro Bono <alessandro.bono@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 15 ---------------
 include/linux/cpufreq.h   |  1 -
 2 files changed, 16 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 73d53d5a16ee..5a64f66d36e0 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -280,13 +280,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
 	switch (state) {
 
 	case CPUFREQ_PRECHANGE:
-		if (WARN(policy->transition_ongoing ==
-					cpumask_weight(policy->cpus),
-				"In middle of another frequency transition\n"))
-			return;
-
-		policy->transition_ongoing++;
-
 		/* detect if the driver reported a value as "old frequency"
 		 * which is not equal to what the cpufreq core thinks is
 		 * "old frequency".
@@ -306,12 +299,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
 		break;
 
 	case CPUFREQ_POSTCHANGE:
-		if (WARN(!policy->transition_ongoing,
-				"No frequency transition in progress\n"))
-			return;
-
-		policy->transition_ongoing--;
-
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
 		pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
 			(unsigned long)freqs->cpu);
@@ -1657,8 +1644,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 
 	if (cpufreq_disabled())
 		return -ENODEV;
-	if (policy->transition_ongoing)
-		return -EBUSY;
 
 	/* Make sure that target_freq is within supported range */
 	if (target_freq > policy->max)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d568f3975eeb..fcabc42d66ab 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -85,7 +85,6 @@ struct cpufreq_policy {
 	struct list_head        policy_list;
 	struct kobject		kobj;
 	struct completion	kobj_unregister;
-	int			transition_ongoing; /* Tracks transition status */
 };
 
 /* Only for ACPI */
-- 
cgit v1.2.3


From 6cdcdb793791f776ea9408581b1242b636d43b37 Mon Sep 17 00:00:00 2001
From: Nell Hardcastle <nell@spicious.com>
Date: Sun, 30 Jun 2013 15:58:57 -0700
Subject: intel_pstate: Add Haswell CPU models

Enable the intel_pstate driver for Haswell CPUs. One missing Ivy Bridge
model (0x3E) is also included. Models referenced from
tools/power/x86/turbostat/turbostat.c:has_nehalem_turbo_ratio_limit

Signed-off-by: Nell Hardcastle <nell@spicious.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers')

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6efd96c196b2..9733f29ed148 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -522,6 +522,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	ICPU(0x2a, default_policy),
 	ICPU(0x2d, default_policy),
 	ICPU(0x3a, default_policy),
+	ICPU(0x3c, default_policy),
+	ICPU(0x3e, default_policy),
+	ICPU(0x3f, default_policy),
+	ICPU(0x45, default_policy),
+	ICPU(0x46, default_policy),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
-- 
cgit v1.2.3


From 0d66b91ebff49841f607a3c079984c907c8a4199 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 12 Sep 2013 01:42:59 +0530
Subject: cpufreq: Fix crash in cpufreq-stats during suspend/resume

Stephen Warren reported that the cpufreq-stats code hits a NULL pointer
dereference during the second attempt to suspend a system. He also
pin-pointed the problem to commit 5302c3f "cpufreq: Perform light-weight
init/teardown during suspend/resume".

That commit actually ensured that the cpufreq-stats table and the
cpufreq-stats sysfs entries are *not* torn down (ie., not freed) during
suspend/resume, which makes it all the more surprising. However, it turns
out that the root-cause is not that we access an already freed memory, but
that the reference to the allocated memory gets moved around and we lose
track of that during resume, leading to the reported crash in a subsequent
suspend attempt.

In the suspend path, during CPU offline, the value of policy->cpu is updated
by choosing one of the surviving CPUs in that policy, as long as there is
atleast one CPU in that policy. And cpufreq_stats_update_policy_cpu() is
invoked to update the reference to the stats structure by assigning it to
the new CPU. However, in the resume path, during CPU online, we end up
assigning a fresh CPU as the policy->cpu, without letting cpufreq-stats
know about this. Thus the reference to the stats structure remains
(incorrectly) associated with the old CPU. So, in a subsequent suspend attempt,
during CPU offline, we end up accessing an incorrect location to get the
stats structure, which eventually leads to the NULL pointer dereference.

Fix this by letting cpufreq-stats know about the update of the policy->cpu
during CPU online in the resume path. (Also, move the update_policy_cpu()
function higher up in the file, so that __cpufreq_add_dev() can invoke
it).

Reported-and-tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5a64f66d36e0..62bdb955ea56 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -947,6 +947,18 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 	kfree(policy);
 }
 
+static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
+{
+	policy->last_cpu = policy->cpu;
+	policy->cpu = cpu;
+
+#ifdef CONFIG_CPU_FREQ_TABLE
+	cpufreq_frequency_table_update_policy_cpu(policy);
+#endif
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+			CPUFREQ_UPDATE_POLICY_CPU, policy);
+}
+
 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 			     bool frozen)
 {
@@ -1000,7 +1012,18 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 	if (!policy)
 		goto nomem_out;
 
-	policy->cpu = cpu;
+
+	/*
+	 * In the resume path, since we restore a saved policy, the assignment
+	 * to policy->cpu is like an update of the existing policy, rather than
+	 * the creation of a brand new one. So we need to perform this update
+	 * by invoking update_policy_cpu().
+	 */
+	if (frozen && cpu != policy->cpu)
+		update_policy_cpu(policy, cpu);
+	else
+		policy->cpu = cpu;
+
 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 	cpumask_copy(policy->cpus, cpumask_of(cpu));
 
@@ -1092,18 +1115,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	return __cpufreq_add_dev(dev, sif, false);
 }
 
-static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
-{
-	policy->last_cpu = policy->cpu;
-	policy->cpu = cpu;
-
-#ifdef CONFIG_CPU_FREQ_TABLE
-	cpufreq_frequency_table_update_policy_cpu(policy);
-#endif
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_UPDATE_POLICY_CPU, policy);
-}
-
 static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
 					   unsigned int old_cpu, bool frozen)
 {
-- 
cgit v1.2.3


From 61173f256a3bebfbd09b4bd2c164dde378614091 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 12 Sep 2013 01:43:25 +0530
Subject: cpufreq: Restructure if/else block to avoid unintended behavior

In __cpufreq_remove_dev_prepare(), the code which decides whether to remove
the sysfs link or nominate a new policy cpu, is governed by an if/else block
with a rather complex set of conditionals. Worse, they harbor a subtlety
which leads to certain unintended behavior.

The code looks like this:

        if (cpu != policy->cpu && !frozen) {
                sysfs_remove_link(&dev->kobj, "cpufreq");
        } else if (cpus > 1) {
		new_cpu = cpufreq_nominate_new_policy_cpu(...);
		...
		update_policy_cpu(..., new_cpu);
	}

The original intention was:
If the CPU going offline is not policy->cpu, just remove the link.
On the other hand, if the CPU going offline is the policy->cpu itself,
handover the policy->cpu job to some other surviving CPU in that policy.

But because the 'if' condition also includes the 'frozen' check, now there
are *two* possibilities by which we can enter the 'else' block:

1. cpu == policy->cpu (intended)
2. cpu != policy->cpu && frozen (unintended)

Due to the second (unintended) scenario, we end up spuriously nominating
a CPU as the policy->cpu, even when the existing policy->cpu is alive and
well. This can cause problems further down the line, especially when we end
up nominating the same policy->cpu as the new one (ie., old == new),
because it totally confuses update_policy_cpu().

To avoid this mess, restructure the if/else block to only do what was
originally intended, and thus prevent any unwelcome surprises.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 62bdb955ea56..247842b2ee2d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1193,8 +1193,9 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
 		cpumask_clear_cpu(cpu, policy->cpus);
 	unlock_policy_rwsem_write(cpu);
 
-	if (cpu != policy->cpu && !frozen) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
+	if (cpu != policy->cpu) {
+		if (!frozen)
+			sysfs_remove_link(&dev->kobj, "cpufreq");
 	} else if (cpus > 1) {
 
 		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
-- 
cgit v1.2.3


From cb38ed5cf1c4fdb7454e4b48fb70c396f5acfb21 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 12 Sep 2013 01:43:42 +0530
Subject: cpufreq: Prevent problems in update_policy_cpu() if last_cpu ==
 new_cpu

If update_policy_cpu() is invoked with the existing policy->cpu itself
as the new-cpu parameter, then a lot of things can go terribly wrong.

In its present form, update_policy_cpu() always assumes that the new-cpu
is different from policy->cpu and invokes other functions to perform their
respective updates. And those functions implement the actual update like
this:

per_cpu(..., new_cpu) = per_cpu(..., last_cpu);
per_cpu(..., last_cpu) = NULL;

Thus, when new_cpu == last_cpu, the final NULL assignment makes the per-cpu
references vanish into thin air! (memory leak). From there, it leads to more
problems: cpufreq_stats_create_table() now doesn't find the per-cpu reference
and hence tries to create a new sysfs-group; but sysfs already had created
the group earlier, so it complains that it cannot create a duplicate filename.
In short, the repercussions of a rather innocuous invocation of
update_policy_cpu() can turn out to be pretty nasty.

Ideally update_policy_cpu() should handle this situation (new == last)
gracefully, and not lead to such severe problems. So fix it by adding an
appropriate check.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 247842b2ee2d..d32040cc1c46 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -949,6 +949,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 
 static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
 {
+	if (cpu == policy->cpu)
+		return;
+
 	policy->last_cpu = policy->cpu;
 	policy->cpu = cpu;
 
-- 
cgit v1.2.3


From 44871c9c7f7963f8869dd8bc9620221c9e9db153 Mon Sep 17 00:00:00 2001
From: Lan Tianyu <tianyu.lan@intel.com>
Date: Wed, 11 Sep 2013 15:05:05 +0800
Subject: cpufreq: Acquire the lock in cpufreq_policy_restore() for reading

In cpufreq_policy_restore() before system suspend policy is read from
percpu's cpufreq_cpu_data_fallback.  It's a read operation rather
than a write one, so take the lock for reading in there.

Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index d32040cc1c46..43c24aa756f6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -906,11 +906,11 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
 	struct cpufreq_policy *policy;
 	unsigned long flags;
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
 
 	policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
 
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	return policy;
 }
-- 
cgit v1.2.3