From 79040cad3f8235937e229f1b9401ba36dd5ad69b Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 10 Feb 2014 14:25:43 -0800
Subject: drivers/edac/edac_mc_sysfs.c: poll timeout cannot be zero

If you do

  echo 0 > /sys/module/edac_core/parameters/edac_mc_poll_msec

the following stack trace is output because the edac module is not
designed to poll with a timeout of zero.

  WARNING: CPU: 12 PID: 0 at lib/list_debug.c:33 __list_add+0xac/0xc0()
  list_add corruption. prev->next should be next (ffff8808291dd1b8), but was           (null). (prev=ffff8808286fe3f8).
  Modules linked in: sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache cfg80211 rfkill x86_pkg_temp_thermal coretemp kvm_intel kvm ixgbe e1000e crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt ptp sb_edac iTCO_vendor_support pps_core mdio ipmi_devintf edac_core ioatdma microcode shpchp lpc_ich pcspkr i2c_i801 dca mfd_core ipmi_si wmi ipmi_msghandler nfsd auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sd_mod sr_mod cdrom crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt isci i2c_algo_bit drm_kms_helper ttm drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod
  CPU: 12 PID: 0 Comm: swapper/12 Not tainted 3.13.0+ #1
  Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
  Call Trace:
   <IRQ>
    __list_add+0xac/0xc0
    __internal_add_timer+0xab/0x130
    internal_add_timer+0x17/0x40
    mod_timer_pinned+0xca/0x170
    intel_pstate_timer_func+0x28a/0x380
    call_timer_fn+0x36/0x100
    run_timer_softirq+0x1ff/0x2f0
    __do_softirq+0xf5/0x2e0
    irq_exit+0x10d/0x120
    smp_apic_timer_interrupt+0x45/0x60
    apic_timer_interrupt+0x6d/0x80
   <EOI>
    cpuidle_idle_call+0xb9/0x1f0
    arch_cpu_idle+0xe/0x30
    cpu_startup_entry+0x9e/0x240
    start_secondary+0x1e4/0x290

  kernel BUG at kernel/timer.c:1084!
  invalid opcode: 0000 [#1] SMP
  Modules linked in: sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache cfg80211 rfkill x86_pkg_temp_thermal coretemp kvm_intel kvm ixgbe e1000e crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt ptp sb_edac iTCO_vendor_support pps_core mdio ipmi_devintf edac_core ioatdma microcode shpchp lpc_ich pcspkr i2c_i801 dca mfd_core ipmi_si wmi ipmi_msghandler nfsd auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sd_mod sr_mod cdrom crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt isci i2c_algo_bit drm_kms_helper ttm drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod
  CPU: 12 PID: 0 Comm: swapper/12 Tainted: G        W    3.13.0+ #1
  Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
  Call Trace:
   <IRQ>
    run_timer_softirq+0x245/0x2f0
    __do_softirq+0xf5/0x2e0
    irq_exit+0x10d/0x120
    smp_apic_timer_interrupt+0x45/0x60
    apic_timer_interrupt+0x6d/0x80
   <EOI>
    cpuidle_idle_call+0xb9/0x1f0
    arch_cpu_idle+0xe/0x30
    cpu_startup_entry+0x9e/0x240
    start_secondary+0x1e4/0x290
  RIP   cascade+0x93/0xa0

  WARNING: CPU: 36 PID: 1154 at kernel/workqueue.c:1461 __queue_delayed_work+0xed/0x1a0()
  Modules linked in: sg nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache cfg80211 rfkill x86_pkg_temp_thermal coretemp kvm_intel kvm ixgbe e1000e crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd iTCO_wdt ptp sb_edac iTCO_vendor_support pps_core mdio ipmi_devintf edac_core ioatdma microcode shpchp lpc_ich pcspkr i2c_i801 dca mfd_core ipmi_si wmi ipmi_msghandler nfsd auth_rpcgss nfs_acl lockd sunrpc xfs libcrc32c sd_mod sr_mod cdrom crc_t10dif crct10dif_common mgag200 syscopyarea sysfillrect sysimgblt isci i2c_algo_bit drm_kms_helper ttm drm libsas ahci libahci scsi_transport_sas libata i2c_core dm_mirror dm_region_hash dm_log dm_mod
  CPU: 36 PID: 1154 Comm: kworker/u481:3 Tainted: G        W    3.13.0+ #1
  Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013
  Workqueue: edac-poller edac_mc_workq_function [edac_core]
  Call Trace:
    dump_stack+0x45/0x56
    warn_slowpath_common+0x7d/0xa0
    warn_slowpath_null+0x1a/0x20
    __queue_delayed_work+0xed/0x1a0
    queue_delayed_work_on+0x27/0x50
    edac_mc_workq_function+0x72/0xa0 [edac_core]
    process_one_work+0x17b/0x460
    worker_thread+0x11b/0x400
    kthread+0xd2/0xf0
    ret_from_fork+0x7c/0xb0

This patch adds a range check in the edac_mc_poll_msec code to check for 0.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/edac/edac_mc_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/edac')
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 51c0362acf5c..8ec1747b1c39 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -61,7 +61,7 @@ static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
 	ret = kstrtol(val, 0, &l);
 	if (ret)
 		return ret;
-	if ((int)l != l)
+	if (!l || ((int)l != l))
 		return -EINVAL;
 	*((int *)kp->arg) = l;
 
-- 
cgit v1.2.3


From 9da21b1509d8aa7ab4846722817d16c72d656c91 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 3 Feb 2014 15:05:13 -0500
Subject: EDAC: Poll timeout cannot be zero, p2

Sanitize code even more to accept unsigned longs only and to not allow
polling intervals below 1 second as this is unnecessary and doesn't make
much sense anyway for polling errors.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1391457913-881-1-git-send-email-prarit@redhat.com
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: <stable@vger.kernel.org>
---
 drivers/edac/edac_mc.c       |  4 ++--
 drivers/edac/edac_mc_sysfs.c | 10 ++++++----
 drivers/edac/edac_module.h   |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'drivers/edac')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index e8c9ef03495b..aef5ec24908e 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -601,7 +601,7 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
  *	user space has updated our poll period value, need to
  *	reset our workq delays
  */
-void edac_mc_reset_delay_period(int value)
+void edac_mc_reset_delay_period(unsigned long value)
 {
 	struct mem_ctl_info *mci;
 	struct list_head *item;
@@ -611,7 +611,7 @@ void edac_mc_reset_delay_period(int value)
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
-		edac_mc_workq_setup(mci, (unsigned long) value);
+		edac_mc_workq_setup(mci, value);
 	}
 
 	mutex_unlock(&mem_ctls_mutex);
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 8ec1747b1c39..b335c6ab5efe 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -52,18 +52,20 @@ int edac_mc_get_poll_msec(void)
 
 static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
 {
-	long l;
+	unsigned long l;
 	int ret;
 
 	if (!val)
 		return -EINVAL;
 
-	ret = kstrtol(val, 0, &l);
+	ret = kstrtoul(val, 0, &l);
 	if (ret)
 		return ret;
-	if (!l || ((int)l != l))
+
+	if (l < 1000)
 		return -EINVAL;
-	*((int *)kp->arg) = l;
+
+	*((unsigned long *)kp->arg) = l;
 
 	/* notify edac_mc engine to reset the poll period */
 	edac_mc_reset_delay_period(l);
diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h
index 3d139c6e7fe3..f2118bfcf8df 100644
--- a/drivers/edac/edac_module.h
+++ b/drivers/edac/edac_module.h
@@ -52,7 +52,7 @@ extern void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
 extern void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev);
 extern void edac_device_reset_delay_period(struct edac_device_ctl_info
 					   *edac_dev, unsigned long value);
-extern void edac_mc_reset_delay_period(int value);
+extern void edac_mc_reset_delay_period(unsigned long value);
 
 extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
 
-- 
cgit v1.2.3


From cb6ef42e516cb8948f15e4b70dc03af8020050a2 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 12 Feb 2014 18:15:00 +0100
Subject: EDAC: Correct workqueue setup path

We're using edac_mc_workq_setup() both on the init path, when
we load an edac driver and when we change the polling period
(edac_mc_reset_delay_period) through /sys/.../edac_mc_poll_msec.

On that second path we don't need to init the workqueue which has been
initialized already.

Thanks to Tejun for workqueue insights.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1391457913-881-1-git-send-email-prarit@redhat.com
Cc: <stable@vger.kernel.org>
---
 drivers/edac/edac_mc.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'drivers/edac')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index aef5ec24908e..33edd6766344 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -559,7 +559,8 @@ static void edac_mc_workq_function(struct work_struct *work_req)
  *
  *		called with the mem_ctls_mutex held
  */
-static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
+static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec,
+				bool init)
 {
 	edac_dbg(0, "\n");
 
@@ -567,7 +568,9 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 	if (mci->op_state != OP_RUNNING_POLL)
 		return;
 
-	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
+	if (init)
+		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
+
 	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 }
 
@@ -611,7 +614,7 @@ void edac_mc_reset_delay_period(unsigned long value)
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
-		edac_mc_workq_setup(mci, value);
+		edac_mc_workq_setup(mci, value, false);
 	}
 
 	mutex_unlock(&mem_ctls_mutex);
@@ -782,7 +785,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 		/* This instance is NOW RUNNING */
 		mci->op_state = OP_RUNNING_POLL;
 
-		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
+		edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true);
 	} else {
 		mci->op_state = OP_RUNNING_INTERRUPT;
 	}
-- 
cgit v1.2.3


From c0f5eeed0f4cef4f05b74883a7160e7edde58b6a Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 24 Feb 2014 09:39:27 +0100
Subject: i7core_edac: Fix PCI device reference count

The reference count changes done by pci_get_device can be a little
misleading when the usage diverges from the most common scheme. The
reference count of the device passed as the last parameter is always
decreased, even if the function returns no new device. So if we are
going to try alternative device IDs, we must manually increment the
device reference count before each retry. If we don't, we end up
decreasing the reference count, and after a few modprobe/rmmod cycles
the PCI devices will vanish.

In other words and as Alan put it: without this fix the EDAC code
corrupts the PCI device list.

This fixes kernel bug #50491:
https://bugzilla.kernel.org/show_bug.cgi?id=50491

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Link: http://lkml.kernel.org/r/20140224093927.7659dd9d@endymion.delvare
Reviewed-by: Alan Cox <alan@linux.intel.com>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/i7core_edac.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'drivers/edac')

diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 87533ca7752e..d871275196f6 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1334,14 +1334,19 @@ static int i7core_get_onedevice(struct pci_dev **prev,
 	 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
 	 * to probe for the alternate address in case of failure
 	 */
-	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
+	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
+		pci_dev_get(*prev);	/* pci_get_device will put it */
 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
+	}
 
-	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
+	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
+	    !pdev) {
+		pci_dev_get(*prev);	/* pci_get_device will put it */
 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 				      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
 				      *prev);
+	}
 
 	if (!pdev) {
 		if (*prev) {
-- 
cgit v1.2.3


From 75135da0d68419ef8a925f4c1d5f63d8046e314d Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Tue, 25 Feb 2014 09:43:13 +0100
Subject: i7300_edac: Fix device reference count

pci_get_device() decrements the reference count of "from" (last
argument) so when we break off the loop successfully we have only one
device reference - and we don't know which device we have. If we want
a reference to each device, we must take them explicitly and let
the pci_get_device() walk complete to avoid duplicate references.

This is serious, as over-putting device references will cause
the device to eventually disappear. Without this fix, the kernel
crashes after a few insmod/rmmod cycles.

Tested on an Intel S7000FC4UR system with a 7300 chipset.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Link: http://lkml.kernel.org/r/20140224111656.09bbb7ed@endymion.delvare
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Cc: Doug Thompson <dougthompson@xmission.com>
Cc: stable@vger.kernel.org
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 drivers/edac/i7300_edac.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'drivers/edac')

diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index d63f4798f7d0..57e96a3350f0 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -943,33 +943,35 @@ static int i7300_get_devices(struct mem_ctl_info *mci)
 
 	/* Attempt to 'get' the MCH register we want */
 	pdev = NULL;
-	while (!pvt->pci_dev_16_1_fsb_addr_map ||
-	       !pvt->pci_dev_16_2_fsb_err_regs) {
-		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
-				      PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
-		if (!pdev) {
-			/* End of list, leave */
-			i7300_printk(KERN_ERR,
-				"'system address,Process Bus' "
-				"device not found:"
-				"vendor 0x%x device 0x%x ERR funcs "
-				"(broken BIOS?)\n",
-				PCI_VENDOR_ID_INTEL,
-				PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
-			goto error;
-		}
-
+	while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+				      PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
+				      pdev))) {
 		/* Store device 16 funcs 1 and 2 */
 		switch (PCI_FUNC(pdev->devfn)) {
 		case 1:
-			pvt->pci_dev_16_1_fsb_addr_map = pdev;
+			if (!pvt->pci_dev_16_1_fsb_addr_map)
+				pvt->pci_dev_16_1_fsb_addr_map =
+							pci_dev_get(pdev);
 			break;
 		case 2:
-			pvt->pci_dev_16_2_fsb_err_regs = pdev;
+			if (!pvt->pci_dev_16_2_fsb_err_regs)
+				pvt->pci_dev_16_2_fsb_err_regs =
+							pci_dev_get(pdev);
 			break;
 		}
 	}
 
+	if (!pvt->pci_dev_16_1_fsb_addr_map ||
+	    !pvt->pci_dev_16_2_fsb_err_regs) {
+		/* At least one device was not found */
+		i7300_printk(KERN_ERR,
+			"'system address,Process Bus' device not found:"
+			"vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n",
+			PCI_VENDOR_ID_INTEL,
+			PCI_DEVICE_ID_INTEL_I7300_MCH_ERR);
+		goto error;
+	}
+
 	edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s  %x:%x\n",
 		 pci_name(pvt->pci_dev_16_0_fsb_ctlr),
 		 pvt->pci_dev_16_0_fsb_ctlr->vendor,
-- 
cgit v1.2.3