From 28787bf47b11b08290918dcf91b08764cb5fe122 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Mon, 20 Mar 2017 12:25:51 +0100
Subject: ARM: sun8i: a33: remove highest OPP to fix CPU crashes

The highest supported frequency (1.2GHz) requires to "overvolt" the CPU.
However, some boards still do not have the cpu-supply DT property in the
cpu DT node which means that the CPU will always run with the same input
voltage but try to run at 1.2GHz frequency. This is the source of
(experienced) CPU crashes.

Remove the OPP which requires overvolting the CPU until all boards have
a cpu-supply property.

Fixes: 03749eb88e63 ("ARM: dts: sun8i: add opp-v2 table for A33")
Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a33.dtsi | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 18c174fef84f..045d488cc7ed 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -66,12 +66,6 @@
 			opp-microvolt = <1200000>;
 			clock-latency-ns = <244144>; /* 8 32k periods */
 		};
-
-		opp@1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt = <1320000>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-		};
 	};
 
 	cpus {
-- 
cgit v1.2.3


From ea33c2c2051a266f68d9cd920c789cec828c8f11 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 21 Mar 2017 16:36:01 +0100
Subject: ARM: sun8i: a33: add operating-points-v2 property to all nodes

The OPP are declared as shared but no operating points are declared for
cpu1, 2 and 3. Thus, the following error happens during the boot:

cpu cpu1: dev_pm_opp_of_get_sharing_cpus: Couldn't find tcpu_dev node.

This patch applies the operating points to each cpu of the A33.

Fixes: 03749eb88e63 ("ARM: dts: sun8i: add opp-v2 table for A33")
Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a33.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 045d488cc7ed..a367c0ac4e76 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -75,16 +75,22 @@
 			operating-points-v2 = <&cpu0_opp_table>;
 		};
 
+		cpu@1 {
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
 		cpu@2 {
 			compatible = "arm,cortex-a7";
 			device_type = "cpu";
 			reg = <2>;
+			operating-points-v2 = <&cpu0_opp_table>;
 		};
 
 		cpu@3 {
 			compatible = "arm,cortex-a7";
 			device_type = "cpu";
 			reg = <3>;
+			operating-points-v2 = <&cpu0_opp_table>;
 		};
 	};
 
-- 
cgit v1.2.3


From 06e1a5cc570703796ff1bd3a712e8e3b15c6bb0d Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Mon, 6 Mar 2017 12:56:55 -0600
Subject: ARM: dts: OMAP3: Fix MFG ID EEPROM

The manufacturing information is stored in the EEPROM.  This chip
is an AT24C64 not not (nor has it ever been) 24C02.  This patch will
correctly address the EEPROM to read the entire contents and not just
256 bytes (of 0xff).

Fixes: 5e3447a29a38 ("ARM: dts: LogicPD Torpedo: Add AT24 EEPROM Support")

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 8f9a69ca818c..efe53998c961 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -121,7 +121,7 @@
 &i2c3 {
 	clock-frequency = <400000>;
 	at24@50 {
-		compatible = "at24,24c02";
+		compatible = "atmel,24c64";
 		readonly;
 		reg = <0x50>;
 	};
-- 
cgit v1.2.3


From ce2899428ec0249512023aecaa3530c29f775a4f Mon Sep 17 00:00:00 2001
From: Yegor Yefremov <yegorslists@googlemail.com>
Date: Mon, 13 Mar 2017 10:03:14 +0100
Subject: ARM: dts: am335x-baltos: disable EEE for Atheros 8035 PHY

Though cpsw doesn't support EEE feature, Atheros 8035 provides
automatic EEE support that is enabled by default. This causes
occasional link drops when link partner also announces EEE support.
These link drops occur on both 100Mbit/s and 1000Mbit/s speeds.
So disable EEE advertising completely.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-baltos.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi
index efb5eae290a8..d42b98f15e8b 100644
--- a/arch/arm/boot/dts/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/am335x-baltos.dtsi
@@ -371,6 +371,8 @@
 
 	phy1: ethernet-phy@1 {
 		reg = <7>;
+		eee-broken-100tx;
+		eee-broken-1000t;
 	};
 };
 
-- 
cgit v1.2.3


From 7d79f6098d82f8c09914d7799bc96891ad9c3baf Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 Mar 2017 21:03:01 -0500
Subject: ARM: dts: ti: fix PCI bus dtc warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dtc recently added PCI bus checks. Fix these warnings.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: "Benoît Cousson" <bcousson@baylibre.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra7.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 2c9e56f4aac5..bbfb9d5a70a9 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -283,6 +283,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 					  0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				linux,pci-domain = <0>;
@@ -319,6 +320,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 					  0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				linux,pci-domain = <1>;
-- 
cgit v1.2.3


From 9bcf53f34a2c1cebc45cc12e273dcd5f51fbc099 Mon Sep 17 00:00:00 2001
From: "Reizer, Eyal" <eyalr@ti.com>
Date: Sun, 26 Mar 2017 08:53:10 +0000
Subject: ARM: dts: am335x-evmsk: adjust mmc2 param to allow suspend

mmc2 used for wl12xx was missing the keep-power-in suspend
parameter. As a result the board couldn't reach suspend state.

Signed-off-by: Eyal Reizer <eyalr@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-evmsk.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 9e43c443738a..9ba4b18c0cb2 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -672,6 +672,7 @@
 	ti,non-removable;
 	bus-width = <4>;
 	cap-power-off-card;
+	keep-power-in-suspend;
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
 
-- 
cgit v1.2.3


From 351b7c490700747d1dba1b0a10fbfe3448d11c35 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 22 Mar 2017 11:01:48 -0700
Subject: ARM: omap2+: Revert omap-smp.c changes resetting CPU1 during boot

Commit 3251885285e1 ("ARM: OMAP4+: Reset CPU1 properly for kexec") started
unconditionally resetting CPU1 because of a kexec boot issue I was seeing
earlier on omap4 when doing kexec boot between two different kernel
versions.

This caused issues on some systems. We should only reset CPU1 as a last
resort option, and try to avoid it where possible. Doing an unconditional
CPU1 reset causes issues for example when booting a bootloader configured
secure OS running on CPU1 as reported by Andrew F. Davis <afd@ti.com>.

We can't completely remove the reset of CPU1 as it would break kexec
booting from older kernels. But we can limit the CPU1 reset to cases
where CPU1 is wrongly parked within the memory area used by the booting
kernel. Then later on we can add support for parking CPU1 for kexec out
of the SDRAM back to bootrom.

So let's first fix the regression reported by Andrew by making CPU1 reset
conditional. To do this, we need to:

1. Save configured AUX_CORE_BOOT_1 for later

2. Modify AUX_CORE_BOOT_0 reading code to for HS SoCs to return
   the whole register instead of the CPU mask

3. Check if CPU1 is wrongly parked into the booting kernel by the
   previous kernel and reset if needed

Fixes: 3251885285e1 ("ARM: OMAP4+: Reset CPU1 properly for kexec")
Reported-by: Andrew F. Davis <afd@ti.com>
Cc: Andrew F. Davis <afd@ti.com>
Cc: Keerthy <j-keerthy@ti.com>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Santosh Shilimkar <ssantosh@kernel.org>
Cc: Tero Kristo <t-kristo@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Tested-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/common.h              |  1 +
 arch/arm/mach-omap2/omap-hotplug.c        |  2 +-
 arch/arm/mach-omap2/omap-mpuss-lowpower.c | 22 ++++++--
 arch/arm/mach-omap2/omap-smc.S            |  1 -
 arch/arm/mach-omap2/omap-smp.c            | 90 ++++++++++++++++++++++++++-----
 5 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index c4f2ace91ea2..3089d3bfa19b 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -270,6 +270,7 @@ extern const struct smp_operations omap4_smp_ops;
 extern int omap4_mpuss_init(void);
 extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
 extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
+extern u32 omap4_get_cpu1_ns_pa_addr(void);
 #else
 static inline int omap4_enter_lowpower(unsigned int cpu,
 					unsigned int power_state)
diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c
index d3fb5661bb5d..433db6d0b073 100644
--- a/arch/arm/mach-omap2/omap-hotplug.c
+++ b/arch/arm/mach-omap2/omap-hotplug.c
@@ -50,7 +50,7 @@ void omap4_cpu_die(unsigned int cpu)
 		omap4_hotplug_cpu(cpu, PWRDM_POWER_OFF);
 
 		if (omap_secure_apis_support())
-			boot_cpu = omap_read_auxcoreboot0();
+			boot_cpu = omap_read_auxcoreboot0() >> 9;
 		else
 			boot_cpu =
 				readl_relaxed(base + OMAP_AUX_CORE_BOOT_0) >> 5;
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 113ab2dd2ee9..03ec6d307c82 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -64,6 +64,7 @@
 #include "prm-regbits-44xx.h"
 
 static void __iomem *sar_base;
+static u32 old_cpu1_ns_pa_addr;
 
 #if defined(CONFIG_PM) && defined(CONFIG_SMP)
 
@@ -212,6 +213,11 @@ static void __init save_l2x0_context(void)
 {}
 #endif
 
+u32 omap4_get_cpu1_ns_pa_addr(void)
+{
+	return old_cpu1_ns_pa_addr;
+}
+
 /**
  * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
  * The purpose of this function is to manage low power programming
@@ -460,22 +466,30 @@ int __init omap4_mpuss_init(void)
 void __init omap4_mpuss_early_init(void)
 {
 	unsigned long startup_pa;
+	void __iomem *ns_pa_addr;
 
-	if (!(cpu_is_omap44xx() || soc_is_omap54xx()))
+	if (!(soc_is_omap44xx() || soc_is_omap54xx()))
 		return;
 
 	sar_base = omap4_get_sar_ram_base();
 
-	if (cpu_is_omap443x())
+	/* Save old NS_PA_ADDR for validity checks later on */
+	if (soc_is_omap44xx())
+		ns_pa_addr = sar_base + CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+	else
+		ns_pa_addr = sar_base + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+	old_cpu1_ns_pa_addr = readl_relaxed(ns_pa_addr);
+
+	if (soc_is_omap443x())
 		startup_pa = __pa_symbol(omap4_secondary_startup);
-	else if (cpu_is_omap446x())
+	else if (soc_is_omap446x())
 		startup_pa = __pa_symbol(omap4460_secondary_startup);
 	else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
 		startup_pa = __pa_symbol(omap5_secondary_hyp_startup);
 	else
 		startup_pa = __pa_symbol(omap5_secondary_startup);
 
-	if (cpu_is_omap44xx())
+	if (soc_is_omap44xx())
 		writel_relaxed(startup_pa, sar_base +
 			       CPU1_WAKEUP_NS_PA_ADDR_OFFSET);
 	else
diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S
index fd90125bffc7..72506e6cf9e7 100644
--- a/arch/arm/mach-omap2/omap-smc.S
+++ b/arch/arm/mach-omap2/omap-smc.S
@@ -94,6 +94,5 @@ ENTRY(omap_read_auxcoreboot0)
 	ldr	r12, =0x103
 	dsb
 	smc	#0
-	mov	r0, r0, lsr #9
 	ldmfd   sp!, {r2-r12, pc}
 ENDPROC(omap_read_auxcoreboot0)
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 003353b0b794..3faf454ba487 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/irqchip/arm-gic.h>
 
+#include <asm/sections.h>
 #include <asm/smp_scu.h>
 #include <asm/virt.h>
 
@@ -40,10 +41,14 @@
 
 #define OMAP5_CORE_COUNT	0x2
 
+#define AUX_CORE_BOOT0_GP_RELEASE	0x020
+#define AUX_CORE_BOOT0_HS_RELEASE	0x200
+
 struct omap_smp_config {
 	unsigned long cpu1_rstctrl_pa;
 	void __iomem *cpu1_rstctrl_va;
 	void __iomem *scu_base;
+	void __iomem *wakeupgen_base;
 	void *startup_addr;
 };
 
@@ -140,7 +145,6 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	static struct clockdomain *cpu1_clkdm;
 	static bool booted;
 	static struct powerdomain *cpu1_pwrdm;
-	void __iomem *base = omap_get_wakeupgen_base();
 
 	/*
 	 * Set synchronisation state between this boot processor
@@ -155,9 +159,11 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * A barrier is added to ensure that write buffer is drained
 	 */
 	if (omap_secure_apis_support())
-		omap_modify_auxcoreboot0(0x200, 0xfffffdff);
+		omap_modify_auxcoreboot0(AUX_CORE_BOOT0_HS_RELEASE,
+					 0xfffffdff);
 	else
-		writel_relaxed(0x20, base + OMAP_AUX_CORE_BOOT_0);
+		writel_relaxed(AUX_CORE_BOOT0_GP_RELEASE,
+			       cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_0);
 
 	if (!cpu1_clkdm && !cpu1_pwrdm) {
 		cpu1_clkdm = clkdm_lookup("mpu1_clkdm");
@@ -261,9 +267,72 @@ static void __init omap4_smp_init_cpus(void)
 		set_cpu_possible(i, true);
 }
 
+/*
+ * For now, just make sure the start-up address is not within the booting
+ * kernel space as that means we just overwrote whatever secondary_startup()
+ * code there was.
+ */
+static bool __init omap4_smp_cpu1_startup_valid(unsigned long addr)
+{
+	if ((addr >= __pa(PAGE_OFFSET)) && (addr <= __pa(__bss_start)))
+		return false;
+
+	return true;
+}
+
+/*
+ * We may need to reset CPU1 before configuring, otherwise kexec boot can end
+ * up trying to use old kernel startup address or suspend-resume will
+ * occasionally fail to bring up CPU1 on 4430 if CPU1 fails to enter deeper
+ * idle states.
+ */
+static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
+{
+	unsigned long cpu1_startup_pa, cpu1_ns_pa_addr;
+	bool needs_reset = false;
+	u32 released;
+
+	if (omap_secure_apis_support())
+		released = omap_read_auxcoreboot0() & AUX_CORE_BOOT0_HS_RELEASE;
+	else
+		released = readl_relaxed(cfg.wakeupgen_base +
+					 OMAP_AUX_CORE_BOOT_0) &
+						AUX_CORE_BOOT0_GP_RELEASE;
+	if (released) {
+		pr_warn("smp: CPU1 not parked?\n");
+
+		return;
+	}
+
+	cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base +
+					OMAP_AUX_CORE_BOOT_1);
+	cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
+
+	/* Did the configured secondary_startup() get overwritten? */
+	if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa))
+		needs_reset = true;
+
+	/*
+	 * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a
+	 * deeper idle state in WFI and will wake to an invalid address.
+	 */
+	if ((soc_is_omap44xx() || soc_is_omap54xx()) &&
+	    !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
+		needs_reset = true;
+
+	if (!needs_reset || !c->cpu1_rstctrl_va)
+		return;
+
+	pr_info("smp: CPU1 parked within kernel, needs reset (0x%lx 0x%lx)\n",
+		cpu1_startup_pa, cpu1_ns_pa_addr);
+
+	writel_relaxed(1, c->cpu1_rstctrl_va);
+	readl_relaxed(c->cpu1_rstctrl_va);
+	writel_relaxed(0, c->cpu1_rstctrl_va);
+}
+
 static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 {
-	void __iomem *base = omap_get_wakeupgen_base();
 	const struct omap_smp_config *c = NULL;
 
 	if (soc_is_omap443x())
@@ -281,6 +350,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 	/* Must preserve cfg.scu_base set earlier */
 	cfg.cpu1_rstctrl_pa = c->cpu1_rstctrl_pa;
 	cfg.startup_addr = c->startup_addr;
+	cfg.wakeupgen_base = omap_get_wakeupgen_base();
 
 	if (soc_is_dra74x() || soc_is_omap54xx()) {
 		if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
@@ -299,15 +369,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 	if (cfg.scu_base)
 		scu_enable(cfg.scu_base);
 
-	/*
-	 * Reset CPU1 before configuring, otherwise kexec will
-	 * end up trying to use old kernel startup address.
-	 */
-	if (cfg.cpu1_rstctrl_va) {
-		writel_relaxed(1, cfg.cpu1_rstctrl_va);
-		readl_relaxed(cfg.cpu1_rstctrl_va);
-		writel_relaxed(0, cfg.cpu1_rstctrl_va);
-	}
+	omap4_smp_maybe_reset_cpu1(&cfg);
 
 	/*
 	 * Write the address of secondary startup routine into the
@@ -319,7 +381,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 		omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr));
 	else
 		writel_relaxed(__pa_symbol(cfg.startup_addr),
-			       base + OMAP_AUX_CORE_BOOT_1);
+			       cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_1);
 }
 
 const struct smp_operations omap4_smp_ops __initconst = {
-- 
cgit v1.2.3


From b03b99a329a14b7302f37c3ea6da3848db41c8c5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 27 Mar 2017 21:53:38 -0700
Subject: acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit
 comparison)

While reviewing the -stable patch for commit 86ef58a4e35e "nfit,
libnvdimm: fix interleave set cookie calculation" Ben noted:

    "This is returning an int, thus it's effectively doing a 32-bit
     comparison and not the 64-bit comparison you say is needed."

Update the compare operation to be immune to this integer demotion problem.

Cc: <stable@vger.kernel.org>
Cc: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Fixes: 86ef58a4e35e ("nfit, libnvdimm: fix interleave set cookie calculation")
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 662036bdc65e..c8ea9d698cd0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
 	const struct nfit_set_info_map *map0 = m0;
 	const struct nfit_set_info_map *map1 = m1;
 
-	return map0->region_offset - map1->region_offset;
+	if (map0->region_offset < map1->region_offset)
+		return -1;
+	else if (map0->region_offset > map1->region_offset)
+		return 1;
+	return 0;
 }
 
 /* Retrieve the nth entry referencing this spa */
-- 
cgit v1.2.3


From d43e85b7d7c759a01f92fa09bba5d6eb6e406795 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Mar 2017 12:15:41 +0200
Subject: ARM: orion5x: only call into phylib when available

Board code cannot call mdiobus_register_board_info() when phylib
or mdio_device is a loadable module:

arch/arm/plat-orion/common.o: In function `orion_ge00_switch_init':
:(.init.text+0x474): undefined reference to `mdiobus_register_board_info'

I had a number of ideas for how this could be solved, but after the MDIO
code got split out from PHYLIB it has gotten too hard, so I'm basically
giving up, and only call the mdiobus_register_board_info() function
if the MDIO layer is built-in to avoid the link error. This is similar
to how we handle PHY registration on other ARM platforms.

Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs")
Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm/mach-orion5x/Kconfig | 1 +
 arch/arm/plat-orion/common.c  | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
index 633442ad4e4c..2a7bb6ccdcb7 100644
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -6,6 +6,7 @@ menuconfig ARCH_ORION5X
 	select GPIOLIB
 	select MVEBU_MBUS
 	select PCI
+	select PHYLIB if NETDEVICES
 	select PLAT_ORION_LEGACY
 	help
 	  Support for the following Marvell Orion 5x series SoCs:
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 9255b6d67ba5..aff6994950ba 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -468,6 +468,7 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 		    eth_data, &orion_ge11);
 }
 
+#ifdef CONFIG_ARCH_ORION5X
 /*****************************************************************************
  * Ethernet switch
  ****************************************************************************/
@@ -480,6 +481,9 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 	struct mdio_board_info *bd;
 	unsigned int i;
 
+	if (!IS_BUILTIN(CONFIG_PHYLIB))
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
 		if (!strcmp(d->port_names[i], "cpu"))
 			break;
@@ -493,6 +497,7 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 
 	mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
 }
+#endif
 
 /*****************************************************************************
  * I2C
-- 
cgit v1.2.3


From 62e24c5775ecb387a3eb33701378ccfa6dbc98ee Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 5 Feb 2016 13:41:39 +0100
Subject: reset: add exported __reset_control_get, return NULL if optional

Rename the internal __reset_control_get/put functions to
__reset_control_get/put_internal and add an exported
__reset_control_get equivalent to __of_reset_control_get
that takes a struct device parameter.
This avoids the confusing call to __of_reset_control_get in
the non-DT case and fixes the devm_reset_control_get_optional
function to return NULL if RESET_CONTROLLER is enabled but
dev->of_node == NULL.

Fixes: bb475230b8e5 ("reset: make optional functions really optional")
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/core.c  | 22 ++++++++++++++++------
 include/linux/reset.h | 22 ++++++++++++++--------
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index f1e5e65388bb..cd739d2fa160 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc)
 }
 EXPORT_SYMBOL_GPL(reset_control_status);
 
-static struct reset_control *__reset_control_get(
+static struct reset_control *__reset_control_get_internal(
 				struct reset_controller_dev *rcdev,
 				unsigned int index, bool shared)
 {
@@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get(
 	return rstc;
 }
 
-static void __reset_control_put(struct reset_control *rstc)
+static void __reset_control_put_internal(struct reset_control *rstc)
 {
 	lockdep_assert_held(&reset_list_mutex);
 
@@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
 	}
 
 	/* reset_list_mutex also protects the rcdev's reset_control list */
-	rstc = __reset_control_get(rcdev, rstc_id, shared);
+	rstc = __reset_control_get_internal(rcdev, rstc_id, shared);
 
 	mutex_unlock(&reset_list_mutex);
 
@@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
 }
 EXPORT_SYMBOL_GPL(__of_reset_control_get);
 
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+					  int index, bool shared, bool optional)
+{
+	if (dev->of_node)
+		return __of_reset_control_get(dev->of_node, id, index, shared,
+					      optional);
+
+	return optional ? NULL : ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(__reset_control_get);
+
 /**
  * reset_control_put - free the reset controller
  * @rstc: reset controller
@@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc)
 		return;
 
 	mutex_lock(&reset_list_mutex);
-	__reset_control_put(rstc);
+	__reset_control_put_internal(rstc);
 	mutex_unlock(&reset_list_mutex);
 }
 EXPORT_SYMBOL_GPL(reset_control_put);
@@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev,
 	if (!ptr)
 		return ERR_PTR(-ENOMEM);
 
-	rstc = __of_reset_control_get(dev ? dev->of_node : NULL,
-				      id, index, shared, optional);
+	rstc = __reset_control_get(dev, id, index, shared, optional);
 	if (!IS_ERR(rstc)) {
 		*ptr = rstc;
 		devres_add(dev, ptr);
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 96fb139bdd08..13d8681210d5 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc);
 struct reset_control *__of_reset_control_get(struct device_node *node,
 				     const char *id, int index, bool shared,
 				     bool optional);
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+					  int index, bool shared,
+					  bool optional);
 void reset_control_put(struct reset_control *rstc);
 struct reset_control *__devm_reset_control_get(struct device *dev,
 				     const char *id, int index, bool shared,
@@ -72,6 +75,13 @@ static inline struct reset_control *__of_reset_control_get(
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
+static inline struct reset_control *__reset_control_get(
+					struct device *dev, const char *id,
+					int index, bool shared, bool optional)
+{
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
+}
+
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev, const char *id,
 					int index, bool shared, bool optional)
@@ -102,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
 #endif
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-									false);
+	return __reset_control_get(dev, id, 0, false, false);
 }
 
 /**
@@ -131,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 static inline struct reset_control *reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-									false);
+	return __reset_control_get(dev, id, 0, true, false);
 }
 
 static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-									true);
+	return __reset_control_get(dev, id, 0, false, true);
 }
 
 static inline struct reset_control *reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-									true);
+	return __reset_control_get(dev, id, 0, true, true);
 }
 
 /**
-- 
cgit v1.2.3


From 04abaf07f6d5cdf22b7a478a86e706dfeeeef960 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Thu, 30 Mar 2017 14:58:18 -0500
Subject: ARM: OMAP2+: omap_device: Sync omap_device and pm_runtime after probe
 defer

Starting from commit 5de85b9d57ab ("PM / runtime: Re-init runtime PM
states at probe error and driver unbind") pm_runtime core now changes
device runtime_status back to after RPM_SUSPENDED after a probe defer.
Certain OMAP devices make use of "ti,no-idle-on-init" flag which causes
omap_device_enable to be called during the BUS_NOTIFY_ADD_DEVICE event
during probe, along with pm_runtime_set_active.

This call to pm_runtime_set_active typically will prevent a call to
pm_runtime_get in a driver probe function from re-enabling the
omap_device. However, in the case of a probe defer that happens before
the driver probe function is able to run, such as a missing pinctrl
states defer, pm_runtime_reinit will set the device as RPM_SUSPENDED and
then once driver probe is actually able to run, pm_runtime_get will see
the device as suspended and call through to the omap_device layer,
attempting to enable the already enabled omap_device and causing errors
like this:

omap-gpmc 50000000.gpmc: omap_device: omap_device_enable() called from
invalid state 1
omap-gpmc 50000000.gpmc: use pm_runtime_put_sync_suspend() in driver?

We can avoid this error by making sure the pm_runtime status of a device
matches the omap_device state before a probe attempt. By extending the
omap_device bus notifier to act on the BUS_NOTIFY_BIND_DRIVER event we
can check if a device is enabled in omap_device but with a pm_runtime
status of RPM_SUSPENDED and once again mark the device as RPM_ACTIVE to
avoid a second incorrect call to omap_device_enable.

Fixes: 5de85b9d57ab ("PM / runtime: Re-init runtime PM states at probe
error and driver unbind")
Tested-by: Franklin S Cooper Jr. <fcooper@ti.com>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_device.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index e920dd83e443..f989145480c8 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -222,6 +222,14 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 				dev_err(dev, "failed to idle\n");
 		}
 		break;
+	case BUS_NOTIFY_BIND_DRIVER:
+		od = to_omap_device(pdev);
+		if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) &&
+		    pm_runtime_status_suspended(dev)) {
+			od->_driver_status = BUS_NOTIFY_BIND_DRIVER;
+			pm_runtime_set_active(dev);
+		}
+		break;
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (pdev->dev.of_node)
 			omap_device_build_from_dt(pdev);
-- 
cgit v1.2.3


From fe514739d8538783749d3ce72f78e5a999ea5668 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 4 Apr 2017 15:08:36 -0700
Subject: libnvdimm: fix blk free space accounting

Commit a1f3e4d6a0c3 "libnvdimm, region: update nd_region_available_dpa()
for multi-pmem support" reworked blk dpa (DIMM Physical Address)
accounting to comprehend multiple pmem namespace allocations aliasing
with a given blk-dpa range.

The following call trace is a result of failing to account for allocated
blk capacity.

 WARNING: CPU: 1 PID: 2433 at tools/testing/nvdimm/../../../drivers/nvdimm/names
4 size_store+0x6f3/0x930 [libnvdimm]
 nd_region region5: allocation underrun: 0x0 of 0x1000000 bytes
 [..]
 Call Trace:
  dump_stack+0x86/0xc3
  __warn+0xcb/0xf0
  warn_slowpath_fmt+0x5f/0x80
  size_store+0x6f3/0x930 [libnvdimm]
  dev_attr_store+0x18/0x30

If a given blk-dpa allocation does not alias with any pmem ranges then
the full allocation should be accounted as busy space, not the size of
the current pmem contribution to the region.

The thinkos that led to this confusion was not realizing that the struct
resource management is already guaranteeing no collisions between pmem
allocations and blk allocations on the same dimm. Also, we do not try to
support blk allocations in aliased pmem holes.

This patch also fixes a case where the available blk goes negative.

Cc: <stable@vger.kernel.org>
Fixes: a1f3e4d6a0c3 ("libnvdimm, region: update nd_region_available_dpa() for multi-pmem support").
Reported-by: Dariusz Dokupil <dariusz.dokupil@intel.com>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/dimm_devs.c | 77 +++++++---------------------------------------
 1 file changed, 11 insertions(+), 66 deletions(-)

diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 0eedc49e0d47..8b721321be5b 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
 
 int alias_dpa_busy(struct device *dev, void *data)
 {
-	resource_size_t map_end, blk_start, new, busy;
+	resource_size_t map_end, blk_start, new;
 	struct blk_alloc_info *info = data;
 	struct nd_mapping *nd_mapping;
 	struct nd_region *nd_region;
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
  retry:
 	/*
 	 * Find the free dpa from the end of the last pmem allocation to
-	 * the end of the interleave-set mapping that is not already
-	 * covered by a blk allocation.
+	 * the end of the interleave-set mapping.
 	 */
-	busy = 0;
 	for_each_dpa_resource(ndd, res) {
+		if (strncmp(res->name, "pmem", 4) != 0)
+			continue;
 		if ((res->start >= blk_start && res->start < map_end)
 				|| (res->end >= blk_start
 					&& res->end <= map_end)) {
-			if (strncmp(res->name, "pmem", 4) == 0) {
-				new = max(blk_start, min(map_end + 1,
-							res->end + 1));
-				if (new != blk_start) {
-					blk_start = new;
-					goto retry;
-				}
-			} else
-				busy += min(map_end, res->end)
-					- max(nd_mapping->start, res->start) + 1;
-		} else if (nd_mapping->start > res->start
-				&& map_end < res->end) {
-			/* total eclipse of the PMEM region mapping */
-			busy += nd_mapping->size;
-			break;
+			new = max(blk_start, min(map_end + 1, res->end + 1));
+			if (new != blk_start) {
+				blk_start = new;
+				goto retry;
+			}
 		}
 	}
 
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
 		return 1;
 	}
 
-	info->available -= blk_start - nd_mapping->start + busy;
+	info->available -= blk_start - nd_mapping->start;
 
 	return 0;
 }
 
-static int blk_dpa_busy(struct device *dev, void *data)
-{
-	struct blk_alloc_info *info = data;
-	struct nd_mapping *nd_mapping;
-	struct nd_region *nd_region;
-	resource_size_t map_end;
-	int i;
-
-	if (!is_nd_pmem(dev))
-		return 0;
-
-	nd_region = to_nd_region(dev);
-	for (i = 0; i < nd_region->ndr_mappings; i++) {
-		nd_mapping  = &nd_region->mapping[i];
-		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-			break;
-	}
-
-	if (i >= nd_region->ndr_mappings)
-		return 0;
-
-	map_end = nd_mapping->start + nd_mapping->size - 1;
-	if (info->res->start >= nd_mapping->start
-			&& info->res->start < map_end) {
-		if (info->res->end <= map_end) {
-			info->busy = 0;
-			return 1;
-		} else {
-			info->busy -= info->res->end - map_end;
-			return 0;
-		}
-	} else if (info->res->end >= nd_mapping->start
-			&& info->res->end <= map_end) {
-		info->busy -= nd_mapping->start - info->res->start;
-		return 0;
-	} else {
-		info->busy -= nd_mapping->size;
-		return 0;
-	}
-}
-
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
 	for_each_dpa_resource(ndd, res) {
 		if (strncmp(res->name, "blk", 3) != 0)
 			continue;
-
-		info.res = res;
-		info.busy = resource_size(res);
-		device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
-		info.available -= info.busy;
+		info.available -= resource_size(res);
 	}
 
 	return info.available;
-- 
cgit v1.2.3


From 0d98479738b950e30bb4f782d60099d44076ad67 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Wed, 5 Apr 2017 22:30:34 +0800
Subject: arm64: allwinner: a64: add pmu0 regs for USB PHY

The USB PHY in A64 has a "pmu0" region, which controls the EHCI/OHCI
controller pair that can be connected to the PHY0.

Add the MMIO region for PHY node.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 1c64ea2d23f9..0565779e66fa 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -179,8 +179,10 @@
 		usbphy: phy@01c19400 {
 			compatible = "allwinner,sun50i-a64-usb-phy";
 			reg = <0x01c19400 0x14>,
+			      <0x01c1a800 0x4>,
 			      <0x01c1b800 0x4>;
 			reg-names = "phy_ctrl",
+				    "pmu0",
 				    "pmu1";
 			clocks = <&ccu CLK_USB_PHY0>,
 				 <&ccu CLK_USB_PHY1>;
-- 
cgit v1.2.3


From 6780414519f91c2a84da9baa963a940ac916f803 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Tue, 28 Mar 2017 12:41:26 +0800
Subject: scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

[mkp: tweaked to avoid setting rw_max twice and added typecast]

Cc: <stable@vger.kernel.org> # v4.4+
Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fb9b4d29af0b..906cd6bfa2a9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2956,7 +2956,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
 		q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
 		rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
 	} else
-		rw_max = BLK_DEF_MAX_SECTORS;
+		rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
+				      (sector_t)BLK_DEF_MAX_SECTORS);
 
 	/* Combine with controller limits */
 	q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
cgit v1.2.3


From a00a7862513089f17209b732f230922f1942e0b9 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 17 Mar 2017 08:47:14 -0400
Subject: scsi: sr: Sanity check returned mode data

Kefeng Wang discovered that old versions of the QEMU CD driver would
return mangled mode data causing us to walk off the end of the buffer in
an attempt to parse it. Sanity check the returned mode sense data.

Cc: <stable@vger.kernel.org>
Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0b29b9329b1c..a8f630213a1a 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -836,6 +836,7 @@ static void get_capabilities(struct scsi_cd *cd)
 	unsigned char *buffer;
 	struct scsi_mode_data data;
 	struct scsi_sense_hdr sshdr;
+	unsigned int ms_len = 128;
 	int rc, n;
 
 	static const char *loadmech[] =
@@ -862,10 +863,11 @@ static void get_capabilities(struct scsi_cd *cd)
 	scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
 
 	/* ask for mode page 0x2a */
-	rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
+	rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len,
 			     SR_TIMEOUT, 3, &data, NULL);
 
-	if (!scsi_status_is_good(rc)) {
+	if (!scsi_status_is_good(rc) || data.length > ms_len ||
+	    data.header_length + data.block_descriptor_length > data.length) {
 		/* failed, drive doesn't have capabilities mode page */
 		cd->cdi.speed = 1;
 		cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R |
-- 
cgit v1.2.3


From 8eaf7dfcfcf222e56f7d1e0a9ffdd7be0f300c2f Mon Sep 17 00:00:00 2001
From: Chad Dupuis <chad.dupuis@cavium.com>
Date: Thu, 23 Mar 2017 06:58:47 -0700
Subject: scsi: qedf: Fix crash due to unsolicited FIP VLAN response.

We need to initialize qedf->fipvlan_compl in __qedf_probe so that if we
receive an unsolicited FIP VLAN response, the system doesn't crash due
to trying to complete an uninitialized completion.

Also add a check to see if there are any waiters on the completion so we
don't inadvertantly kick start the discovery process due to the
unsolicited frame.

Fixed the crash:

<1>BUG: unable to handle kernel NULL pointer dereference at (null)
<1>IP: [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4>PGD 0
<4>Oops: 0000 [#1] SMP
<4>last sysfs file: /sys/devices/system/cpu/online
<4>CPU 7
<4>Modules linked in: autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic fcoe 8021q garp stp llc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 vfat fat uinput ipmi_devintf microcode power_meter acpi_ipmi ipmi_si ipmi_msghandler iTCO_wdt iTCO_vendor_support dcdbas sg joydev sb_edac edac_core lpc_ich mfd_core shpchp tg3 ptp pps_core ext4 jbd2 mbcache sr_mod cdrom sd_mod crc_t10dif qedi(U) iscsi_boot_sysfs libiscsi scsi_transport_iscsi uio qedf(U) libfcoe libfc scsi_transport_fc scsi_tgt qede(U) qed(U) ahci megaraid_sas wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]
<4>
<4>Pid: 1485, comm: qedf_11_ll2 Not tainted 2.6.32-642.el6.x86_64 #1 Dell Inc. PowerEdge R730/0599V5
<4>RIP: 0010:[<ffffffff8105ed71>]  [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4>RSP: 0018:ffff881068a83d50  EFLAGS: 00010086
<4>RAX: ffffffffffffffe8 RBX: ffff88106bf42de0 RCX: 0000000000000000
<4>RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffff88106bf42de0
<4>RBP: ffff881068a83d90 R08: 0000000000000000 R09: 00000000fffffffe
<4>R10: 0000000000000000 R11: 000000000000000b R12: 0000000000000286
<4>R13: ffff88106bf42de8 R14: 0000000000000000 R15: 0000000000000000
<4>FS:  0000000000000000(0000) GS:ffff88089c460000(0000) knlGS:0000000000000000
<4>CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
<4>CR2: 0000000000000000 CR3: 0000000001a8d000 CR4: 00000000001407e0
<4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
<4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
<4>Process qedf_11_ll2 (pid: 1485, threadinfo ffff881068a80000, task ffff881068a70040)
<4>Stack:
<4> ffff88106ef00090 0000000300000001 ffff881068a83d90 ffff88106bf42de0
<4><d> 0000000000000286 ffff88106bf42dd8 ffff88106bf40a50 0000000000000002
<4><d> ffff881068a83dc0 ffffffff810634c7 ffff881000000003 000000000000000b
<4>Call Trace:
<4> [<ffffffff810634c7>] complete+0x47/0x60
<4> [<ffffffffa01d37e7>] qedf_fip_recv+0x1c7/0x450 [qedf]
<4> [<ffffffffa01cb3cb>] qedf_ll2_recv_thread+0x33b/0x510 [qedf]
<4> [<ffffffffa01cb090>] ? qedf_ll2_recv_thread+0x0/0x510 [qedf]
<4> [<ffffffff810a662e>] kthread+0x9e/0xc0
<4> [<ffffffff8100c28a>] child_rip+0xa/0x20
<4> [<ffffffff810a6590>] ? kthread+0x0/0xc0
<4> [<ffffffff8100c280>] ? child_rip+0x0/0x20
<4>Code: 41 56 41 55 41 54 53 48 83 ec 18 0f 1f 44 00 00 89 75 cc 89 55 c8 4c 8d 6f 08 48 8b 57 08 41 89 cf 4d 89 c6 48 8d 42 e8 49 39 d5 <48> 8b 58 18 74 3f 48 83 eb 18 eb 0a 0f 1f 00 48 89 d8 48 8d 5a
<1>RIP  [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4> RSP <ffff881068a83d50>
<4>CR2: 0000000000000000

Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_fip.c  | 3 ++-
 drivers/scsi/qedf/qedf_main.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c
index ed58b9104f58..e10b91cc3c62 100644
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -99,7 +99,8 @@ static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf,
 		qedf_set_vlan_id(qedf, vid);
 
 		/* Inform waiter that it's ok to call fcoe_ctlr_link up() */
-		complete(&qedf->fipvlan_compl);
+		if (!completion_done(&qedf->fipvlan_compl))
+			complete(&qedf->fipvlan_compl);
 	}
 }
 
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 8e2a160490e6..cceddd995a4b 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2803,6 +2803,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		atomic_set(&qedf->num_offloads, 0);
 		qedf->stop_io_on_error = false;
 		pci_set_drvdata(pdev, qedf);
+		init_completion(&qedf->fipvlan_compl);
 
 		QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO,
 		   "QLogic FastLinQ FCoE Module qedf %s, "
-- 
cgit v1.2.3


From bf6061b17a8d47ef0d9344d3ef576a4ff0edf793 Mon Sep 17 00:00:00 2001
From: Sawan Chandak <sawan.chandak@cavium.com>
Date: Fri, 31 Mar 2017 14:37:03 -0700
Subject: scsi: qla2xxx: Add fix to read correct register value for ISP82xx.

Add fix to read correct register value for ISP82xx, during check for
register disconnect.ISP82xx has different base register.

Fixes: a465537ad1a4 ("qla2xxx: Disable the adapter and skip error recovery in case of register disconnect")
Signed-off-by: Sawan Chandak <sawan.chandak@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 579363a6f44f..c9e45d2befe5 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1126,8 +1126,13 @@ static inline
 uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
 {
 	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
+	struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
 
-	return ((RD_REG_DWORD(&reg->host_status)) == ISP_REG_DISCONNECT);
+	if (IS_P3P_TYPE(ha))
+		return ((RD_REG_DWORD(&reg82->host_int)) == ISP_REG_DISCONNECT);
+	else
+		return ((RD_REG_DWORD(&reg->host_status)) ==
+			ISP_REG_DISCONNECT);
 }
 
 /**************************************************************************
-- 
cgit v1.2.3


From 7c856152cb92f8eee2df29ef325a1b1f43161aff Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 4 Apr 2017 10:42:30 -0400
Subject: scsi: sd: Fix capacity calculation with 32-bit sector_t

We previously made sure that the reported disk capacity was less than
0xffffffff blocks when the kernel was not compiled with large sector_t
support (CONFIG_LBDAF). However, this check assumed that the capacity
was reported in units of 512 bytes.

Add a sanity check function to ensure that we only enable disks if the
entire reported capacity can be expressed in terms of sector_t.

Cc: <stable@vger.kernel.org>
Reported-by: Steve Magnani <steve.magnani@digidescorp.com>
Cc: Bart Van Assche <Bart.VanAssche@sandisk.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 906cd6bfa2a9..fe0f7997074e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2102,6 +2102,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
 
 #define READ_CAPACITY_RETRIES_ON_RESET	10
 
+/*
+ * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set
+ * and the reported logical block size is bigger than 512 bytes. Note
+ * that last_sector is a u64 and therefore logical_to_sectors() is not
+ * applicable.
+ */
+static bool sd_addressable_capacity(u64 lba, unsigned int sector_size)
+{
+	u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9);
+
+	if (sizeof(sector_t) == 4 && last_sector > U32_MAX)
+		return false;
+
+	return true;
+}
+
 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 						unsigned char *buffer)
 {
@@ -2167,7 +2183,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 		return -ENODEV;
 	}
 
-	if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) {
+	if (!sd_addressable_capacity(lba, sector_size)) {
 		sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
 			"kernel compiled with support for large block "
 			"devices.\n");
@@ -2256,7 +2272,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
 		return sector_size;
 	}
 
-	if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) {
+	if (!sd_addressable_capacity(lba, sector_size)) {
 		sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
 			"kernel compiled with support for large block "
 			"devices.\n");
-- 
cgit v1.2.3


From 096e9e912ba8c973213e57701e0591e01064be26 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:50 +0300
Subject: nvme-loop: Fix sqsize wrong assignment based on ctrl MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/target/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 22f7bc6bac7f..c7b0b6a52708 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -392,7 +392,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)
-- 
cgit v1.2.3


From 1af76ddaa80e3bca8001b535e44aaaebd3e1907e Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:51 +0300
Subject: nvme-rdma: Fix sqsize wrong assignment based on ctrl MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 47a479f26e5d..16f84eb0b95e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1606,7 +1606,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)
-- 
cgit v1.2.3


From c6c64a942c87faa6fca68f5dd208094b8cf61236 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:52 +0300
Subject: nvme-fc: Fix sqsize wrong assignment based on ctrl MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9690beb15e69..d996ca73d3be 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2023,7 +2023,7 @@ nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)
-- 
cgit v1.2.3


From aa4a829bdaced81e70c215a84ef6595ce8bd4308 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 8 Apr 2017 10:02:46 +0800
Subject: crypto: xts - Fix use-after-free on EINPROGRESS

When we get an EINPROGRESS completion in xts, we will end up marking
the request as done and freeing it.  This then blows up when the
request is really completed as we've already freed the memory.

Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher")
Cc: <stable@vger.kernel.org>
Reported-by: Nathan Royce <nroycea+kernel@gmail.com>
Reported-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 crypto/xts.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/crypto/xts.c b/crypto/xts.c
index c976bfac29da..89ace5ebc2da 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -286,6 +286,13 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
 	struct rctx *rctx;
 
 	rctx = skcipher_request_ctx(req);
+
+	if (err == -EINPROGRESS) {
+		if (rctx->left != req->cryptlen)
+			return;
+		goto out;
+	}
+
 	subreq = &rctx->subreq;
 	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
 
@@ -293,6 +300,7 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
 	if (rctx->left)
 		return;
 
+out:
 	skcipher_request_complete(req, err);
 }
 
@@ -330,6 +338,13 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
 	struct rctx *rctx;
 
 	rctx = skcipher_request_ctx(req);
+
+	if (err == -EINPROGRESS) {
+		if (rctx->left != req->cryptlen)
+			return;
+		goto out;
+	}
+
 	subreq = &rctx->subreq;
 	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
 
@@ -337,6 +352,7 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
 	if (rctx->left)
 		return;
 
+out:
 	skcipher_request_complete(req, err);
 }
 
-- 
cgit v1.2.3


From 4702bbeefb490e315189636a5588628c1151223d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 10 Apr 2017 17:15:48 +0800
Subject: crypto: lrw - Fix use-after-free on EINPROGRESS

When we get an EINPROGRESS completion in lrw, we will end up marking
the request as done and freeing it.  This then blows up when the
request is really completed as we've already freed the memory.

Fixes: 700cb3f5fe75 ("crypto: lrw - Convert to skcipher")
Cc: <stable@vger.kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/lrw.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/crypto/lrw.c b/crypto/lrw.c
index 3ea095adafd9..a8bfae4451bf 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -345,6 +345,13 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
 	struct rctx *rctx;
 
 	rctx = skcipher_request_ctx(req);
+
+	if (err == -EINPROGRESS) {
+		if (rctx->left != req->cryptlen)
+			return;
+		goto out;
+	}
+
 	subreq = &rctx->subreq;
 	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
 
@@ -352,6 +359,7 @@ static void encrypt_done(struct crypto_async_request *areq, int err)
 	if (rctx->left)
 		return;
 
+out:
 	skcipher_request_complete(req, err);
 }
 
@@ -389,6 +397,13 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
 	struct rctx *rctx;
 
 	rctx = skcipher_request_ctx(req);
+
+	if (err == -EINPROGRESS) {
+		if (rctx->left != req->cryptlen)
+			return;
+		goto out;
+	}
+
 	subreq = &rctx->subreq;
 	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
 
@@ -396,6 +411,7 @@ static void decrypt_done(struct crypto_async_request *areq, int err)
 	if (rctx->left)
 		return;
 
+out:
 	skcipher_request_complete(req, err);
 }
 
-- 
cgit v1.2.3


From ef0579b64e93188710d48667cb5e014926af9f1b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 10 Apr 2017 17:27:57 +0800
Subject: crypto: ahash - Fix EINPROGRESS notification callback

The ahash API modifies the request's callback function in order
to clean up after itself in some corner cases (unaligned final
and missing finup).

When the request is complete ahash will restore the original
callback and everything is fine.  However, when the request gets
an EBUSY on a full queue, an EINPROGRESS callback is made while
the request is still ongoing.

In this case the ahash API will incorrectly call its own callback.

This patch fixes the problem by creating a temporary request
object on the stack which is used to relay EINPROGRESS back to
the original completion function.

This patch also adds code to preserve the original flags value.

Fixes: ab6bf4e5e5e4 ("crypto: hash - Fix the pointer voodoo in...")
Cc: <stable@vger.kernel.org>
Reported-by: Sabrina Dubroca <sd@queasysnail.net>
Tested-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ahash.c                 | 79 ++++++++++++++++++++++++++----------------
 include/crypto/internal/hash.h | 10 ++++++
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/crypto/ahash.c b/crypto/ahash.c
index e58c4970c22b..826cd7ab4d4a 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -32,6 +32,7 @@ struct ahash_request_priv {
 	crypto_completion_t complete;
 	void *data;
 	u8 *result;
+	u32 flags;
 	void *ubuf[] CRYPTO_MINALIGN_ATTR;
 };
 
@@ -253,6 +254,8 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
 	priv->result = req->result;
 	priv->complete = req->base.complete;
 	priv->data = req->base.data;
+	priv->flags = req->base.flags;
+
 	/*
 	 * WARNING: We do not backup req->priv here! The req->priv
 	 *          is for internal use of the Crypto API and the
@@ -267,38 +270,44 @@ static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt)
 	return 0;
 }
 
-static void ahash_restore_req(struct ahash_request *req)
+static void ahash_restore_req(struct ahash_request *req, int err)
 {
 	struct ahash_request_priv *priv = req->priv;
 
+	if (!err)
+		memcpy(priv->result, req->result,
+		       crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
+
 	/* Restore the original crypto request. */
 	req->result = priv->result;
-	req->base.complete = priv->complete;
-	req->base.data = priv->data;
+
+	ahash_request_set_callback(req, priv->flags,
+				   priv->complete, priv->data);
 	req->priv = NULL;
 
 	/* Free the req->priv.priv from the ADJUSTED request. */
 	kzfree(priv);
 }
 
-static void ahash_op_unaligned_finish(struct ahash_request *req, int err)
+static void ahash_notify_einprogress(struct ahash_request *req)
 {
 	struct ahash_request_priv *priv = req->priv;
+	struct crypto_async_request oreq;
 
-	if (err == -EINPROGRESS)
-		return;
-
-	if (!err)
-		memcpy(priv->result, req->result,
-		       crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
+	oreq.data = priv->data;
 
-	ahash_restore_req(req);
+	priv->complete(&oreq, -EINPROGRESS);
 }
 
 static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
 {
 	struct ahash_request *areq = req->data;
 
+	if (err == -EINPROGRESS) {
+		ahash_notify_einprogress(areq);
+		return;
+	}
+
 	/*
 	 * Restore the original request, see ahash_op_unaligned() for what
 	 * goes where.
@@ -309,7 +318,7 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err)
 	 */
 
 	/* First copy req->result into req->priv.result */
-	ahash_op_unaligned_finish(areq, err);
+	ahash_restore_req(areq, err);
 
 	/* Complete the ORIGINAL request. */
 	areq->base.complete(&areq->base, err);
@@ -325,7 +334,12 @@ static int ahash_op_unaligned(struct ahash_request *req,
 		return err;
 
 	err = op(req);
-	ahash_op_unaligned_finish(req, err);
+	if (err == -EINPROGRESS ||
+	    (err == -EBUSY && (ahash_request_flags(req) &
+			       CRYPTO_TFM_REQ_MAY_BACKLOG)))
+		return err;
+
+	ahash_restore_req(req, err);
 
 	return err;
 }
@@ -360,25 +374,14 @@ int crypto_ahash_digest(struct ahash_request *req)
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
-static void ahash_def_finup_finish2(struct ahash_request *req, int err)
+static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
 {
-	struct ahash_request_priv *priv = req->priv;
+	struct ahash_request *areq = req->data;
 
 	if (err == -EINPROGRESS)
 		return;
 
-	if (!err)
-		memcpy(priv->result, req->result,
-		       crypto_ahash_digestsize(crypto_ahash_reqtfm(req)));
-
-	ahash_restore_req(req);
-}
-
-static void ahash_def_finup_done2(struct crypto_async_request *req, int err)
-{
-	struct ahash_request *areq = req->data;
-
-	ahash_def_finup_finish2(areq, err);
+	ahash_restore_req(areq, err);
 
 	areq->base.complete(&areq->base, err);
 }
@@ -389,11 +392,15 @@ static int ahash_def_finup_finish1(struct ahash_request *req, int err)
 		goto out;
 
 	req->base.complete = ahash_def_finup_done2;
-	req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
 	err = crypto_ahash_reqtfm(req)->final(req);
+	if (err == -EINPROGRESS ||
+	    (err == -EBUSY && (ahash_request_flags(req) &
+			       CRYPTO_TFM_REQ_MAY_BACKLOG)))
+		return err;
 
 out:
-	ahash_def_finup_finish2(req, err);
+	ahash_restore_req(req, err);
 	return err;
 }
 
@@ -401,7 +408,16 @@ static void ahash_def_finup_done1(struct crypto_async_request *req, int err)
 {
 	struct ahash_request *areq = req->data;
 
+	if (err == -EINPROGRESS) {
+		ahash_notify_einprogress(areq);
+		return;
+	}
+
+	areq->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
 	err = ahash_def_finup_finish1(areq, err);
+	if (areq->priv)
+		return;
 
 	areq->base.complete(&areq->base, err);
 }
@@ -416,6 +432,11 @@ static int ahash_def_finup(struct ahash_request *req)
 		return err;
 
 	err = tfm->update(req);
+	if (err == -EINPROGRESS ||
+	    (err == -EBUSY && (ahash_request_flags(req) &
+			       CRYPTO_TFM_REQ_MAY_BACKLOG)))
+		return err;
+
 	return ahash_def_finup_finish1(req, err);
 }
 
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 1d4f365d8f03..f6d9af3efa45 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -166,6 +166,16 @@ static inline struct ahash_instance *ahash_alloc_instance(
 	return crypto_alloc_instance2(name, alg, ahash_instance_headroom());
 }
 
+static inline void ahash_request_complete(struct ahash_request *req, int err)
+{
+	req->base.complete(&req->base, err);
+}
+
+static inline u32 ahash_request_flags(struct ahash_request *req)
+{
+	return req->base.flags;
+}
+
 static inline struct crypto_ahash *crypto_spawn_ahash(
 	struct crypto_ahash_spawn *spawn)
 {
-- 
cgit v1.2.3


From e6534aebb26e32fbab14df9c713c65e8507d17e4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 10 Apr 2017 17:59:07 +0800
Subject: crypto: algif_aead - Fix bogus request dereference in completion
 function

The algif_aead completion function tries to deduce the aead_request
from the crypto_async_request argument.  This is broken because
the API does not guarantee that the same request will be pased to
the completion function.  Only the value of req->data can be used
in the completion function.

This patch fixes it by storing a pointer to sk in areq and using
that instead of passing in sk through req->data.

Fixes: 83094e5e9e49 ("crypto: af_alg - add async support to...")
Cc: <stable@vger.kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algif_aead.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 5a8053758657..ef59d9926ee9 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -40,6 +40,7 @@ struct aead_async_req {
 	struct aead_async_rsgl first_rsgl;
 	struct list_head list;
 	struct kiocb *iocb;
+	struct sock *sk;
 	unsigned int tsgls;
 	char iv[];
 };
@@ -379,12 +380,10 @@ unlock:
 
 static void aead_async_cb(struct crypto_async_request *_req, int err)
 {
-	struct sock *sk = _req->data;
-	struct alg_sock *ask = alg_sk(sk);
-	struct aead_ctx *ctx = ask->private;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(&ctx->aead_req);
-	struct aead_request *req = aead_request_cast(_req);
+	struct aead_request *req = _req->data;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct aead_async_req *areq = GET_ASYM_REQ(req, tfm);
+	struct sock *sk = areq->sk;
 	struct scatterlist *sg = areq->tsgl;
 	struct aead_async_rsgl *rsgl;
 	struct kiocb *iocb = areq->iocb;
@@ -447,11 +446,12 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
 	memset(&areq->first_rsgl, '\0', sizeof(areq->first_rsgl));
 	INIT_LIST_HEAD(&areq->list);
 	areq->iocb = msg->msg_iocb;
+	areq->sk = sk;
 	memcpy(areq->iv, ctx->iv, crypto_aead_ivsize(tfm));
 	aead_request_set_tfm(req, tfm);
 	aead_request_set_ad(req, ctx->aead_assoclen);
 	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				  aead_async_cb, sk);
+				  aead_async_cb, req);
 	used -= ctx->aead_assoclen;
 
 	/* take over all tx sgls from ctx */
-- 
cgit v1.2.3


From 0beb2012a1722633515c8aaa263c73449636c893 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 09:47:24 -0700
Subject: libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep
 splat

Holding the reconfig_mutex over a potential userspace fault sets up a
lockdep dependency chain between filesystem-DAX and the libnvdimm ioctl
path. Move the user access outside of the lock.

     [ INFO: possible circular locking dependency detected ]
     4.11.0-rc3+ #13 Tainted: G        W  O
     -------------------------------------------------------
     fallocate/16656 is trying to acquire lock:
      (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [<ffffffffa00080b1>] nvdimm_bus_lock+0x21/0x30 [libnvdimm]
     but task is already holding lock:
      (jbd2_handle){++++..}, at: [<ffffffff813b4944>] start_this_handle+0x104/0x460

    which lock already depends on the new lock.

    the existing dependency chain (in reverse order) is:

    -> #2 (jbd2_handle){++++..}:
            lock_acquire+0xbd/0x200
            start_this_handle+0x16a/0x460
            jbd2__journal_start+0xe9/0x2d0
            __ext4_journal_start_sb+0x89/0x1c0
            ext4_dirty_inode+0x32/0x70
            __mark_inode_dirty+0x235/0x670
            generic_update_time+0x87/0xd0
            touch_atime+0xa9/0xd0
            ext4_file_mmap+0x90/0xb0
            mmap_region+0x370/0x5b0
            do_mmap+0x415/0x4f0
            vm_mmap_pgoff+0xd7/0x120
            SyS_mmap_pgoff+0x1c5/0x290
            SyS_mmap+0x22/0x30
            entry_SYSCALL_64_fastpath+0x1f/0xc2

    -> #1 (&mm->mmap_sem){++++++}:
            lock_acquire+0xbd/0x200
            __might_fault+0x70/0xa0
            __nd_ioctl+0x683/0x720 [libnvdimm]
            nvdimm_ioctl+0x8b/0xe0 [libnvdimm]
            do_vfs_ioctl+0xa8/0x740
            SyS_ioctl+0x79/0x90
            do_syscall_64+0x6c/0x200
            return_from_SYSCALL_64+0x0/0x7a

    -> #0 (&nvdimm_bus->reconfig_mutex){+.+.+.}:
            __lock_acquire+0x16b6/0x1730
            lock_acquire+0xbd/0x200
            __mutex_lock+0x88/0x9b0
            mutex_lock_nested+0x1b/0x20
            nvdimm_bus_lock+0x21/0x30 [libnvdimm]
            nvdimm_forget_poison+0x25/0x50 [libnvdimm]
            nvdimm_clear_poison+0x106/0x140 [libnvdimm]
            pmem_do_bvec+0x1c2/0x2b0 [nd_pmem]
            pmem_make_request+0xf9/0x270 [nd_pmem]
            generic_make_request+0x118/0x3b0
            submit_bio+0x75/0x150

Cc: <stable@vger.kernel.org>
Fixes: 62232e45f4a2 ("libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices")
Cc: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/bus.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 23d4a1728cdf..351bac8f6503 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
 	if (rc < 0)
 		goto out_unlock;
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+
 	if (copy_to_user(p, buf, buf_len))
 		rc = -EFAULT;
+
+	vfree(buf);
+	return rc;
+
  out_unlock:
 	nvdimm_bus_unlock(&nvdimm_bus->dev);
  out:
-- 
cgit v1.2.3


From 4aa5615e080a9855e607accc75b07ab79b252dde Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 12:25:52 -0700
Subject: libnvdimm: band aid btt vs clear poison locking

The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.

 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
 in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
 [..]
 Call Trace:
  dump_stack+0x85/0xc8
  ___might_sleep+0x184/0x250
  __might_sleep+0x4a/0x90
  __mutex_lock+0x58/0x9b0
  ? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
  ? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
  ? acpi_nfit_forget_poison+0x79/0x80 [nfit]
  ? _raw_spin_unlock+0x27/0x40
  mutex_lock_nested+0x1b/0x20
  nvdimm_bus_lock+0x21/0x30 [libnvdimm]
  nvdimm_forget_poison+0x25/0x50 [libnvdimm]
  nvdimm_clear_poison+0x106/0x140 [libnvdimm]
  nsio_rw_bytes+0x164/0x270 [libnvdimm]
  btt_write_pg+0x1de/0x3e0 [nd_btt]
  ? blk_queue_enter+0x30/0x290
  btt_make_request+0x11a/0x310 [nd_btt]
  ? blk_queue_enter+0xb7/0x290
  ? blk_queue_enter+0x30/0x290
  generic_make_request+0x118/0x3b0

As a minimal fix, disable error clearing when the BTT is enabled for the
namespace. For the final fix a larger rework of the poison list locking
is needed.

Note that this is not a problem in the blk case since that path never
calls nvdimm_clear_poison().

Cc: <stable@vger.kernel.org>
Fixes: 82bf1037f2ca ("libnvdimm: check and clear poison before writing to pmem")
Cc: Dave Jiang <dave.jiang@intel.com>
[jeff: dynamically disable error clearing in the btt case]
Suggested-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/claim.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b3323c0697f6..ca6d572c48fc 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
-		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+		/*
+		 * FIXME: nsio_rw_bytes() may be called from atomic
+		 * context in the btt case and nvdimm_clear_poison()
+		 * takes a sleeping lock. Until the locking can be
+		 * reworked this capability requires that the namespace
+		 * is not claimed by btt.
+		 */
+		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
 			long cleared;
 
 			cleared = nvdimm_clear_poison(&ndns->dev, offset, size);
-- 
cgit v1.2.3


From 911e572e98656056ebbbb4274d8fe61434188646 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Thu, 6 Apr 2017 18:12:09 -0300
Subject: scsi: aacraid: fix PCI error recovery path

During a PCI error recovery, if aac_check_health() is not aware that a
PCI error happened and we have an offline PCI channel, it might trigger
some errors (like NULL pointer dereference) and inhibit the error
recovery process to complete.

This patch makes the health check procedure aware of PCI channel issues,
and in case of error recovery process, the function
aac_adapter_check_health() returns -1 and let the recovery process to
complete successfully. This patch was tested on upstream kernel
v4.11-rc5 in PowerPC ppc64le architecture with adapter 9005:028d
(VID:DID) - the error recovery procedure was able to recover fine.

Fixes: 5c63f7f710bd ("aacraid: Added EEH support")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Reviewed-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aacraid.h | 11 ++++++++---
 drivers/scsi/aacraid/commsup.c |  3 ++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index d036a806f31c..d281492009fb 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1690,9 +1690,6 @@ struct aac_dev
 #define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \
 	(dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4)
 
-#define aac_adapter_check_health(dev) \
-	(dev)->a_ops.adapter_check_health(dev)
-
 #define aac_adapter_restart(dev, bled, reset_type) \
 	((dev)->a_ops.adapter_restart(dev, bled, reset_type))
 
@@ -2615,6 +2612,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor)
 	return capacity;
 }
 
+static inline int aac_adapter_check_health(struct aac_dev *dev)
+{
+	if (unlikely(pci_channel_offline(dev->pdev)))
+		return -1;
+
+	return (dev)->a_ops.adapter_check_health(dev);
+}
+
 /* SCp.phase values */
 #define AAC_OWNER_MIDLEVEL	0x101
 #define AAC_OWNER_LOWLEVEL	0x102
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index c8172f16cf33..1f4918355fdb 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1873,7 +1873,8 @@ int aac_check_health(struct aac_dev * aac)
 	spin_unlock_irqrestore(&aac->fib_lock, flagv);
 
 	if (BlinkLED < 0) {
-		printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
+		printk(KERN_ERR "%s: Host adapter is dead (or got a PCI error) %d\n",
+				aac->name, BlinkLED);
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 785a470496d8e0a32e3d39f376984eb2c98ca5b3 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 11:46:04 -0300
Subject: scsi: ipr: do not set DID_PASSTHROUGH on CHECK CONDITION

On a dual controller setup with multipath enabled, some MEDIUM ERRORs
caused both paths to be failed, thus I/O got queued/blocked since the
'queue_if_no_path' feature is enabled by default on IPR controllers.

This example disabled 'queue_if_no_path' so the I/O failure is seen at
the sg_dd program.  Notice that after the sg_dd test-case, both paths
are in 'failed' state, and both path/priority groups are in 'enabled'
state (not 'active') -- which would block I/O with 'queue_if_no_path'.

    # sg_dd if=/dev/dm-2 bs=4096 count=1 dio=1 verbose=4 blk_sgio=0
    <...>
    read(unix): count=4096, res=-1
    sg_dd: reading, skip=0 : Input/output error
    <...>

    # dmesg
    [...] sd 2:2:16:0: [sds] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
    [...] sd 2:2:16:0: [sds] Sense Key : Medium Error [current]
    [...] sd 2:2:16:0: [sds] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] sd 2:2:16:0: [sds] CDB: Read(10) 28 00 00 00 00 00 00 00 20 00
    [...] blk_update_request: I/O error, dev sds, sector 0
    [...] device-mapper: multipath: Failing path 65:32.
    <...>
    [...] device-mapper: multipath: Failing path 65:224.

    # multipath -l
    1IBM_IPR-0_59C2AE0000001F80 dm-2 IBM     ,IPR-0   59C2AE00
    size=5.2T features='0' hwhandler='1 alua' wp=rw
    |-+- policy='service-time 0' prio=0 status=enabled
    | `- 2:2:16:0 sds  65:32  failed undef running
    `-+- policy='service-time 0' prio=0 status=enabled
      `- 1:2:7:0  sdae 65:224 failed undef running

This is not the desired behavior. The dm-multipath explicitly checks
for the MEDIUM ERROR case (and a few others) so not to fail the path
(e.g., I/O to other sectors could potentially happen without problems).
See dm-mpath.c :: do_end_io_bio() -> noretry_error() !->! fail_path().

The problem trace is:

1) ipr_scsi_done()  // SENSE KEY/CHECK CONDITION detected, go to..
2) ipr_erp_start()  // ipr_is_gscsi() and masked_ioasc OK, go to..
3) ipr_gen_sense()  // masked_ioasc is IPR_IOASC_MED_DO_NOT_REALLOC,
                    // so set DID_PASSTHROUGH.

4) scsi_decide_disposition()  // check for DID_PASSTHROUGH and return
                              // early on, faking a DID_OK.. *instead*
                              // of reaching scsi_check_sense().

                              // Had it reached the latter, that would
                              // set host_byte to DID_MEDIUM_ERROR.

5) scsi_finish_command()
6) scsi_io_completion()
7) __scsi_error_from_host_byte()  // That would be converted to -ENODATA
<...>
8) dm_softirq_done()
9) multipath_end_io()
10) do_end_io()
11) noretry_error()  // And that is checked in dm-mpath :: noretry_error()
                     // which would cause fail_path() not to be called.

With this patch applied, the I/O is failed but the paths are not.  This
multipath device continues accepting more I/O requests without blocking.
(and notice the different host byte/driver byte handling per SCSI layer).

    # dmesg
    [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK
    [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 40 00
    [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current]
    [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] blk_update_request: critical medium error, dev sdaf, sector 0
    [...] blk_update_request: critical medium error, dev dm-6, sector 0
    [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK
    [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 10 00
    [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current]
    [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] blk_update_request: critical medium error, dev sdaf, sector 0
    [...] blk_update_request: critical medium error, dev dm-6, sector 0
    [...] Buffer I/O error on dev dm-6, logical block 0, async page read

    # multipath -l 1IBM_IPR-0_59C2AE0000001F80
    1IBM_IPR-0_59C2AE0000001F80 dm-6 IBM     ,IPR-0   59C2AE00
    size=5.2T features='1 queue_if_no_path' hwhandler='1 alua' wp=rw
    |-+- policy='service-time 0' prio=0 status=active
    | `- 2:2:7:0  sdaf 65:240 active undef running
    `-+- policy='service-time 0' prio=0 status=enabled
      `- 1:2:7:0  sdh  8:112  active undef running

Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ipr.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 835c59c777f2..0d782135ac55 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6293,7 +6293,12 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
 		break;
 	case IPR_IOASC_MED_DO_NOT_REALLOC: /* prevent retries */
 	case IPR_IOASA_IR_DUAL_IOA_DISABLED:
-		scsi_cmd->result |= (DID_PASSTHROUGH << 16);
+		/*
+		 * exception: do not set DID_PASSTHROUGH on CHECK CONDITION
+		 * so SCSI mid-layer and upper layers handle it accordingly.
+		 */
+		if (scsi_cmd->result != SAM_STAT_CHECK_CONDITION)
+			scsi_cmd->result |= (DID_PASSTHROUGH << 16);
 		break;
 	case IPR_IOASC_BUS_WAS_RESET:
 	case IPR_IOASC_BUS_WAS_RESET_BY_OTHER:
-- 
cgit v1.2.3


From 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 16:42:08 -0700
Subject: device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation

The following warning triggers with a new unit test that stresses the
device-dax interface.

 ===============================
 [ ERR: suspicious RCU usage.  ]
 4.11.0-rc4+ #1049 Tainted: G           O
 -------------------------------
 ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 0
 2 locks held by fio/9070:
  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
  #1:  (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]

 Call Trace:
  dump_stack+0x86/0xc3
  lockdep_rcu_suspicious+0xd7/0x110
  ___might_sleep+0xac/0x250
  __might_sleep+0x4a/0x80
  __alloc_pages_nodemask+0x23a/0x360
  alloc_pages_current+0xa1/0x1f0
  pte_alloc_one+0x17/0x80
  __pte_alloc+0x1e/0x120
  __get_locked_pte+0x1bf/0x1d0
  insert_pfn.isra.70+0x3a/0x100
  ? lookup_memtype+0xa6/0xd0
  vm_insert_mixed+0x64/0x90
  dax_dev_huge_fault+0x520/0x620 [dax]
  ? dax_dev_huge_fault+0x32/0x620 [dax]
  dax_dev_fault+0x10/0x20 [dax]
  __do_fault+0x1e/0x140
  __handle_mm_fault+0x9af/0x10d0
  handle_mm_fault+0x16d/0x370
  ? handle_mm_fault+0x47/0x370
  __do_page_fault+0x28c/0x4f0
  trace_do_page_fault+0x58/0x2a0
  do_async_page_fault+0x1a/0xa0
  async_page_fault+0x28/0x30

Inserting a page table entry may trigger an allocation while we are
holding a read lock to keep the device instance alive for the duration
of the fault. Use srcu for this keep-alive protection.

Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/Kconfig |  1 +
 drivers/dax/dax.c   | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 3e2ab3b14eea..9e95bf94eb13 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
 	tristate "DAX: direct access to differentiated memory"
 	default m if NVDIMM_DAX
 	depends on TRANSPARENT_HUGEPAGE
+	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 80c6db279ae1..806f180c80d8 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -25,6 +25,7 @@
 #include "dax.h"
 
 static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
  * @region - parent region
  * @dev - device backing the character device
  * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
  * @id - child id in the region
  * @num_resources - number of physical address extents in this device
  * @res - array of physical address ranges
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 static int dax_dev_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int rc;
+	int rc, id;
 	struct file *filp = vmf->vma->vm_file;
 	struct dax_dev *dax_dev = filp->private_data;
 
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 			? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end);
 
-	rcu_read_lock();
+	id = srcu_read_lock(&dax_srcu);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 	default:
 		return VM_FAULT_FALLBACK;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&dax_srcu, id);
 
 	return rc;
 }
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
 	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
 	 * ensuring that any fault handlers that might have seen
 	 * dax_dev->alive == true, have completed.  Any fault handlers
-	 * that start after synchronize_rcu() has started will abort
+	 * that start after synchronize_srcu() has started will abort
 	 * upon seeing dax_dev->alive == false.
 	 */
 	dax_dev->alive = false;
-	synchronize_rcu();
+	synchronize_srcu(&dax_srcu);
 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
 	cdev_del(cdev);
 	device_unregister(dev);
-- 
cgit v1.2.3


From 11e63f6d920d6f2dfd3cd421e939a4aec9a58dcd Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 6 Apr 2017 09:04:31 -0700
Subject: x86, pmem: fix broken __copy_user_nocache cache-bypass assumptions

Before we rework the "pmem api" to stop abusing __copy_user_nocache()
for memcpy_to_pmem() we need to fix cases where we may strand dirty data
in the cpu cache. The problem occurs when copy_from_iter_pmem() is used
for arbitrary data transfers from userspace. There is no guarantee that
these transfers, performed by dax_iomap_actor(), will have aligned
destinations or aligned transfer lengths. Backstop the usage
__copy_user_nocache() with explicit cache management in these unaligned
cases.

Yes, copy_from_iter_pmem() is now too big for an inline, but addressing
that is saved for a later patch that moves the entirety of the "pmem
api" into the pmem driver directly.

Fixes: 5de490daec8b ("pmem: add copy_from_iter_pmem() and clear_pmem()")
Cc: <stable@vger.kernel.org>
Cc: <x86@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 2c1ebeb4d737..529bb4a6487a 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
  * @size:	number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 		clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-	return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr:	PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 	/* TODO: skip the write-back by always using non-temporal stores */
 	len = copy_from_iter_nocache(addr, bytes, i);
 
-	if (__iter_needs_pmem_wb(i))
+	/*
+	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
+	 * non-temporal stores for the bulk of the transfer, but we need
+	 * to manually flush if the transfer is unaligned. A cached
+	 * memory copy is used when destination or size is not naturally
+	 * aligned. That is:
+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
+	 *   - Require 4-byte alignment when size is 4 bytes.
+	 *
+	 * In the non-iovec case the entire destination needs to be
+	 * flushed.
+	 */
+	if (iter_is_iovec(i)) {
+		unsigned long flushed, dest = (unsigned long) addr;
+
+		if (bytes < 8) {
+			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+				arch_wb_cache_pmem(addr, 1);
+		} else {
+			if (!IS_ALIGNED(dest, 8)) {
+				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+				arch_wb_cache_pmem(addr, 1);
+			}
+
+			flushed = dest - (unsigned long) addr;
+			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+				arch_wb_cache_pmem(addr + bytes - 1, 1);
+		}
+	} else
 		arch_wb_cache_pmem(addr, bytes);
 
 	return len;
-- 
cgit v1.2.3


From a8983d01f9b7d600fbdb3916899edf395954cc83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 14 Apr 2017 10:57:52 +0200
Subject: Revert "tty: don't panic on OOM in tty_set_ldisc()"

This reverts commit 5362544bebe85071188dd9e479b5a5040841c895 as it is
reported to cause a reproducable crash.

Fixes: 5362544bebe8 ("tty: don't panic on OOM in tty_set_ldisc()")
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <syzkaller@googlegroups.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
---
 drivers/tty/tty_ldisc.c | 85 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 16 deletions(-)

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index b0500a0a87b8..e4603b09863a 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -491,6 +491,41 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 	tty_ldisc_debug(tty, "%p: closed\n", ld);
 }
 
+/**
+ *	tty_ldisc_restore	-	helper for tty ldisc change
+ *	@tty: tty to recover
+ *	@old: previous ldisc
+ *
+ *	Restore the previous line discipline or N_TTY when a line discipline
+ *	change fails due to an open error
+ */
+
+static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
+{
+	struct tty_ldisc *new_ldisc;
+	int r;
+
+	/* There is an outstanding reference here so this is safe */
+	old = tty_ldisc_get(tty, old->ops->num);
+	WARN_ON(IS_ERR(old));
+	tty->ldisc = old;
+	tty_set_termios_ldisc(tty, old->ops->num);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
+		/* This driver is always present */
+		new_ldisc = tty_ldisc_get(tty, N_TTY);
+		if (IS_ERR(new_ldisc))
+			panic("n_tty: get");
+		tty->ldisc = new_ldisc;
+		tty_set_termios_ldisc(tty, N_TTY);
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty), r);
+	}
+}
+
 /**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
@@ -504,7 +539,12 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 
 int tty_set_ldisc(struct tty_struct *tty, int disc)
 {
-	int retval, old_disc;
+	int retval;
+	struct tty_ldisc *old_ldisc, *new_ldisc;
+
+	new_ldisc = tty_ldisc_get(tty, disc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
 
 	tty_lock(tty);
 	retval = tty_ldisc_lock(tty, 5 * HZ);
@@ -517,8 +557,7 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 	}
 
 	/* Check the no-op case */
-	old_disc = tty->ldisc->ops->num;
-	if (old_disc == disc)
+	if (tty->ldisc->ops->num == disc)
 		goto out;
 
 	if (test_bit(TTY_HUPPED, &tty->flags)) {
@@ -527,25 +566,34 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 		goto out;
 	}
 
-	retval = tty_ldisc_reinit(tty, disc);
+	old_ldisc = tty->ldisc;
+
+	/* Shutdown the old discipline. */
+	tty_ldisc_close(tty, old_ldisc);
+
+	/* Now set up the new line discipline. */
+	tty->ldisc = new_ldisc;
+	tty_set_termios_ldisc(tty, disc);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
 		/* Back to the old one or N_TTY if we can't */
-		if (tty_ldisc_reinit(tty, old_disc) < 0) {
-			pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n");
-			if (tty_ldisc_reinit(tty, N_TTY) < 0) {
-				/* At this point we have tty->ldisc == NULL. */
-				pr_err("tty: reinitializing N_TTY failed\n");
-			}
-		}
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, old_ldisc);
 	}
 
-	if (tty->ldisc && tty->ldisc->ops->num != old_disc &&
-	    tty->ops->set_ldisc) {
+	if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) {
 		down_read(&tty->termios_rwsem);
 		tty->ops->set_ldisc(tty);
 		up_read(&tty->termios_rwsem);
 	}
 
+	/* At this point we hold a reference to the new ldisc and a
+	   reference to the old ldisc, or we hold two references to
+	   the old ldisc (if it was restored as part of error cleanup
+	   above). In either case, releasing a single reference from
+	   the old ldisc is correct. */
+	new_ldisc = old_ldisc;
 out:
 	tty_ldisc_unlock(tty);
 
@@ -553,6 +601,7 @@ out:
 	   already running */
 	tty_buffer_restart_work(tty->port);
 err:
+	tty_ldisc_put(new_ldisc);	/* drop the extra reference */
 	tty_unlock(tty);
 	return retval;
 }
@@ -613,8 +662,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	int retval;
 
 	ld = tty_ldisc_get(tty, disc);
-	if (IS_ERR(ld))
+	if (IS_ERR(ld)) {
+		BUG_ON(disc == N_TTY);
 		return PTR_ERR(ld);
+	}
 
 	if (tty->ldisc) {
 		tty_ldisc_close(tty, tty->ldisc);
@@ -626,8 +677,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	tty_set_termios_ldisc(tty, disc);
 	retval = tty_ldisc_open(tty, tty->ldisc);
 	if (retval) {
-		tty_ldisc_put(tty->ldisc);
-		tty->ldisc = NULL;
+		if (!WARN_ON(disc == N_TTY)) {
+			tty_ldisc_put(tty->ldisc);
+			tty->ldisc = NULL;
+		}
 	}
 	return retval;
 }
-- 
cgit v1.2.3


From 5a8d75a1b8c99bdc926ba69b7b7dbe4fae81a5af Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 14 Apr 2017 13:58:29 -0600
Subject: block: fix bio_will_gap() for first bvec with offset

Commit 729204ef49ec("block: relax check on sg gap") allows us to merge
bios, if both are physically contiguous.  This change can merge a huge
number of small bios, through mkfs for example, mkfs.ntfs running time
can be decreased to ~1/10.

But if one rq starts with a non-aligned buffer (the 1st bvec's bv_offset
is non-zero) and if we allow the merge, it is quite difficult to respect
sg gap limit, especially the max segment size, or we risk having an
unaligned virtual boundary.  This patch tries to avoid the issue by
disallowing a merge, if the req starts with an unaligned buffer.

Also add comments to explain why the merged segment can't end in
unaligned virt boundary.

Fixes: 729204ef49ec ("block: relax check on sg gap")
Tested-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Rewrote parts of the commit message and comments.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7548f332121a..01a696b0a4d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1672,12 +1672,36 @@ static inline bool bios_segs_mergeable(struct request_queue *q,
 	return true;
 }
 
-static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
-			 struct bio *next)
+static inline bool bio_will_gap(struct request_queue *q,
+				struct request *prev_rq,
+				struct bio *prev,
+				struct bio *next)
 {
 	if (bio_has_data(prev) && queue_virt_boundary(q)) {
 		struct bio_vec pb, nb;
 
+		/*
+		 * don't merge if the 1st bio starts with non-zero
+		 * offset, otherwise it is quite difficult to respect
+		 * sg gap limit. We work hard to merge a huge number of small
+		 * single bios in case of mkfs.
+		 */
+		if (prev_rq)
+			bio_get_first_bvec(prev_rq->bio, &pb);
+		else
+			bio_get_first_bvec(prev, &pb);
+		if (pb.bv_offset)
+			return true;
+
+		/*
+		 * We don't need to worry about the situation that the
+		 * merged segment ends in unaligned virt boundary:
+		 *
+		 * - if 'pb' ends aligned, the merged segment ends aligned
+		 * - if 'pb' ends unaligned, the next bio must include
+		 *   one single bvec of 'nb', otherwise the 'nb' can't
+		 *   merge with 'pb'
+		 */
 		bio_get_last_bvec(prev, &pb);
 		bio_get_first_bvec(next, &nb);
 
@@ -1690,12 +1714,12 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
 {
-	return bio_will_gap(req->q, req->biotail, bio);
+	return bio_will_gap(req->q, req, req->biotail, bio);
 }
 
 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 {
-	return bio_will_gap(req->q, bio, req->bio);
+	return bio_will_gap(req->q, NULL, bio, req->bio);
 }
 
 int kblockd_schedule_work(struct work_struct *work);
-- 
cgit v1.2.3


From 82cc4fc2e70ec5baeff8f776f2773abc8b2cc0ae Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 14 Apr 2017 17:45:45 -0400
Subject: ftrace: Fix removing of second function probe

When two function probes are added to set_ftrace_filter, and then one of
them is removed, the update to the function locations is not performed, and
the record keeping of the function states are corrupted, and causes an
ftrace_bug() to occur.

This is easily reproducable by adding two probes, removing one, and then
adding it back again.

 # cd /sys/kernel/debug/tracing
 # echo schedule:traceoff > set_ftrace_filter
 # echo do_IRQ:traceoff > set_ftrace_filter
 # echo \!do_IRQ:traceoff > /debug/tracing/set_ftrace_filter
 # echo do_IRQ:traceoff > set_ftrace_filter

Causes:
 ------------[ cut here ]------------
 WARNING: CPU: 2 PID: 1098 at kernel/trace/ftrace.c:2369 ftrace_get_addr_curr+0x143/0x220
 Modules linked in: [...]
 CPU: 2 PID: 1098 Comm: bash Not tainted 4.10.0-test+ #405
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
 Call Trace:
  dump_stack+0x68/0x9f
  __warn+0x111/0x130
  ? trace_irq_work_interrupt+0xa0/0xa0
  warn_slowpath_null+0x1d/0x20
  ftrace_get_addr_curr+0x143/0x220
  ? __fentry__+0x10/0x10
  ftrace_replace_code+0xe3/0x4f0
  ? ftrace_int3_handler+0x90/0x90
  ? printk+0x99/0xb5
  ? 0xffffffff81000000
  ftrace_modify_all_code+0x97/0x110
  arch_ftrace_update_code+0x10/0x20
  ftrace_run_update_code+0x1c/0x60
  ftrace_run_modify_code.isra.48.constprop.62+0x8e/0xd0
  register_ftrace_function_probe+0x4b6/0x590
  ? ftrace_startup+0x310/0x310
  ? debug_lockdep_rcu_enabled.part.4+0x1a/0x30
  ? update_stack_state+0x88/0x110
  ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320
  ? preempt_count_sub+0x18/0xd0
  ? mutex_lock_nested+0x104/0x800
  ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320
  ? __unwind_start+0x1c0/0x1c0
  ? _mutex_lock_nest_lock+0x800/0x800
  ftrace_trace_probe_callback.isra.3+0xc0/0x130
  ? func_set_flag+0xe0/0xe0
  ? __lock_acquire+0x642/0x1790
  ? __might_fault+0x1e/0x20
  ? trace_get_user+0x398/0x470
  ? strcmp+0x35/0x60
  ftrace_trace_onoff_callback+0x48/0x70
  ftrace_regex_write.isra.43.part.44+0x251/0x320
  ? match_records+0x420/0x420
  ftrace_filter_write+0x2b/0x30
  __vfs_write+0xd7/0x330
  ? do_loop_readv_writev+0x120/0x120
  ? locks_remove_posix+0x90/0x2f0
  ? do_lock_file_wait+0x160/0x160
  ? __lock_is_held+0x93/0x100
  ? rcu_read_lock_sched_held+0x5c/0xb0
  ? preempt_count_sub+0x18/0xd0
  ? __sb_start_write+0x10a/0x230
  ? vfs_write+0x222/0x240
  vfs_write+0xef/0x240
  SyS_write+0xab/0x130
  ? SyS_read+0x130/0x130
  ? trace_hardirqs_on_caller+0x182/0x280
  ? trace_hardirqs_on_thunk+0x1a/0x1c
  entry_SYSCALL_64_fastpath+0x18/0xad
 RIP: 0033:0x7fe61c157c30
 RSP: 002b:00007ffe87890258 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: ffffffff8114a410 RCX: 00007fe61c157c30
 RDX: 0000000000000010 RSI: 000055814798f5e0 RDI: 0000000000000001
 RBP: ffff8800c9027f98 R08: 00007fe61c422740 R09: 00007fe61ca53700
 R10: 0000000000000073 R11: 0000000000000246 R12: 0000558147a36400
 R13: 00007ffe8788f160 R14: 0000000000000024 R15: 00007ffe8788f15c
  ? trace_hardirqs_off_caller+0xc0/0x110
 ---[ end trace 99fa09b3d9869c2c ]---
 Bad trampoline accounting at: ffffffff81cc3b00 (do_IRQ+0x0/0x150)

Cc: stable@vger.kernel.org
Fixes: 59df055f1991 ("ftrace: trace different functions with a different tracer")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b9691ee8f6c1..27bb2e61276e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3755,23 +3755,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
 	ftrace_probe_registered = 1;
 }
 
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
 {
 	int i;
 
 	if (!ftrace_probe_registered)
-		return;
+		return false;
 
 	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
 		struct hlist_head *hhd = &ftrace_func_hash[i];
 		if (hhd->first)
-			return;
+			return false;
 	}
 
 	/* no more funcs left */
 	ftrace_shutdown(&trace_probe_ops, 0);
 
 	ftrace_probe_registered = 0;
+	return true;
 }
 
 
@@ -3901,6 +3902,7 @@ static void
 __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 				  void *data, int flags)
 {
+	struct ftrace_ops_hash old_hash_ops;
 	struct ftrace_func_entry *rec_entry;
 	struct ftrace_func_probe *entry;
 	struct ftrace_func_probe *p;
@@ -3912,6 +3914,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	struct hlist_node *tmp;
 	char str[KSYM_SYMBOL_LEN];
 	int i, ret;
+	bool disabled;
 
 	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
 		func_g.search = NULL;
@@ -3930,6 +3933,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
 	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
 
+	old_hash_ops.filter_hash = old_hash;
+	/* Probes only have filters */
+	old_hash_ops.notrace_hash = NULL;
+
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash)
 		/* Hmm, should report this somehow */
@@ -3967,12 +3974,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 		}
 	}
 	mutex_lock(&ftrace_lock);
-	__disable_ftrace_function_probe();
+	disabled = __disable_ftrace_function_probe();
 	/*
 	 * Remove after the disable is called. Otherwise, if the last
 	 * probe is removed, a null hash means *all enabled*.
 	 */
 	ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+	/* still need to update the function call sites */
+	if (ftrace_enabled && !disabled)
+		ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+				       &old_hash_ops);
 	synchronize_sched();
 	if (!ret)
 		free_ftrace_hash_rcu(old_hash);
-- 
cgit v1.2.3


From 409c1b250e30ad0e48b4d15d7319b4e18c046c4f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 14 Apr 2017 14:15:20 -0400
Subject: parisc: fix bugs in pa_memcpy

The patch 554bfeceb8a22d448cd986fc9efce25e833278a1 ("parisc: Fix access
fault handling in pa_memcpy()") reimplements the pa_memcpy function.
Unfortunatelly, it makes the kernel unbootable. The crash happens in the
function ide_complete_cmd where memcpy is called with the same source
and destination address.

This patch fixes a few bugs in pa_memcpy:

* When jumping to .Lcopy_loop_16 for the first time, don't skip the
  instruction "ldi 31,t0" (this bug made the kernel unbootable)
* Use the COND macro when comparing length, so that the comparison is
  64-bit (a theoretical issue, in case the length is greater than
  0xffffffff)
* Don't use the COND macro after the "extru" instruction (the PA-RISC
  specification says that the upper 32-bits of extru result are undefined,
  although they are set to zero in practice)
* Fix exception addresses in .Lcopy16_fault and .Lcopy8_fault
* Rename .Lcopy_loop_4 to .Lcopy_loop_8 (so that it is consistent with
  .Lcopy8_fault)

Cc: <stable@vger.kernel.org> # v4.9+
Fixes: 554bfeceb8a2 ("parisc: Fix access fault handling in pa_memcpy()")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/lib/lusercopy.S | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index f01188c044ee..85c28bb80fb7 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -201,7 +201,7 @@ ENTRY_CFI(pa_memcpy)
 	add	dst,len,end
 
 	/* short copy with less than 16 bytes? */
-	cmpib,>>=,n 15,len,.Lbyte_loop
+	cmpib,COND(>>=),n 15,len,.Lbyte_loop
 
 	/* same alignment? */
 	xor	src,dst,t0
@@ -216,7 +216,7 @@ ENTRY_CFI(pa_memcpy)
 	/* loop until we are 64-bit aligned */
 .Lalign_loop64:
 	extru	dst,31,3,t1
-	cmpib,=,n	0,t1,.Lcopy_loop_16
+	cmpib,=,n	0,t1,.Lcopy_loop_16_start
 20:	ldb,ma	1(srcspc,src),t1
 21:	stb,ma	t1,1(dstspc,dst)
 	b	.Lalign_loop64
@@ -225,6 +225,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
 	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
 
+.Lcopy_loop_16_start:
 	ldi	31,t0
 .Lcopy_loop_16:
 	cmpb,COND(>>=),n t0,len,.Lword_loop
@@ -267,7 +268,7 @@ ENTRY_CFI(pa_memcpy)
 	/* loop until we are 32-bit aligned */
 .Lalign_loop32:
 	extru	dst,31,2,t1
-	cmpib,=,n	0,t1,.Lcopy_loop_4
+	cmpib,=,n	0,t1,.Lcopy_loop_8
 20:	ldb,ma	1(srcspc,src),t1
 21:	stb,ma	t1,1(dstspc,dst)
 	b	.Lalign_loop32
@@ -277,7 +278,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
 
 
-.Lcopy_loop_4:
+.Lcopy_loop_8:
 	cmpib,COND(>>=),n 15,len,.Lbyte_loop
 
 10:	ldw	0(srcspc,src),t1
@@ -299,7 +300,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
 	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
 
-	b	.Lcopy_loop_4
+	b	.Lcopy_loop_8
 	ldo	-16(len),len
 
 .Lbyte_loop:
@@ -324,7 +325,7 @@ ENTRY_CFI(pa_memcpy)
 .Lunaligned_copy:
 	/* align until dst is 32bit-word-aligned */
 	extru	dst,31,2,t1
-	cmpib,COND(=),n	0,t1,.Lcopy_dstaligned
+	cmpib,=,n	0,t1,.Lcopy_dstaligned
 20:	ldb	0(srcspc,src),t1
 	ldo	1(src),src
 21:	stb,ma	t1,1(dstspc,dst)
@@ -362,7 +363,7 @@ ENTRY_CFI(pa_memcpy)
 	cmpiclr,<> 1,t0,%r0
 	b,n .Lcase1
 .Lcase0:
-	cmpb,= %r0,len,.Lcda_finish
+	cmpb,COND(=) %r0,len,.Lcda_finish
 	nop
 
 1:	ldw,ma 4(srcspc,src), a3
@@ -376,7 +377,7 @@ ENTRY_CFI(pa_memcpy)
 1:	ldw,ma 4(srcspc,src), a3
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
 	ldo -1(len),len
-	cmpb,=,n %r0,len,.Ldo0
+	cmpb,COND(=),n %r0,len,.Ldo0
 .Ldo4:
 1:	ldw,ma 4(srcspc,src), a0
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
@@ -402,7 +403,7 @@ ENTRY_CFI(pa_memcpy)
 1:	stw,ma t0, 4(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
 	ldo -4(len),len
-	cmpb,<> %r0,len,.Ldo4
+	cmpb,COND(<>) %r0,len,.Ldo4
 	nop
 .Ldo0:
 	shrpw a2, a3, %sar, t0
@@ -436,14 +437,14 @@ ENTRY_CFI(pa_memcpy)
 	/* fault exception fixup handlers: */
 #ifdef CONFIG_64BIT
 .Lcopy16_fault:
-10:	b	.Lcopy_done
-	std,ma	t1,8(dstspc,dst)
+	b	.Lcopy_done
+10:	std,ma	t1,8(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
 #endif
 
 .Lcopy8_fault:
-10:	b	.Lcopy_done
-	stw,ma	t1,4(dstspc,dst)
+	b	.Lcopy_done
+10:	stw,ma	t1,4(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
 
 	.exit
-- 
cgit v1.2.3


From c0eb027e5aef70b71e5a38ee3e264dc0b497f343 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Apr 2017 17:10:08 -0700
Subject: vfs: don't do RCU lookup of empty pathnames

Normal pathname lookup doesn't allow empty pathnames, but using
AT_EMPTY_PATH (with name_to_handle_at() or fstatat(), for example) you
can trigger an empty pathname lookup.

And not only is the RCU lookup in that case entirely unnecessary
(because we'll obviously immediately finalize the end result), it is
actively wrong.

Why? An empth path is a special case that will return the original
'dirfd' dentry - and that dentry may not actually be RCU-free'd,
resulting in a potential use-after-free if we were to initialize the
path lazily under the RCU read lock and depend on complete_walk()
finalizing the dentry.

Found by syzkaller and KASAN.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/namei.c b/fs/namei.c
index d41fab78798b..19dcf62133cc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2145,6 +2145,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	int retval = 0;
 	const char *s = nd->name->name;
 
+	if (!*s)
+		flags &= ~LOOKUP_RCU;
+
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
 	nd->depth = 0;
-- 
cgit v1.2.3


From 1ec1688c5360e14dde4094d6acbf7516bf6db37e Mon Sep 17 00:00:00 2001
From: Martin Brandenburg <martin@omnibond.com>
Date: Fri, 14 Apr 2017 14:22:41 -0400
Subject: orangefs: free superblock when mount fails

Otherwise lockdep says:

[ 1337.483798] ================================================
[ 1337.483999] [ BUG: lock held when returning to user space! ]
[ 1337.484252] 4.11.0-rc6 #19 Not tainted
[ 1337.484423] ------------------------------------------------
[ 1337.484626] mount/14766 is leaving the kernel with locks still held!
[ 1337.484841] 1 lock held by mount/14766:
[ 1337.485017]  #0:  (&type->s_umount_key#33/1){+.+.+.}, at: [<ffffffff8124171f>] sget_userns+0x2af/0x520

Caught by xfstests generic/413 which tried to mount with the unsupported
mount option dax.  Then xfstests generic/422 ran sync which deadlocks.

Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Acked-by: Mike Marshall <hubcap@omnibond.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/orangefs/devorangefs-req.c |  9 +++++++--
 fs/orangefs/orangefs-kernel.h |  1 +
 fs/orangefs/super.c           | 23 ++++++++++++++++-------
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index c4ab6fdf17a0..e1534c9bab16 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -208,14 +208,19 @@ restart:
 				continue;
 			/*
 			 * Skip ops whose filesystem we don't know about unless
-			 * it is being mounted.
+			 * it is being mounted or unmounted.  It is possible for
+			 * a filesystem we don't know about to be unmounted if
+			 * it fails to mount in the kernel after userspace has
+			 * been sent the mount request.
 			 */
 			/* XXX: is there a better way to detect this? */
 			} else if (ret == -1 &&
 				   !(op->upcall.type ==
 					ORANGEFS_VFS_OP_FS_MOUNT ||
 				     op->upcall.type ==
-					ORANGEFS_VFS_OP_GETATTR)) {
+					ORANGEFS_VFS_OP_GETATTR ||
+				     op->upcall.type ==
+					ORANGEFS_VFS_OP_FS_UMOUNT)) {
 				gossip_debug(GOSSIP_DEV_DEBUG,
 				    "orangefs: skipping op tag %llu %s\n",
 				    llu(op->tag), get_opname_string(op));
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 5e48a0be9761..8afac46fcc87 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -249,6 +249,7 @@ struct orangefs_sb_info_s {
 	char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
 	struct super_block *sb;
 	int mount_pending;
+	int no_list;
 	struct list_head list;
 };
 
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index cd261c8de53a..629d8c917fa6 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -493,7 +493,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	if (ret) {
 		d = ERR_PTR(ret);
-		goto free_op;
+		goto free_sb_and_op;
 	}
 
 	/*
@@ -519,6 +519,9 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 	spin_unlock(&orangefs_superblocks_lock);
 	op_release(new_op);
 
+	/* Must be removed from the list now. */
+	ORANGEFS_SB(sb)->no_list = 0;
+
 	if (orangefs_userspace_version >= 20906) {
 		new_op = op_alloc(ORANGEFS_VFS_OP_FEATURES);
 		if (!new_op)
@@ -533,6 +536,10 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	return dget(sb->s_root);
 
+free_sb_and_op:
+	/* Will call orangefs_kill_sb with sb not in list. */
+	ORANGEFS_SB(sb)->no_list = 1;
+	deactivate_locked_super(sb);
 free_op:
 	gossip_err("orangefs_mount: mount request failed with %d\n", ret);
 	if (ret == -EINVAL) {
@@ -558,12 +565,14 @@ void orangefs_kill_sb(struct super_block *sb)
 	 */
 	 orangefs_unmount_sb(sb);
 
-	/* remove the sb from our list of orangefs specific sb's */
-
-	spin_lock(&orangefs_superblocks_lock);
-	__list_del_entry(&ORANGEFS_SB(sb)->list);	/* not list_del_init */
-	ORANGEFS_SB(sb)->list.prev = NULL;
-	spin_unlock(&orangefs_superblocks_lock);
+	if (!ORANGEFS_SB(sb)->no_list) {
+		/* remove the sb from our list of orangefs specific sb's */
+		spin_lock(&orangefs_superblocks_lock);
+		/* not list_del_init */
+		__list_del_entry(&ORANGEFS_SB(sb)->list);
+		ORANGEFS_SB(sb)->list.prev = NULL;
+		spin_unlock(&orangefs_superblocks_lock);
+	}
 
 	/*
 	 * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us
-- 
cgit v1.2.3


From 3f795cef0ecdf9bc980dd058d49bdab4b19af1d3 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 16 Apr 2017 10:00:14 +0200
Subject: parisc: Fix get_user() for 64-bit value on 32-bit kernel

This fixes a bug in which the upper 32-bits of a 64-bit value which is
read by get_user() was lost on a 32-bit kernel.
While touching this code, split out pre-loading of %sr2 space register
and clean up code indent.

Cc: <stable@vger.kernel.org> # v4.9+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/uaccess.h | 86 +++++++++++++++++++++++++--------------
 1 file changed, 55 insertions(+), 31 deletions(-)

diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 8442727f28d2..cbd4f4af8108 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -39,10 +39,10 @@
 #define get_user __get_user
 
 #if !defined(CONFIG_64BIT)
-#define LDD_USER(ptr)		__get_user_asm64(ptr)
+#define LDD_USER(val, ptr)	__get_user_asm64(val, ptr)
 #define STD_USER(x, ptr)	__put_user_asm64(x, ptr)
 #else
-#define LDD_USER(ptr)		__get_user_asm("ldd", ptr)
+#define LDD_USER(val, ptr)	__get_user_asm(val, "ldd", ptr)
 #define STD_USER(x, ptr)	__put_user_asm("std", x, ptr)
 #endif
 
@@ -97,63 +97,87 @@ struct exception_data {
 		" mtsp %0,%%sr2\n\t"		\
 		: : "r"(get_fs()) : )
 
-#define __get_user(x, ptr)                               \
-({                                                       \
-	register long __gu_err __asm__ ("r8") = 0;       \
-	register long __gu_val;				 \
-							 \
-	load_sr2();					 \
-	switch (sizeof(*(ptr))) {			 \
-	    case 1: __get_user_asm("ldb", ptr); break;   \
-	    case 2: __get_user_asm("ldh", ptr); break;   \
-	    case 4: __get_user_asm("ldw", ptr); break;   \
-	    case 8: LDD_USER(ptr);  break;		 \
-	    default: BUILD_BUG(); break;		 \
-	}                                                \
-							 \
-	(x) = (__force __typeof__(*(ptr))) __gu_val;	 \
-	__gu_err;                                        \
+#define __get_user_internal(val, ptr)			\
+({							\
+	register long __gu_err __asm__ ("r8") = 0;	\
+							\
+	switch (sizeof(*(ptr))) {			\
+	case 1: __get_user_asm(val, "ldb", ptr); break;	\
+	case 2: __get_user_asm(val, "ldh", ptr); break; \
+	case 4: __get_user_asm(val, "ldw", ptr); break; \
+	case 8: LDD_USER(val, ptr); break;		\
+	default: BUILD_BUG();				\
+	}						\
+							\
+	__gu_err;					\
 })
 
-#define __get_user_asm(ldx, ptr)                        \
+#define __get_user(val, ptr)				\
+({							\
+	load_sr2();					\
+	__get_user_internal(val, ptr);			\
+})
+
+#define __get_user_asm(val, ldx, ptr)			\
+{							\
+	register long __gu_val;				\
+							\
 	__asm__("1: " ldx " 0(%%sr2,%2),%0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		: "=r"(__gu_val), "=r"(__gu_err)        \
-		: "r"(ptr), "1"(__gu_err));
+		: "r"(ptr), "1"(__gu_err));		\
+							\
+	(val) = (__force __typeof__(*(ptr))) __gu_val;	\
+}
 
 #if !defined(CONFIG_64BIT)
 
-#define __get_user_asm64(ptr) 				\
+#define __get_user_asm64(val, ptr)			\
+{							\
+	union {						\
+		unsigned long long	l;		\
+		__typeof__(*(ptr))	t;		\
+	} __gu_tmp;					\
+							\
 	__asm__("   copy %%r0,%R0\n"			\
 		"1: ldw 0(%%sr2,%2),%0\n"		\
 		"2: ldw 4(%%sr2,%2),%R0\n"		\
 		"9:\n"					\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)	\
 		ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)	\
-		: "=r"(__gu_val), "=r"(__gu_err)	\
-		: "r"(ptr), "1"(__gu_err));
+		: "=&r"(__gu_tmp.l), "=r"(__gu_err)	\
+		: "r"(ptr), "1"(__gu_err));		\
+							\
+	(val) = __gu_tmp.t;				\
+}
 
 #endif /* !defined(CONFIG_64BIT) */
 
 
-#define __put_user(x, ptr)                                      \
+#define __put_user_internal(x, ptr)				\
 ({								\
 	register long __pu_err __asm__ ("r8") = 0;      	\
         __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);	\
 								\
-	load_sr2();						\
 	switch (sizeof(*(ptr))) {				\
-	    case 1: __put_user_asm("stb", __x, ptr); break;     \
-	    case 2: __put_user_asm("sth", __x, ptr); break;     \
-	    case 4: __put_user_asm("stw", __x, ptr); break;     \
-	    case 8: STD_USER(__x, ptr); break;			\
-	    default: BUILD_BUG(); break;			\
-	}                                                       \
+	case 1: __put_user_asm("stb", __x, ptr); break;		\
+	case 2: __put_user_asm("sth", __x, ptr); break;		\
+	case 4: __put_user_asm("stw", __x, ptr); break;		\
+	case 8: STD_USER(__x, ptr); break;			\
+	default: BUILD_BUG();					\
+	}							\
 								\
 	__pu_err;						\
 })
 
+#define __put_user(x, ptr)					\
+({								\
+	load_sr2();						\
+	__put_user_internal(x, ptr);				\
+})
+
+
 /*
  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
  * instead of writing. This is because they do not write to any memory
-- 
cgit v1.2.3


From 330c418638612d7658b6314e6a244fcb5f7efac5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 16 Apr 2017 23:17:37 +0900
Subject: Revert "cgroup: avoid attaching a cgroup root to two different
 superblocks"

This reverts commit bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e.

Andrei reports CRIU test hangs with the patch applied.  The bug fixed
by the patch isn't too likely to trigger in actual uses.  Revert the
patch for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrei Vagin <avagin@virtuozzo.com>
Link: http://lkml.kernel.org/r/20170414232737.GC20350@outlook.office365.com
---
 kernel/cgroup/cgroup-v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 12e19f0636ea..1dc22f6b49f5 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
 		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
-		if (IS_ERR_OR_NULL(pinned_sb) ||
+		if (IS_ERR(pinned_sb) ||
 		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))
-- 
cgit v1.2.3


From 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 Apr 2017 13:00:18 -0700
Subject: Linux 4.11-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index efa267a92ba6..5039b9148d15 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 271a8b428f8361f3ad4c599835ccd34dd458b212 Mon Sep 17 00:00:00 2001
From: Bert Kenward <bkenward@solarflare.com>
Date: Wed, 12 Apr 2017 17:06:52 +0100
Subject: sfc: limit the number of receive queues

The number of rx queues is determined by the rss_cpus parameter
or the cpu topology. If that is higher than EFX_MAX_RX_QUEUES the
driver can corrupt state.

Fixes: 8ceee660aacb ("New driver "sfc" for Solarstorm SFC4000 controller.")
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.c        | 7 +++++++
 drivers/net/ethernet/sfc/falcon/efx.c | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 50d28261b6b9..b9cb697b2818 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
 		free_cpumask_var(thread_mask);
 	}
 
+	if (count > EFX_MAX_RX_QUEUES) {
+		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+			       "Reducing number of rx queues from %u to %u.\n",
+			       count, EFX_MAX_RX_QUEUES);
+		count = EFX_MAX_RX_QUEUES;
+	}
+
 	/* If RSS is requested for the PF *and* VFs then we can't write RSS
 	 * table entries that are inaccessible to VFs
 	 */
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index f5e5cd1659a1..29614da91cbf 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
 		free_cpumask_var(thread_mask);
 	}
 
+	if (count > EF4_MAX_RX_QUEUES) {
+		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+			       "Reducing number of rx queues from %u to %u.\n",
+			       count, EF4_MAX_RX_QUEUES);
+		count = EF4_MAX_RX_QUEUES;
+	}
+
 	return count;
 }
 
-- 
cgit v1.2.3


From 1215e51edad1272e669172b26aa12aac94810c7f Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 12 Apr 2017 12:32:13 -0700
Subject: ipv4: fix a deadlock in ip_ra_control

Similar to commit 87e9f0315952
("ipv4: fix a potential deadlock in mcast getsockopt() path"),
there is a deadlock scenario for IP_ROUTER_ALERT too:

       CPU0                    CPU1
       ----                    ----
  lock(rtnl_mutex);
                               lock(sk_lock-AF_INET);
                               lock(rtnl_mutex);
  lock(sk_lock-AF_INET);

Fix this by always locking RTNL first on all setsockopt() paths.

Note, after this patch ip_ra_lock is no longer needed either.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c |  1 +
 net/ipv4/ipmr.c        | 11 ++---------
 net/ipv4/raw.c         |  2 ++
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ebd953bc5607..bda318ae9876 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -591,6 +591,7 @@ static bool setsockopt_needs_rtnl(int optname)
 	case MCAST_LEAVE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_UNBLOCK_SOURCE:
+	case IP_ROUTER_ALERT:
 		return true;
 	}
 	return false;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c0317c940bcd..b036e85e093b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk)
 	struct net *net = sock_net(sk);
 	struct mr_table *mrt;
 
-	rtnl_lock();
+	ASSERT_RTNL();
 	ipmr_for_each_table(mrt, net) {
 		if (sk == rtnl_dereference(mrt->mroute_sk)) {
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk)
 			mroute_clean_tables(mrt, false);
 		}
 	}
-	rtnl_unlock();
 }
 
 /* Socket options and virtual interface manipulation. The whole
@@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
 			ret = -EACCES;
 		} else {
-			/* We need to unlock here because mrtsock_destruct takes
-			 * care of rtnl itself and we can't change that due to
-			 * the IP_ROUTER_ALERT setsockopt which runs without it.
-			 */
-			rtnl_unlock();
 			ret = ip_ra_control(sk, 0, NULL);
-			goto out;
+			goto out_unlock;
 		}
 		break;
 	case MRT_ADD_VIF:
@@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	}
 out_unlock:
 	rtnl_unlock();
-out:
 	return ret;
 }
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8119e1f66e03..9d943974de2b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout)
 	/*
 	 * Raw sockets may have direct kernel references. Kill them.
 	 */
+	rtnl_lock();
 	ip_ra_control(sk, 0, NULL);
+	rtnl_unlock();
 
 	sk_common_release(sk);
 }
-- 
cgit v1.2.3


From 1862d6208db0aeca9c8ace44915b08d5ab2cd667 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 12 Apr 2017 19:24:35 -0400
Subject: net-timestamp: avoid use-after-free in ip_recv_error

Syzkaller reported a use-after-free in ip_recv_error at line

    info->ipi_ifindex = skb->dev->ifindex;

This function is called on dequeue from the error queue, at which
point the device pointer may no longer be valid.

Save ifindex on enqueue in __skb_complete_tx_timestamp, when the
pointer is valid or NULL. Store it in temporary storage skb->cb.

It is safe to reference skb->dev here, as called from device drivers
or dev_queue_xmit. The exception is when called from tcp_ack_tstamp;
in that case it is NULL and ifindex is set to 0 (invalid).

Do not return a pktinfo cmsg if ifindex is 0. This maintains the
current behavior of not returning a cmsg if skb->dev was NULL.

On dequeue, the ipv4 path will cast from sock_exterr_skb to
in_pktinfo. Both have ifindex as their first element, so no explicit
conversion is needed. This is by design, introduced in commit
0b922b7a829c ("net: original ingress device index in PKTINFO"). For
ipv6 ip6_datagram_support_cmsg converts to in6_pktinfo.

Fixes: 829ae9d61165 ("net-timestamp: allow reading recv cmsg on errqueue with origin tstamp")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c      |  1 +
 net/ipv4/ip_sockglue.c |  9 ++++-----
 net/ipv6/datagram.c    | 10 +---------
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f781092fda9..35c1e2460206 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3807,6 +3807,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 	serr->ee.ee_info = tstype;
 	serr->opt_stats = opt_stats;
+	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
 		serr->ee.ee_data = skb_shinfo(skb)->tskey;
 		if (sk->sk_protocol == IPPROTO_TCP &&
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index bda318ae9876..1d46d05efb0f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 		return false;
 
 	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
-	 * timestamp with egress dev. Not possible for packets without dev
+	 * timestamp with egress dev. Not possible for packets without iif
 	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 	 */
-	if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
-	    (!skb->dev))
+	info = PKTINFO_SKB_CB(skb);
+	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+	    !info->ipi_ifindex)
 		return false;
 
-	info = PKTINFO_SKB_CB(skb);
 	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
-	info->ipi_ifindex = skb->dev->ifindex;
 	return true;
 }
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index eec27f87efac..e011122ebd43 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
  * At one point, excluding local errors was a quick test to identify icmp/icmp6
  * errors. This is no longer true, but the test remained, so the v6 stack,
  * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
- *
- * Timestamp code paths do not initialize the fields expected by cmsg:
- * the PKTINFO fields in skb->cb[]. Fill those in here.
  */
 static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
 				      struct sock_exterr_skb *serr)
@@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
 	if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
 		return false;
 
-	if (!skb->dev)
+	if (!IP6CB(skb)->iif)
 		return false;
 
-	if (skb->protocol == htons(ETH_P_IPV6))
-		IP6CB(skb)->iif = skb->dev->ifindex;
-	else
-		PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
-
 	return true;
 }
 
-- 
cgit v1.2.3


From b47a57a273fc0c1a922fe8b6a139881971e3d8de Mon Sep 17 00:00:00 2001
From: George Cherian <george.cherian@cavium.com>
Date: Thu, 13 Apr 2017 07:25:01 +0000
Subject: net: thunderx: Fix set_max_bgx_per_node for 81xx rgx

Add the PCI_SUBSYS_DEVID_81XX_RGX and use the same to set
the max bgx per node count.

This fixes the issue intoduced by following commit
78aacb6f6 net: thunderx: Fix invalid mac addresses for node1 interfaces
With this commit the max_bgx_per_node for 81xx is set as 2 instead of 3
because of which num_vfs is always calculated as zero.

Signed-off-by: George Cherian <george.cherian@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 1 +
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 64a1095e4d14..a0ca68ce3fbb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev)
 	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
 	switch (sdevid) {
 	case PCI_SUBSYS_DEVID_81XX_BGX:
+	case PCI_SUBSYS_DEVID_81XX_RGX:
 		max_bgx_per_node = MAX_BGX_PER_CN81XX;
 		break;
 	case PCI_SUBSYS_DEVID_83XX_BGX:
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index c5080f2cead5..6b7fe6fdd13b 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -16,6 +16,7 @@
 /* Subsystem device IDs */
 #define PCI_SUBSYS_DEVID_88XX_BGX		0xA126
 #define PCI_SUBSYS_DEVID_81XX_BGX		0xA226
+#define PCI_SUBSYS_DEVID_81XX_RGX		0xA254
 #define PCI_SUBSYS_DEVID_83XX_BGX		0xA326
 
 #define    MAX_BGX_THUNDER			8 /* Max 2 nodes, 4 per node */
-- 
cgit v1.2.3


From 426c87caa2b4578b43cd3f689f02c65b743b2559 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Thu, 13 Apr 2017 10:57:15 -0600
Subject: net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule

Only need 1 l3mdev FIB rule. Fix setting NLM_F_EXCL in the nlmsghdr.

Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index d6988db1930d..7d909c8183e9 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
 		goto nla_put_failure;
 
 	/* rule only needs to appear once */
-	nlh->nlmsg_flags &= NLM_F_EXCL;
+	nlh->nlmsg_flags |= NLM_F_EXCL;
 
 	frh = nlmsg_data(nlh);
 	memset(frh, 0, sizeof(*frh));
-- 
cgit v1.2.3


From bfe72442578bb112626e476ffe1f276504d85b95 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 13 Apr 2017 14:11:27 -0500
Subject: net: phy: micrel: fix crash when statistic requested for KSZ9031 phy

Now the command:
	ethtool --phy-statistics eth0
will cause system crash with meassage "Unable to handle kernel NULL pointer
dereference at virtual address 00000010" from:

 (kszphy_get_stats) from [<c069f1d8>] (ethtool_get_phy_stats+0xd8/0x210)
 (ethtool_get_phy_stats) from [<c06a0738>] (dev_ethtool+0x5b8/0x228c)
 (dev_ethtool) from [<c06b5484>] (dev_ioctl+0x3fc/0x964)
 (dev_ioctl) from [<c0679f7c>] (sock_ioctl+0x170/0x2c0)
 (sock_ioctl) from [<c02419d4>] (do_vfs_ioctl+0xa8/0x95c)
 (do_vfs_ioctl) from [<c02422c4>] (SyS_ioctl+0x3c/0x64)
 (SyS_ioctl) from [<c0107d60>] (ret_fast_syscall+0x0/0x44)

The reason: phy_driver structure for KSZ9031 phy has no .probe() callback
defined. As result, struct phy_device *phydev->priv pointer will not be
initializes (null).
This issue will affect also following phys:
 KSZ8795, KSZ886X, KSZ8873MLL, KSZ9031, KSZ9021, KSZ8061, KS8737

Fix it by:
- adding .probe() = kszphy_probe() callback to KSZ9031, KSZ9021
phys. The kszphy_probe() can be re-used as it doesn't do any phy specific
settings.
- removing statistic callbacks from other phys (KSZ8795, KSZ886X,
KSZ8873MLL, KSZ8061, KS8737) as they doesn't have corresponding
statistic counters.

Fixes: 2b2427d06426 ("phy: micrel: Add ethtool statistics counters")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070ca676..1326d99771c1 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = {
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9021_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
@@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
 	.config_init	= ksz9031_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= ksz9031_read_status,
@@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
 	.read_status	= genphy_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 }, {
@@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = {
 	.config_init	= kszphy_config_init,
 	.config_aneg	= ksz8873mll_config_aneg,
 	.read_status	= ksz8873mll_read_status,
-	.get_sset_count = kszphy_get_sset_count,
-	.get_strings	= kszphy_get_strings,
-	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
 } };
-- 
cgit v1.2.3


From 81d2dd09ca11a2b834c8a915c6aabf8325d57ecf Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 14 Apr 2017 11:19:11 +0800
Subject: net: ethernet: mediatek: fix inconsistency between TXD and the used
 buffer

Fix inconsistency between the TXD descriptor and the used buffer that
would cause unexpected logic at mtk_tx_unmap() during skb housekeeping.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 9e757684816d..50a6fe87e990 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_dma *itxd, *txd;
-	struct mtk_tx_buf *tx_buf;
+	struct mtk_tx_buf *itx_buf, *tx_buf;
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
@@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
 	txd4 |= fport;
 
-	tx_buf = mtk_desc_to_tx_buf(ring, itxd);
-	memset(tx_buf, 0, sizeof(*tx_buf));
+	itx_buf = mtk_desc_to_tx_buf(ring, itxd);
+	memset(itx_buf, 0, sizeof(*itx_buf));
 
 	if (gso)
 		txd4 |= TX_DMA_TSO;
@@ -647,9 +647,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 		return -ENOMEM;
 
 	WRITE_ONCE(itxd->txd1, mapped_addr);
-	tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
-	dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
-	dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+	itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+	dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
+	dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
 
 	/* TX SG offload */
 	txd = itxd;
@@ -685,10 +685,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 					       last_frag * TX_DMA_LS0));
 			WRITE_ONCE(txd->txd4, fport);
 
-			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf = mtk_desc_to_tx_buf(ring, txd);
 			memset(tx_buf, 0, sizeof(*tx_buf));
-
+			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
 			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
 			dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
@@ -698,7 +697,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	/* store skb to cleanup */
-	tx_buf->skb = skb;
+	itx_buf->skb = skb;
 
 	WRITE_ONCE(itxd->txd4, txd4);
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
-- 
cgit v1.2.3


From 134d21525f5f7c89f1f6ce052a11ac09dc27b331 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 14 Apr 2017 11:19:12 +0800
Subject: net: ethernet: mediatek: fix inconsistency of port number carried in
 TXD

Fix port inconsistency on TXD due to hardware BUG that would cause
different port number is carried on the same TXD between tx_map()
and tx_unmap() with the iperf test. It would cause confusing BQL
logic which leads to kernel panic when dual GMAC runs concurrently.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 14 +++++++++-----
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 12 +++++++++---
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 50a6fe87e990..93949139e62c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -648,6 +648,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 	WRITE_ONCE(itxd->txd1, mapped_addr);
 	itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+	itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+			  MTK_TX_FLAGS_FPORT1;
 	dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
 	dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
 
@@ -689,6 +691,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 			memset(tx_buf, 0, sizeof(*tx_buf));
 			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+			tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+					 MTK_TX_FLAGS_FPORT1;
+
 			dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
 			dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
 			frag_size -= frag_map_size;
@@ -1011,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
 
 	while ((cpu != dma) && budget) {
 		u32 next_cpu = desc->txd2;
-		int mac;
+		int mac = 0;
 
 		desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
 		if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
 			break;
 
-		mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
-		       TX_DMA_FPORT_MASK;
-		mac--;
-
 		tx_buf = mtk_desc_to_tx_buf(ring, desc);
+		if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
+			mac = 1;
+
 		skb = tx_buf->skb;
 		if (!skb) {
 			condition = 1;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 99b1c8e9f16f..08285a96ff70 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -406,12 +406,18 @@ struct mtk_hw_stats {
 	struct u64_stats_sync	syncp;
 };
 
-/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
- * memory was allocated so that it can be freed properly
- */
 enum mtk_tx_flags {
+	/* PDMA descriptor can point at 1-2 segments. This enum allows us to
+	 * track how memory was allocated so that it can be freed properly.
+	 */
 	MTK_TX_FLAGS_SINGLE0	= 0x01,
 	MTK_TX_FLAGS_PAGE0	= 0x02,
+
+	/* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
+	 * SKB out instead of looking up through hardware TX descriptor.
+	 */
+	MTK_TX_FLAGS_FPORT0	= 0x04,
+	MTK_TX_FLAGS_FPORT1	= 0x08,
 };
 
 /* This enum allows us to identify how the clock is defined on the array of the
-- 
cgit v1.2.3


From 0aa8c13eb512823bc4d60397d666a6b6260bb965 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 14 Apr 2017 20:22:43 +0200
Subject: ipv6: drop non loopback packets claiming to originate from ::1

We lack a saddr check for ::1. This causes security issues e.g. with acls
permitting connections from ::1 because of assumption that these originate
from local machine.

Assuming a source address of ::1 is local seems reasonable.
RFC4291 doesn't allow such a source address either, so drop such packets.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_input.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4bce153..c45b12b4431c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 			max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
 	/*
 	 * RFC4291 2.5.3
+	 * The loopback address must not be used as the source address in IPv6
+	 * packets that are sent outside of a single node. [..]
 	 * A packet received on an interface with a destination address
 	 * of loopback must be dropped.
 	 */
-	if (!(dev->flags & IFF_LOOPBACK) &&
-	    ipv6_addr_loopback(&hdr->daddr))
+	if ((ipv6_addr_loopback(&hdr->saddr) ||
+	     ipv6_addr_loopback(&hdr->daddr)) &&
+	     !(dev->flags & IFF_LOOPBACK))
 		goto err;
 
 	/* RFC4291 Errata ID: 3480
-- 
cgit v1.2.3


From 6b1bb01bcc5be91452bd3b5bda4300f179fd4053 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 17 Apr 2017 03:12:06 +0200
Subject: bpf: fix cb access in socket filter programs on tail calls

Commit ff936a04e5f2 ("bpf: fix cb access in socket filter programs")
added a fix for socket filter programs such that in i) AF_PACKET the
20 bytes of skb->cb[] area gets zeroed before use in order to not leak
data, and ii) socket filter programs attached to TCP/UDP sockets need
to save/restore these 20 bytes since they are also used by protocol
layers at that time.

The problem is that bpf_prog_run_save_cb() and bpf_prog_run_clear_cb()
only look at the actual attached program to determine whether to zero
or save/restore the skb->cb[] parts. There can be cases where the
actual attached program does not access the skb->cb[], but the program
tail calls into another program which does access this area. In such
a case, the zero or save/restore is currently not performed.

Since the programs we tail call into are unknown at verification time
and can dynamically change, we need to assume that whenever the attached
program performs a tail call, that later programs could access the
skb->cb[], and therefore we need to always set cb_access to 1.

Fixes: ff936a04e5f2 ("bpf: fix cb access in socket filter programs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d755..ee5c969d5963 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -617,6 +617,13 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 			if (insn->imm == BPF_FUNC_xdp_adjust_head)
 				prog->xdp_adjust_head = 1;
 			if (insn->imm == BPF_FUNC_tail_call) {
+				/* If we tail call into other programs, we
+				 * cannot make any assumptions since they
+				 * can be replaced dynamically during runtime
+				 * in the program array.
+				 */
+				prog->cb_access = 1;
+
 				/* mark bpf_tail_call as different opcode
 				 * to avoid conditional branch in
 				 * interpeter for every normal call
-- 
cgit v1.2.3


From c2002f983767ea0a53acbb3e21f771e7a7e2ed28 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 17 Apr 2017 03:12:07 +0200
Subject: bpf: fix checking xdp_adjust_head on tail calls

Commit 17bedab27231 ("bpf: xdp: Allow head adjustment in XDP prog")
added the xdp_adjust_head bit to the BPF prog in order to tell drivers
that the program that is to be attached requires support for the XDP
bpf_xdp_adjust_head() helper such that drivers not supporting this
helper can reject the program. There are also drivers that do support
the helper, but need to check for xdp_adjust_head bit in order to move
packet metadata prepended by the firmware away for making headroom.

For these cases, the current check for xdp_adjust_head bit is insufficient
since there can be cases where the program itself does not use the
bpf_xdp_adjust_head() helper, but tail calls into another program that
uses bpf_xdp_adjust_head(). As such, the xdp_adjust_head bit is still
set to 0. Since the first program has no control over which program it
calls into, we need to assume that bpf_xdp_adjust_head() helper is used
upon tail calls. Thus, for the very same reasons in cb_access, set the
xdp_adjust_head bit to 1 when the main program uses tail calls.

Fixes: 17bedab27231 ("bpf: xdp: Allow head adjustment in XDP prog")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/syscall.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ee5c969d5963..821f9e807de5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -623,6 +623,7 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 				 * in the program array.
 				 */
 				prog->cb_access = 1;
+				prog->xdp_adjust_head = 1;
 
 				/* mark bpf_tail_call as different opcode
 				 * to avoid conditional branch in
-- 
cgit v1.2.3


From d879d0b8c183aabeb9a65eba91f3f9e3c7e7b905 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Apr 2017 11:44:27 +0900
Subject: ftrace: Fix function pid filter on instances

When function tracer has a pid filter, it adds a probe to sched_switch
to track if current task can be ignored.  The probe checks the
ftrace_ignore_pid from current tr to filter tasks.  But it misses to
delete the probe when removing an instance so that it can cause a crash
due to the invalid tr pointer (use-after-free).

This is easily reproducible with the following:

  # cd /sys/kernel/debug/tracing
  # mkdir instances/buggy
  # echo $$ > instances/buggy/set_ftrace_pid
  # rmdir instances/buggy

  ============================================================================
  BUG: KASAN: use-after-free in ftrace_filter_pid_sched_switch_probe+0x3d/0x90
  Read of size 8 by task kworker/0:1/17
  CPU: 0 PID: 17 Comm: kworker/0:1 Tainted: G    B           4.11.0-rc3  #198
  Call Trace:
   dump_stack+0x68/0x9f
   kasan_object_err+0x21/0x70
   kasan_report.part.1+0x22b/0x500
   ? ftrace_filter_pid_sched_switch_probe+0x3d/0x90
   kasan_report+0x25/0x30
   __asan_load8+0x5e/0x70
   ftrace_filter_pid_sched_switch_probe+0x3d/0x90
   ? fpid_start+0x130/0x130
   __schedule+0x571/0xce0
   ...

To fix it, use ftrace_clear_pids() to unregister the probe.  As
instance_rmdir() already updated ftrace codes, it can just free the
filter safely.

Link: http://lkml.kernel.org/r/20170417024430.21194-2-namhyung@kernel.org

Fixes: 0c8916c34203 ("tracing: Add rmdir to remove multibuffer instances")
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 9 +++++++++
 kernel/trace/trace.c  | 1 +
 kernel/trace/trace.h  | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 27bb2e61276e..dd3e91d68dc7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5566,6 +5566,15 @@ static void clear_ftrace_pids(struct trace_array *tr)
 	trace_free_pid_list(pid_list);
 }
 
+void ftrace_clear_pids(struct trace_array *tr)
+{
+	mutex_lock(&ftrace_lock);
+
+	clear_ftrace_pids(tr);
+
+	mutex_unlock(&ftrace_lock);
+}
+
 static void ftrace_pid_reset(struct trace_array *tr)
 {
 	mutex_lock(&ftrace_lock);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f35109514a01..d484452ae648 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7402,6 +7402,7 @@ static int instance_rmdir(const char *name)
 
 	tracing_set_nop(tr);
 	event_trace_del_tracer(tr);
+	ftrace_clear_pids(tr);
 	ftrace_destroy_function_files(tr);
 	tracefs_remove_recursive(tr->dir);
 	free_trace_buffers(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ae1cce91fead..d19d52d600d6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -896,6 +896,7 @@ int using_ftrace_ops_list_func(void);
 void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 void ftrace_init_tracefs_toplevel(struct trace_array *tr,
 				  struct dentry *d_tracer);
+void ftrace_clear_pids(struct trace_array *tr);
 #else
 static inline int ftrace_trace_task(struct trace_array *tr)
 {
@@ -914,6 +915,7 @@ ftrace_init_global_array_ops(struct trace_array *tr) { }
 static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
 static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
 static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_clear_pids(struct trace_array *tr) { }
 /* ftace_func_t type is not defined, use macro instead of static inline */
 #define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
-- 
cgit v1.2.3


From 093be89a12c8724883ac803420cba8b08a947d3b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Apr 2017 11:44:30 +0900
Subject: selftests: ftrace: Add a testcase for function PID filter

Like event pid filtering test, add function pid filtering test with the
new "function-fork" option.  It also tests it on an instance directory
so that it can verify the bug related pid filtering on instances.

Link: http://lkml.kernel.org/r/20170417024430.21194-5-namhyung@kernel.org

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../ftrace/test.d/ftrace/func-filter-pid.tc        | 98 ++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc

diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
new file mode 100644
index 000000000000..cd552f44c3b4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -0,0 +1,98 @@
+#!/bin/sh
+# description: ftrace - function pid filters
+
+# Make sure that function pid matching filter works.
+# Also test it on an instance directory
+
+if ! grep -q function available_tracers; then
+    echo "no function tracer configured"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_pid ]; then
+    echo "set_ftrace_pid not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+read PID _ < /proc/self/stat
+
+# default value of function-fork option
+orig_value=`grep function-fork trace_options`
+
+do_reset() {
+    reset_tracer
+    clear_trace
+    enable_tracing
+    echo > set_ftrace_filter
+    echo > set_ftrace_pid
+
+    echo $orig_value > trace_options
+}
+
+fail() { # msg
+    do_reset
+    echo $1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+do_test() {
+    disable_tracing
+
+    echo do_execve* > set_ftrace_filter
+    echo *do_fork >> set_ftrace_filter
+
+    echo $PID > set_ftrace_pid
+    echo function > current_tracer
+
+    # don't allow children to be traced
+    echo nofunction-fork > trace_options
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should be 0
+    if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then
+	fail "PID filtering not working?"
+    fi
+
+    disable_tracing
+    clear_trace
+
+    # allow children to be traced
+    echo function-fork > trace_options
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should NOT be 0
+    if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then
+	fail "PID filtering not following fork?"
+    fi
+}
+
+do_test
+
+mkdir instances/foo
+cd instances/foo
+do_test
+cd ../../
+rmdir instances/foo
+
+do_reset
+
+exit 0
-- 
cgit v1.2.3


From 9ed19c7695670d00455c1de4682d5c7f14618689 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 18 Apr 2017 12:32:21 -0400
Subject: selftests: ftrace: Add check for function-fork before running pid
 filter test

Have the func-filter-pid test check for the function-fork option before
testing it. It can still test the pid filtering, but will stop before
testing the function-fork option for children inheriting the pids.
This allows the test to be added before the function-fork feature, but after
a bug fix that triggers one of the bugs the test can cause.

Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 .../ftrace/test.d/ftrace/func-filter-pid.tc        | 27 ++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index cd552f44c3b4..bab5ff7c607e 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -19,10 +19,19 @@ if [ ! -f set_ftrace_filter ]; then
     exit_unsupported
 fi
 
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+    do_function_fork=0
+    echo "no option for function-fork found. Option will not be tested."
+fi
+
 read PID _ < /proc/self/stat
 
-# default value of function-fork option
-orig_value=`grep function-fork trace_options`
+if [ $do_function_fork -eq 1 ]; then
+    # default value of function-fork option
+    orig_value=`grep function-fork trace_options`
+fi
 
 do_reset() {
     reset_tracer
@@ -31,6 +40,10 @@ do_reset() {
     echo > set_ftrace_filter
     echo > set_ftrace_pid
 
+    if [ $do_function_fork -eq 0 ]; then
+	return
+    fi
+
     echo $orig_value > trace_options
 }
 
@@ -53,8 +66,10 @@ do_test() {
     echo $PID > set_ftrace_pid
     echo function > current_tracer
 
-    # don't allow children to be traced
-    echo nofunction-fork > trace_options
+    if [ $do_function_fork -eq 1 ]; then
+	# don't allow children to be traced
+	echo nofunction-fork > trace_options
+    fi
 
     enable_tracing
     yield
@@ -70,6 +85,10 @@ do_test() {
     disable_tracing
     clear_trace
 
+    if [ $do_function_fork -eq 0 ]; then
+	return
+    fi
+
     # allow children to be traced
     echo function-fork > trace_options
 
-- 
cgit v1.2.3


From 395102db441abb8fd18fec5dd81428b5120232af Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Mon, 10 Apr 2017 11:50:52 -0400
Subject: sparc64: Use LOCKDEP_SMALL, not PROVE_LOCKING_SMALL

CONFIG_PROVE_LOCKING_SMALL shrinks the memory usage of lockdep so the
kernel text, data, and bss fit in the required 32MB limit, but this
option is not set for every config that enables lockdep.

A 4.10 kernel fails to boot with the console output

    Kernel: Using 8 locked TLB entries for main kernel image.
    hypervisor_tlb_lock[2000000:0:8000000071c007c3:1]: errors with f
    Program terminated

with these config options

    CONFIG_LOCKDEP=y
    CONFIG_LOCK_STAT=y
    CONFIG_PROVE_LOCKING=n

To fix, rename CONFIG_PROVE_LOCKING_SMALL to CONFIG_LOCKDEP_SMALL, and
enable this option with CONFIG_LOCKDEP=y so we get the reduced memory
usage every time lockdep is turned on.

Tested that CONFIG_LOCKDEP_SMALL is set to 'y' if and only if
CONFIG_LOCKDEP is set to 'y'.  When other lockdep-related config options
that select CONFIG_LOCKDEP are enabled (e.g. CONFIG_LOCK_STAT or
CONFIG_PROVE_LOCKING), verified that CONFIG_LOCKDEP_SMALL is also
enabled.

Fixes: e6b5f1be7afe ("config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc")
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Babu Moger <babu.moger@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/Kconfig                 | 2 +-
 kernel/locking/lockdep_internals.h | 6 +++---
 lib/Kconfig.debug                  | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 68ac5c7cd982..a59deaef21e5 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -43,7 +43,7 @@ config SPARC
 	select ARCH_HAS_SG_CHAIN
 	select CPU_NO_EFFICIENT_FFS
 	select HAVE_ARCH_HARDENED_USERCOPY
-	select PROVE_LOCKING_SMALL if PROVE_LOCKING
+	select LOCKDEP_SMALL if LOCKDEP
 	select ARCH_WANT_RELAX_ORDER
 
 config SPARC32
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c2b88490d857..c08fbd2f5ba9 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -46,13 +46,13 @@ enum {
 		(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
 
 /*
- * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
  * .data and .bss to fit in required 32MB limit for the kernel. With
- * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems.
  * So, reduce the static allocations for lockdeps related structures so that
  * everything fits in current required size limit.
  */
-#ifdef CONFIG_PROVE_LOCKING_SMALL
+#ifdef CONFIG_LOCKDEP_SMALL
 /*
  * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
  * we track.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 97d62c2da6c2..fa16c0f82d6e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1103,9 +1103,6 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/locking/lockdep-design.txt.
 
-config PROVE_LOCKING_SMALL
-	bool
-
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -1114,6 +1111,9 @@ config LOCKDEP
 	select KALLSYMS
 	select KALLSYMS_ALL
 
+config LOCKDEP_SMALL
+	bool
+
 config LOCK_STAT
 	bool "Lock usage statistics"
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-- 
cgit v1.2.3


From 544f8f935863c5a9ca3e34306ea3316095e7b7bf Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Mon, 17 Apr 2017 15:46:41 -0700
Subject: sparc64: Fix hugepage page table free

Make sure the start adderess is aligned to PMD_SIZE
boundary when freeing page table backing a hugepage
region. The issue was causing segfaults when a region
backed by 64K pages was unmapped since such a region
is in general not PMD_SIZE aligned.

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/hugetlbpage.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index ee5273ad918d..7c29d38e6b99 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -461,6 +461,22 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	pgd_t *pgd;
 	unsigned long next;
 
+	addr &= PMD_MASK;
+	if (addr < floor) {
+		addr += PMD_SIZE;
+		if (!addr)
+			return;
+	}
+	if (ceiling) {
+		ceiling &= PMD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		end -= PMD_SIZE;
+	if (addr > end - 1)
+		return;
+
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-- 
cgit v1.2.3


From 1debdc8f9ebd07daf140e417b3841596911e0066 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Mon, 17 Apr 2017 15:55:22 +0300
Subject: sh_eth: unmap DMA buffers when freeing rings

The DMA API debugging (when enabled) causes:

WARNING: CPU: 0 PID: 1445 at lib/dma-debug.c:519 add_dma_entry+0xe0/0x12c
DMA-API: exceeded 7 overlapping mappings of cacheline 0x01b2974d

to be  printed after repeated initialization of the Ether device, e.g.
suspend/resume or 'ifconfig' up/down. This is because DMA buffers mapped
using dma_map_single() in sh_eth_ring_format() and sh_eth_start_xmit() are
never unmapped. Resolve this problem by unmapping the buffers when freeing
the descriptor  rings;  in order  to do it right, we'd have to add an extra
parameter to sh_eth_txfree() (we rename this function to sh_eth_tx_free(),
while at it).

Based on the commit a47b70ea86bd ("ravb: unmap descriptors when freeing
rings").

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 122 +++++++++++++++++++---------------
 1 file changed, 67 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 54248775f227..f68c4db656ed 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1127,12 +1127,70 @@ static struct mdiobb_ops bb_ops = {
 	.get_mdio_data = sh_get_mdio,
 };
 
+/* free Tx skb function */
+static int sh_eth_tx_free(struct net_device *ndev, bool sent_only)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+	struct sh_eth_txdesc *txdesc;
+	int free_num = 0;
+	int entry;
+	bool sent;
+
+	for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
+		entry = mdp->dirty_tx % mdp->num_tx_ring;
+		txdesc = &mdp->tx_ring[entry];
+		sent = !(txdesc->status & cpu_to_le32(TD_TACT));
+		if (sent_only && !sent)
+			break;
+		/* TACT bit must be checked before all the following reads */
+		dma_rmb();
+		netif_info(mdp, tx_done, ndev,
+			   "tx entry %d status 0x%08x\n",
+			   entry, le32_to_cpu(txdesc->status));
+		/* Free the original skb. */
+		if (mdp->tx_skbuff[entry]) {
+			dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
+					 le32_to_cpu(txdesc->len) >> 16,
+					 DMA_TO_DEVICE);
+			dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
+			mdp->tx_skbuff[entry] = NULL;
+			free_num++;
+		}
+		txdesc->status = cpu_to_le32(TD_TFP);
+		if (entry >= mdp->num_tx_ring - 1)
+			txdesc->status |= cpu_to_le32(TD_TDLE);
+
+		if (sent) {
+			ndev->stats.tx_packets++;
+			ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
+		}
+	}
+	return free_num;
+}
+
 /* free skb and descriptor buffer */
 static void sh_eth_ring_free(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ringsize, i;
 
+	if (mdp->rx_ring) {
+		for (i = 0; i < mdp->num_rx_ring; i++) {
+			if (mdp->rx_skbuff[i]) {
+				struct sh_eth_rxdesc *rxdesc = &mdp->rx_ring[i];
+
+				dma_unmap_single(&ndev->dev,
+						 le32_to_cpu(rxdesc->addr),
+						 ALIGN(mdp->rx_buf_sz, 32),
+						 DMA_FROM_DEVICE);
+			}
+		}
+		ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
+		dma_free_coherent(NULL, ringsize, mdp->rx_ring,
+				  mdp->rx_desc_dma);
+		mdp->rx_ring = NULL;
+	}
+
 	/* Free Rx skb ringbuffer */
 	if (mdp->rx_skbuff) {
 		for (i = 0; i < mdp->num_rx_ring; i++)
@@ -1141,27 +1199,18 @@ static void sh_eth_ring_free(struct net_device *ndev)
 	kfree(mdp->rx_skbuff);
 	mdp->rx_skbuff = NULL;
 
-	/* Free Tx skb ringbuffer */
-	if (mdp->tx_skbuff) {
-		for (i = 0; i < mdp->num_tx_ring; i++)
-			dev_kfree_skb(mdp->tx_skbuff[i]);
-	}
-	kfree(mdp->tx_skbuff);
-	mdp->tx_skbuff = NULL;
-
-	if (mdp->rx_ring) {
-		ringsize = sizeof(struct sh_eth_rxdesc) * mdp->num_rx_ring;
-		dma_free_coherent(NULL, ringsize, mdp->rx_ring,
-				  mdp->rx_desc_dma);
-		mdp->rx_ring = NULL;
-	}
-
 	if (mdp->tx_ring) {
+		sh_eth_tx_free(ndev, false);
+
 		ringsize = sizeof(struct sh_eth_txdesc) * mdp->num_tx_ring;
 		dma_free_coherent(NULL, ringsize, mdp->tx_ring,
 				  mdp->tx_desc_dma);
 		mdp->tx_ring = NULL;
 	}
+
+	/* Free Tx skb ringbuffer */
+	kfree(mdp->tx_skbuff);
+	mdp->tx_skbuff = NULL;
 }
 
 /* format skb and descriptor buffer */
@@ -1409,43 +1458,6 @@ static void sh_eth_dev_exit(struct net_device *ndev)
 	update_mac_address(ndev);
 }
 
-/* free Tx skb function */
-static int sh_eth_txfree(struct net_device *ndev)
-{
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct sh_eth_txdesc *txdesc;
-	int free_num = 0;
-	int entry;
-
-	for (; mdp->cur_tx - mdp->dirty_tx > 0; mdp->dirty_tx++) {
-		entry = mdp->dirty_tx % mdp->num_tx_ring;
-		txdesc = &mdp->tx_ring[entry];
-		if (txdesc->status & cpu_to_le32(TD_TACT))
-			break;
-		/* TACT bit must be checked before all the following reads */
-		dma_rmb();
-		netif_info(mdp, tx_done, ndev,
-			   "tx entry %d status 0x%08x\n",
-			   entry, le32_to_cpu(txdesc->status));
-		/* Free the original skb. */
-		if (mdp->tx_skbuff[entry]) {
-			dma_unmap_single(&ndev->dev, le32_to_cpu(txdesc->addr),
-					 le32_to_cpu(txdesc->len) >> 16,
-					 DMA_TO_DEVICE);
-			dev_kfree_skb_irq(mdp->tx_skbuff[entry]);
-			mdp->tx_skbuff[entry] = NULL;
-			free_num++;
-		}
-		txdesc->status = cpu_to_le32(TD_TFP);
-		if (entry >= mdp->num_tx_ring - 1)
-			txdesc->status |= cpu_to_le32(TD_TDLE);
-
-		ndev->stats.tx_packets++;
-		ndev->stats.tx_bytes += le32_to_cpu(txdesc->len) >> 16;
-	}
-	return free_num;
-}
-
 /* Packet receive function */
 static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota)
 {
@@ -1690,7 +1702,7 @@ static void sh_eth_error(struct net_device *ndev, u32 intr_status)
 			   intr_status, mdp->cur_tx, mdp->dirty_tx,
 			   (u32)ndev->state, edtrr);
 		/* dirty buffer free */
-		sh_eth_txfree(ndev);
+		sh_eth_tx_free(ndev, true);
 
 		/* SH7712 BUG */
 		if (edtrr ^ sh_eth_get_edtrr_trns(mdp)) {
@@ -1751,7 +1763,7 @@ static irqreturn_t sh_eth_interrupt(int irq, void *netdev)
 		/* Clear Tx interrupts */
 		sh_eth_write(ndev, intr_status & cd->tx_check, EESR);
 
-		sh_eth_txfree(ndev);
+		sh_eth_tx_free(ndev, true);
 		netif_wake_queue(ndev);
 	}
 
@@ -2412,7 +2424,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	spin_lock_irqsave(&mdp->lock, flags);
 	if ((mdp->cur_tx - mdp->dirty_tx) >= (mdp->num_tx_ring - 4)) {
-		if (!sh_eth_txfree(ndev)) {
+		if (!sh_eth_tx_free(ndev, true)) {
 			netif_warn(mdp, tx_queued, ndev, "TxFD exhausted.\n");
 			netif_stop_queue(ndev);
 			spin_unlock_irqrestore(&mdp->lock, flags);
-- 
cgit v1.2.3