115 files changed, 1723 insertions, 751 deletions
diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts
index eed89e659143..a1f4d6d5a569 100644
--- a/arch/arm/boot/dts/da850-lcdk.dts
+++ b/arch/arm/boot/dts/da850-lcdk.dts
@@ -293,12 +293,12 @@
 					label = "u-boot env";
 					reg = <0 0x020000>;
 				};
-				partition@0x020000 {
+				partition@20000 {
 					/* The LCDK defaults to booting from this partition */
 					label = "u-boot";
 					reg = <0x020000 0x080000>;
 				};
-				partition@0x0a0000 {
+				partition@a0000 {
 					label = "free space";
 					reg = <0x0a0000 0>;
 				};
diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
index cf2f5240e176..27cc913ca0f5 100644
--- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
+++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts
@@ -53,7 +53,8 @@
 		};
 
 		pinctrl: pin-controller@10000 {
-			pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+			pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+				     &pmx_gpio_header_gpo>;
 			pinctrl-names = "default";
 
 			pmx_uart0: pmx-uart0 {
@@ -85,11 +86,16 @@
 			 * ground.
 			 */
 			pmx_gpio_header: pmx-gpio-header {
-				marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+				marvell,pins = "mpp17", "mpp29", "mpp28",
 					       "mpp35", "mpp34", "mpp40";
 				marvell,function = "gpio";
 			};
 
+			pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+				marvell,pins = "mpp7";
+				marvell,function = "gpo";
+			};
+
 			pmx_gpio_init: pmx-init {
 				marvell,pins = "mpp38";
 				marvell,function = "gpio";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 5840f5c75c3b..4f2f2eea0755 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -1104,7 +1104,7 @@
 
 					be1_out_tcon0: endpoint@0 {
 						reg = <0>;
-						remote-endpoint = <&tcon1_in_be0>;
+						remote-endpoint = <&tcon0_in_be1>;
 					};
 
 					be1_out_tcon1: endpoint@1 {
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 59655e42e4b0..bd0cd3204273 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -1354,7 +1354,7 @@
 
 					be1_out_tcon0: endpoint@0 {
 						reg = <0>;
-						remote-endpoint = <&tcon1_in_be0>;
+						remote-endpoint = <&tcon0_in_be1>;
 					};
 
 					be1_out_tcon1: endpoint@1 {
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 5caaf971fb50..df433abfcb02 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -10,6 +10,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
+CONFIG_CMA=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CPU_FREQ=y
@@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_AHCI_SUNXI=y
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 4425189bb24c..a5edea07b8d5 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -27,14 +27,58 @@
 
 int bpf_jit_enable __read_mostly;
 
+/*
+ * eBPF prog stack layout:
+ *
+ *                         high
+ * original ARM_SP =>     +-----+
+ *                        |     | callee saved registers
+ *                        +-----+ <= (BPF_FP + SCRATCH_SIZE)
+ *                        | ... | eBPF JIT scratch space
+ * eBPF fp register =>    +-----+
+ *   (BPF_FP)             | ... | eBPF prog stack
+ *                        +-----+
+ *                        |RSVD | JIT scratchpad
+ * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ *                        |     |
+ *                        | ... | Function call stack
+ *                        |     |
+ *                        +-----+
+ *                          low
+ *
+ * The callee saved registers depends on whether frame pointers are enabled.
+ * With frame pointers (to be compliant with the ABI):
+ *
+ *                                high
+ * original ARM_SP =>     +------------------+ \
+ *                        |        pc        | |
+ * current ARM_FP =>      +------------------+ } callee saved registers
+ *                        |r4-r8,r10,fp,ip,lr| |
+ *                        +------------------+ /
+ *                                low
+ *
+ * Without frame pointers:
+ *
+ *                                high
+ * original ARM_SP =>     +------------------+
+ *                        | r4-r8,r10,fp,lr  | callee saved registers
+ * current ARM_FP =>      +------------------+
+ *                                low
+ *
+ * When popping registers off the stack at the end of a BPF function, we
+ * reference them via the current ARM_FP register.
+ */
+#define CALLEE_MASK	(1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
+			 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
+			 1 << ARM_FP)
+#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
+#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
+
 #define STACK_OFFSET(k)	(k)
 #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)	/* TEMP Register 1 */
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)	/* TEMP Register 2 */
 #define TCALL_CNT	(MAX_BPF_JIT_REG + 2)	/* Tail Call Count */
 
-/* Flags used for JIT optimization */
-#define SEEN_CALL	(1 << 0)
-
 #define FLAG_IMM_OVERFLOW	(1 << 0)
 
 /*
@@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = {
  * idx			:	index of current last JITed instruction.
  * prologue_bytes	:	bytes used in prologue.
  * epilogue_offset	:	offset of epilogue starting.
- * seen			:	bit mask used for JIT optimization.
  * offsets		:	array of eBPF instruction offsets in
  *				JITed code.
  * target		:	final JITed code.
@@ -110,7 +153,6 @@ struct jit_ctx {
 	unsigned int idx;
 	unsigned int prologue_bytes;
 	unsigned int epilogue_offset;
-	u32 seen;
 	u32 flags;
 	u32 *offsets;
 	u32 *target;
@@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size)
 		*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
 }
 
-/* Stack must be multiples of 16 Bytes */
-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+/* EABI requires the stack to be aligned to 64-bit boundaries */
+#define STACK_ALIGNMENT	8
+#else
+/* Stack must be aligned to 32-bit boundaries */
+#define STACK_ALIGNMENT	4
+#endif
 
 /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
  * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
@@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size)
 	 + SCRATCH_SIZE + \
 	 + 4 /* extra for skb_copy_bits buffer */)
 
-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
 /* Get the offset of eBPF REGISTERs stored on scratch space. */
 #define STACK_VAR(off) (STACK_SIZE-off-4)
@@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
 		emit_mov_i_no8m(rd, val, ctx);
 }
 
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
 {
-	ctx->seen |= SEEN_CALL;
-#if __LINUX_ARM_ARCH__ < 5
-	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
-
 	if (elf_hwcap & HWCAP_THUMB)
 		emit(ARM_BX(tgt_reg), ctx);
 	else
 		emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+}
+
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 5
+	emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+	emit_bx_r(tgt_reg, ctx);
 #else
 	emit(ARM_BLX_R(tgt_reg), ctx);
 #endif
@@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
 	}
 
 	/* Call appropriate function */
-	ctx->seen |= SEEN_CALL;
 	emit_mov_i(ARM_IP, op == BPF_DIV ?
 		   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
 	emit_blx_r(ARM_IP, ctx);
@@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
 	/* Do LSH operation */
 	emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
 	emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
-	/* As we are using ARM_LR */
-	ctx->seen |= SEEN_CALL;
 	emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
 	emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
@@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
 	/* Do the ARSH operation */
 	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
 	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-	/* As we are using ARM_LR */
-	ctx->seen |= SEEN_CALL;
 	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
 	_emit(ARM_COND_MI, ARM_B(0), ctx);
@@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
 	/* Do LSH operation */
 	emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
 	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-	/* As we are using ARM_LR */
-	ctx->seen |= SEEN_CALL;
 	emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
@@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
 	/* Do Multiplication */
 	emit(ARM_MUL(ARM_IP, rd, rn), ctx);
 	emit(ARM_MUL(ARM_LR, rm, rt), ctx);
-	/* As we are using ARM_LR */
-	ctx->seen |= SEEN_CALL;
 	emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
 
 	emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
@@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
 }
 
 /* dst = *(size*)(src + off) */
-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
-			      const s32 off, struct jit_ctx *ctx, const u8 sz){
+static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
+			      s32 off, struct jit_ctx *ctx, const u8 sz){
 	const u8 *tmp = bpf2a32[TMP_REG_1];
-	u8 rd = dstk ? tmp[1] : dst;
+	const u8 *rd = dstk ? tmp : dst;
 	u8 rm = src;
+	s32 off_max;
 
-	if (off) {
+	if (sz == BPF_H)
+		off_max = 0xff;
+	else
+		off_max = 0xfff;
+
+	if (off < 0 || off > off_max) {
 		emit_a32_mov_i(tmp[0], off, false, ctx);
 		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
 		rm = tmp[0];
+		off = 0;
+	} else if (rd[1] == rm) {
+		emit(ARM_MOV_R(tmp[0], rm), ctx);
+		rm = tmp[0];
 	}
 	switch (sz) {
-	case BPF_W:
-		/* Load a Word */
-		emit(ARM_LDR_I(rd, rm, 0), ctx);
+	case BPF_B:
+		/* Load a Byte */
+		emit(ARM_LDRB_I(rd[1], rm, off), ctx);
+		emit_a32_mov_i(dst[0], 0, dstk, ctx);
 		break;
 	case BPF_H:
 		/* Load a HalfWord */
-		emit(ARM_LDRH_I(rd, rm, 0), ctx);
+		emit(ARM_LDRH_I(rd[1], rm, off), ctx);
+		emit_a32_mov_i(dst[0], 0, dstk, ctx);
 		break;
-	case BPF_B:
-		/* Load a Byte */
-		emit(ARM_LDRB_I(rd, rm, 0), ctx);
+	case BPF_W:
+		/* Load a Word */
+		emit(ARM_LDR_I(rd[1], rm, off), ctx);
+		emit_a32_mov_i(dst[0], 0, dstk, ctx);
+		break;
+	case BPF_DW:
+		/* Load a Double Word */
+		emit(ARM_LDR_I(rd[1], rm, off), ctx);
+		emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
 		break;
 	}
 	if (dstk)
-		emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+		emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
+	if (dstk && sz == BPF_DW)
+		emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
 }
 
 /* Arithmatic Operation */
@@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
 			     const u8 rn, struct jit_ctx *ctx, u8 op) {
 	switch (op) {
 	case BPF_JSET:
-		ctx->seen |= SEEN_CALL;
 		emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
 		emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
 		emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
@@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	const u8 *tcc = bpf2a32[TCALL_CNT];
 	const int idx0 = ctx->idx;
 #define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (out_offset - (cur_offset) - 2)
 	u32 off, lo, hi;
 
 	/* if (index >= array->map.max_entries)
@@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit_a32_mov_i(tmp[1], off, false, ctx);
 	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
 	emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
-	/* index (64 bit) */
+	/* index is 32-bit for arrays */
 	emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
 	/* index >= array->map.max_entries */
 	emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
@@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit_a32_mov_i(tmp2[1], off, false, ctx);
 	emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
 	emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
-	emit(ARM_BX(tmp[1]), ctx);
+	emit_bx_r(tmp[1], ctx);
 
 	/* out: */
 	if (out_offset == -1)
@@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx)
 	const u8 r2 = bpf2a32[BPF_REG_1][1];
 	const u8 r3 = bpf2a32[BPF_REG_1][0];
 	const u8 r4 = bpf2a32[BPF_REG_6][1];
-	const u8 r5 = bpf2a32[BPF_REG_6][0];
-	const u8 r6 = bpf2a32[TMP_REG_1][1];
-	const u8 r7 = bpf2a32[TMP_REG_1][0];
-	const u8 r8 = bpf2a32[TMP_REG_2][1];
-	const u8 r10 = bpf2a32[TMP_REG_2][0];
 	const u8 fplo = bpf2a32[BPF_REG_FP][1];
 	const u8 fphi = bpf2a32[BPF_REG_FP][0];
-	const u8 sp = ARM_SP;
 	const u8 *tcc = bpf2a32[TCALL_CNT];
 
-	u16 reg_set = 0;
-
-	/*
-	 * eBPF prog stack layout
-	 *
-	 *                         high
-	 * original ARM_SP =>     +-----+ eBPF prologue
-	 *                        |FP/LR|
-	 * current ARM_FP =>      +-----+
-	 *                        | ... | callee saved registers
-	 * eBPF fp register =>    +-----+ <= (BPF_FP)
-	 *                        | ... | eBPF JIT scratch space
-	 *                        |     | eBPF prog stack
-	 *                        +-----+
-	 *			  |RSVD | JIT scratchpad
-	 * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
-	 *                        |     |
-	 *                        | ... | Function call stack
-	 *                        |     |
-	 *                        +-----+
-	 *                          low
-	 */
-
 	/* Save callee saved registers. */
-	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
 #ifdef CONFIG_FRAME_POINTER
-	reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
-	emit(ARM_MOV_R(ARM_IP, sp), ctx);
+	u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
+	emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
 	emit(ARM_PUSH(reg_set), ctx);
 	emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
 #else
-	/* Check if call instruction exists in BPF body */
-	if (ctx->seen & SEEN_CALL)
-		reg_set |= (1<<ARM_LR);
-	emit(ARM_PUSH(reg_set), ctx);
+	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
+	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
 	/* Save frame pointer for later */
-	emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
 
 	ctx->stack_size = imm8m(STACK_SIZE);
 
@@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx)
 	/* end of prologue */
 }
 
+/* restore callee saved registers. */
 static void build_epilogue(struct jit_ctx *ctx)
 {
-	const u8 r4 = bpf2a32[BPF_REG_6][1];
-	const u8 r5 = bpf2a32[BPF_REG_6][0];
-	const u8 r6 = bpf2a32[TMP_REG_1][1];
-	const u8 r7 = bpf2a32[TMP_REG_1][0];
-	const u8 r8 = bpf2a32[TMP_REG_2][1];
-	const u8 r10 = bpf2a32[TMP_REG_2][0];
-	u16 reg_set = 0;
-
-	/* unwind function call stack */
-	emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
-
-	/* restore callee saved registers. */
-	reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
 #ifdef CONFIG_FRAME_POINTER
-	/* the first instruction of the prologue was: mov ip, sp */
-	reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+	/* When using frame pointers, some additional registers need to
+	 * be loaded. */
+	u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
+	emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
 	emit(ARM_LDM(ARM_SP, reg_set), ctx);
 #else
-	if (ctx->seen & SEEN_CALL)
-		reg_set |= (1<<ARM_PC);
 	/* Restore callee saved registers. */
-	emit(ARM_POP(reg_set), ctx);
-	/* Return back to the callee function */
-	if (!(ctx->seen & SEEN_CALL))
-		emit(ARM_BX(ARM_LR), ctx);
+	emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
+	emit(ARM_POP(CALLEE_POP_MASK), ctx);
 #endif
 }
 
@@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 			emit_rev32(rt, rt, ctx);
 			goto emit_bswap_uxt;
 		case 64:
-			/* Because of the usage of ARM_LR */
-			ctx->seen |= SEEN_CALL;
 			emit_rev32(ARM_LR, rt, ctx);
 			emit_rev32(rt, rd, ctx);
 			emit(ARM_MOV_R(rd, ARM_LR), ctx);
@@ -1448,22 +1460,7 @@ exit:
 		rn = sstk ? tmp2[1] : src_lo;
 		if (sstk)
 			emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
-		switch (BPF_SIZE(code)) {
-		case BPF_W:
-			/* Load a Word */
-		case BPF_H:
-			/* Load a Half-Word */
-		case BPF_B:
-			/* Load a Byte */
-			emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
-			emit_a32_mov_i(dst_hi, 0, dstk, ctx);
-			break;
-		case BPF_DW:
-			/* Load a double word */
-			emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
-			emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
-			break;
-		}
+		emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
 		break;
 	/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
 	case BPF_LD | BPF_ABS | BPF_W:
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 7c9bdc7ab50b..9db19314c60c 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -66,6 +66,7 @@
 				     <&cpu1>,
 				     <&cpu2>,
 				     <&cpu3>;
+		interrupt-parent = <&intc>;
 	};
 
 	psci {
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
index e3b64d03fbd8..9c7724e82aff 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
@@ -63,8 +63,10 @@
 			cpm_ethernet: ethernet@0 {
 				compatible = "marvell,armada-7k-pp22";
 				reg = <0x0 0x100000>, <0x129000 0xb000>;
-				clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>;
-				clock-names = "pp_clk", "gop_clk", "mg_clk";
+				clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>,
+					 <&cpm_clk 1 5>, <&cpm_clk 1 18>;
+				clock-names = "pp_clk", "gop_clk",
+					      "mg_clk","axi_clk";
 				marvell,system-controller = <&cpm_syscon0>;
 				status = "disabled";
 				dma-coherent;
@@ -155,7 +157,8 @@
 				#size-cells = <0>;
 				compatible = "marvell,orion-mdio";
 				reg = <0x12a200 0x10>;
-				clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>;
+				clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>,
+					 <&cpm_clk 1 6>, <&cpm_clk 1 18>;
 				status = "disabled";
 			};
 
@@ -338,8 +341,8 @@
 				compatible = "marvell,armada-cp110-sdhci";
 				reg = <0x780000 0x300>;
 				interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>;
-				clock-names = "core";
-				clocks = <&cpm_clk 1 4>;
+				clock-names = "core","axi";
+				clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>;
 				dma-coherent;
 				status = "disabled";
 			};
diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
index 0d51096c69f8..87ac68b2cf37 100644
--- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
@@ -63,8 +63,10 @@
 			cps_ethernet: ethernet@0 {
 				compatible = "marvell,armada-7k-pp22";
 				reg = <0x0 0x100000>, <0x129000 0xb000>;
-				clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>;
-				clock-names = "pp_clk", "gop_clk", "mg_clk";
+				clocks = <&cps_clk 1 3>, <&cps_clk 1 9>,
+					 <&cps_clk 1 5>, <&cps_clk 1 18>;
+				clock-names = "pp_clk", "gop_clk",
+					      "mg_clk", "axi_clk";
 				marvell,system-controller = <&cps_syscon0>;
 				status = "disabled";
 				dma-coherent;
@@ -155,7 +157,8 @@
 				#size-cells = <0>;
 				compatible = "marvell,orion-mdio";
 				reg = <0x12a200 0x10>;
-				clocks = <&cps_clk 1 9>, <&cps_clk 1 5>;
+				clocks = <&cps_clk 1 9>, <&cps_clk 1 5>,
+					 <&cps_clk 1 6>, <&cps_clk 1 18>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index acaa935ed977..cbfe890b0ee5 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -162,7 +162,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 /* Stack must be multiples of 16B */
 #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
 
-#define PROLOGUE_OFFSET 8
+/* Tail call offset to jump into */
+#define PROLOGUE_OFFSET 7
 
 static int build_prologue(struct jit_ctx *ctx)
 {
@@ -214,19 +215,19 @@ static int build_prologue(struct jit_ctx *ctx)
 	/* Initialize tail_call_cnt */
 	emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
-	/* 4 byte extra for skb_copy_bits buffer */
-	ctx->stack_size = prog->aux->stack_depth + 4;
-	ctx->stack_size = STACK_ALIGN(ctx->stack_size);
-
-	/* Set up function call stack */
-	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
-
 	cur_offset = ctx->idx - idx0;
 	if (cur_offset != PROLOGUE_OFFSET) {
 		pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
 			    cur_offset, PROLOGUE_OFFSET);
 		return -1;
 	}
+
+	/* 4 byte extra for skb_copy_bits buffer */
+	ctx->stack_size = prog->aux->stack_depth + 4;
+	ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+
+	/* Set up function call stack */
+	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 	return 0;
 }
 
@@ -274,11 +275,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit(A64_LDR64(prg, tmp, prg), ctx);
 	emit(A64_CBZ(1, prg, jmp_offset), ctx);
 
-	/* goto *(prog->bpf_func + prologue_size); */
+	/* goto *(prog->bpf_func + prologue_offset); */
 	off = offsetof(struct bpf_prog, bpf_func);
 	emit_a64_mov_i64(tmp, off, ctx);
 	emit(A64_LDR64(tmp, prg, tmp), ctx);
 	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
+	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
 	emit(A64_BR(tmp), ctx);
 
 	/* out: */
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 28e02c99be6d..762eeb0fcc1d 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)				\
 ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
-#define atomic_add_return(i,v)						\
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i)	__builtin_constant_p(i) ?		\
+		((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||	\
+		 (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+
+#define atomic_add_return(i, v)						\
 ({									\
-	int __ia64_aar_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)		\
-	     || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)		\
-	     || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)		\
-	     || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))		\
-		? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)	\
-		: ia64_atomic_add(__ia64_aar_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :	\
+				ia64_atomic_add(__i, v);		\
 })
 
-#define atomic_sub_return(i,v)						\
+#define atomic_sub_return(i, v)						\
 ({									\
-	int __ia64_asr_i = (i);						\
-	(__builtin_constant_p(i)					\
-	 && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)		\
-	     || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)		\
-	     || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)		\
-	     || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))	\
-		? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)	\
-		: ia64_atomic_sub(__ia64_asr_i, v);			\
+	int __i = (i);							\
+	static const int __ia64_atomic_p = __ia64_atomic_const(i);	\
+	__ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :	\
+				ia64_atomic_sub(__i, v);		\
 })
+#else
+#define atomic_add_return(i, v)	ia64_atomic_add(i, v)
+#define atomic_sub_return(i, v)	ia64_atomic_sub(i, v)
+#endif
 
 #define atomic_fetch_add(i,v)						\
 ({									\
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c51e6ce42e7a..2ed525a44734 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -166,6 +166,7 @@ config PPC
 	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_VULNERABILITIES	if PPC_BOOK3S_64
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
 	select GENERIC_SMP_IDLE_THREAD
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index f0461618bf7b..eca3f9c68907 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -353,6 +353,7 @@
 #define PROC_TABLE_GTSE		0x01
 
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
 
 /**
  * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9d213542a48b..8fd3a70047f1 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	unsigned short maj;
 	unsigned short min;
 
-	/* We only show online cpus: disable preempt (overzealous, I
-	 * knew) to prevent cpu going down. */
-	preempt_disable();
-	if (!cpu_online(cpu_id)) {
-		preempt_enable();
-		return 0;
-	}
-
 #ifdef CONFIG_SMP
 	pvr = per_cpu(cpu_pvr, cpu_id);
 #else
@@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #ifdef CONFIG_SMP
 	seq_printf(m, "\n");
 #endif
-
-	preempt_enable();
-
 	/* If this is the last cpu, print the summary */
 	if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
 		show_cpuinfo_summary(m);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 491be4179ddd..e67413f4a8f0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -38,6 +38,7 @@
 #include <linux/memory.h>
 #include <linux/nmi.h>
 
+#include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -901,4 +902,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
 	if (!no_rfi_flush)
 		rfi_flush_enable(enable);
 }
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+	if (val == 1)
+		rfi_flush_enable(true);
+	else if (val == 0)
+		rfi_flush_enable(false);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+	*val = rfi_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+	return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (rfi_flush)
+		return sprintf(buf, "Mitigation: RFI Flush\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index cab24f549e7c..0ddc7ac6c5f1 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu)
 	DUMP(p, kernel_toc, "lx");
 	DUMP(p, kernelbase, "lx");
 	DUMP(p, kernel_msr, "lx");
-	DUMP(p, emergency_sp, "p");
+	DUMP(p, emergency_sp, "px");
 #ifdef CONFIG_PPC_BOOK3S_64
-	DUMP(p, nmi_emergency_sp, "p");
-	DUMP(p, mc_emergency_sp, "p");
+	DUMP(p, nmi_emergency_sp, "px");
+	DUMP(p, mc_emergency_sp, "px");
 	DUMP(p, in_nmi, "x");
 	DUMP(p, in_mce, "x");
 	DUMP(p, hmi_event_available, "x");
@@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu)
 	DUMP(p, slb_cache_ptr, "x");
 	for (i = 0; i < SLB_CACHE_ENTRIES; i++)
 		printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
+
+	DUMP(p, rfi_flush_fallback_area, "px");
+	DUMP(p, l1d_flush_congruence, "llx");
+	DUMP(p, l1d_flush_sets, "llx");
 #endif
 	DUMP(p, dscr_default, "llx");
 #ifdef CONFIG_PPC_BOOK3E
-	DUMP(p, pgd, "p");
-	DUMP(p, kernel_pgd, "p");
-	DUMP(p, tcd_ptr, "p");
-	DUMP(p, mc_kstack, "p");
-	DUMP(p, crit_kstack, "p");
-	DUMP(p, dbg_kstack, "p");
+	DUMP(p, pgd, "px");
+	DUMP(p, kernel_pgd, "px");
+	DUMP(p, tcd_ptr, "px");
+	DUMP(p, mc_kstack, "px");
+	DUMP(p, crit_kstack, "px");
+	DUMP(p, dbg_kstack, "px");
 #endif
-	DUMP(p, __current, "p");
+	DUMP(p, __current, "px");
 	DUMP(p, kstack, "lx");
 	printf(" kstack_base          = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
 	DUMP(p, stab_rr, "lx");
@@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu)
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-	DUMP(p, core_idle_state_ptr, "p");
+	DUMP(p, core_idle_state_ptr, "px");
 	DUMP(p, thread_idle_state, "x");
 	DUMP(p, thread_mask, "x");
 	DUMP(p, subcore_sibling_mask, "x");
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a54f23a..60c4c342316c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
 	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popl	%esi
 	popl	%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4f8e1d35a97c..aa15b4c0e3d1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -491,6 +491,17 @@ ENTRY(__switch_to_asm)
 	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+	/*
+	 * When switching from a shallower to a deeper call stack
+	 * the RSB may either underflow or use entries populated
+	 * with userspace addresses. On CPUs where those concerns
+	 * exist, overwrite the RSB with entries which capture
+	 * speculative execution to prevent attack.
+	 */
+	FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
 	/* restore callee-saved registers */
 	popq	%r15
 	popq	%r14
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 005908ee9333..a2efb490f743 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
-	X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsx_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,	  hsx_rapl_init),
-	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
 
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index a9e57f08bfa6..98722773391d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447862f4..25b9375c1484 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -206,11 +206,11 @@
 #define X86_FEATURE_RETPOLINE		( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -245,6 +245,7 @@
 #define X86_FEATURE_AVX512IFMA		( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT		( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB		( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT		( 9*32+25) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512PF		( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER		( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD		( 9*32+28) /* AVX-512 Conflict Detection */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c9459a4c3c68..22c5f3e6f820 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
 
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
 
 static inline bool sme_active(void) { return false; }
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 402a11c803c3..7b45d8424150 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,7 +11,7 @@
  * Fill the CPU return stack buffer.
  *
  * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; jmp' loop to capture speculative execution.
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
  *
  * This is required in various cases for retpoline and IBRS-based
  * mitigations for the Spectre variant 2 vulnerability. Sometimes to
@@ -38,11 +38,13 @@
 	call	772f;				\
 773:	/* speculation trap */			\
 	pause;					\
+	lfence;					\
 	jmp	773b;				\
 772:						\
 	call	774f;				\
 775:	/* speculation trap */			\
 	pause;					\
+	lfence;					\
 	jmp	775b;				\
 774:						\
 	dec	reg;				\
@@ -73,6 +75,7 @@
 	call	.Ldo_rop_\@
 .Lspec_trap_\@:
 	pause
+	lfence
 	jmp	.Lspec_trap_\@
 .Ldo_rop_\@:
 	mov	\reg, (%_ASM_SP)
@@ -165,6 +168,7 @@
 	"       .align 16\n"					\
 	"901:	call   903f;\n"					\
 	"902:	pause;\n"					\
+	"    	lfence;\n"					\
 	"       jmp    902b;\n"					\
 	"       .align 16\n"					\
 	"903:	addl   $4, %%esp;\n"				\
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 880441f24146..25ddf02598d2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	if (apic_extnmi == APIC_EXTNMI_NONE)
+		value |= APIC_LVT_MASKED;
+	apic_write(APIC_LVT1, value);
+}
+
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f8b03bb8e725..3cc471beb50b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 
 		err = assign_irq_vector_policy(irqd, info);
 		trace_vector_setup(virq + i, false, err);
-		if (err)
+		if (err) {
+			irqd->chip_data = NULL;
+			free_apic_chip_data(apicd);
 			goto error;
+		}
 	}
 
 	return 0;
 
 error:
-	x86_vector_free_irqs(domain, virq, i + 1);
+	x86_vector_free_irqs(domain, virq, i);
 	return err;
 }
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc26185aa7..390b3dc3d438 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
 	return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86 == 6) {
+		switch (boot_cpu_data.x86_model) {
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_SKYLAKE_X:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
+			return true;
+		}
+	}
+	return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +231,24 @@ retpoline_auto:
 
 	spectre_v2_enabled = mode;
 	pr_info("%s\n", spectre_v2_strings[mode]);
+
+	/*
+	 * If neither SMEP or KPTI are available, there is a risk of
+	 * hitting userspace addresses in the RSB after a context switch
+	 * from a shallow call stack to a deeper one. To prevent this fill
+	 * the entire RSB, even when using IBRS.
+	 *
+	 * Skylake era CPUs have a separate issue with *underflow* of the
+	 * RSB, when they will predict 'ret' targets from the generic BTB.
+	 * The proper mitigation for this is IBRS. If IBRS is not supported
+	 * or deactivated in favour of retpolines the RSB fill on context
+	 * switch is required.
+	 */
+	if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+	     !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Filling RSB on context switch\n");
+	}
 }
 
 #undef pr_fmt
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 88dcf8479013..99442370de40 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 		 */
 		if (static_branch_unlikely(&rdt_mon_enable_key))
 			rmdir_mondata_subdir_allrdtgrp(r, d->id);
-		kfree(d->ctrl_val);
-		kfree(d->rmid_busy_llc);
-		kfree(d->mbm_total);
-		kfree(d->mbm_local);
 		list_del(&d->list);
 		if (is_mbm_enabled())
 			cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 			cancel_delayed_work(&d->cqm_limbo);
 		}
 
+		kfree(d->ctrl_val);
+		kfree(d->rmid_busy_llc);
+		kfree(d->mbm_total);
+		kfree(d->mbm_local);
 		kfree(d);
 		return;
 	}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 05459ad3db46..d0e69769abfd 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,7 +21,6 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
 	{ X86_FEATURE_EPB,		CPUID_ECX,  3, 0x00000006, 0 },
-	{ X86_FEATURE_INTEL_PT,		CPUID_EBX, 25, 0x00000007, 0 },
 	{ X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
 	{ X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
 	{ X86_FEATURE_CAT_L3,		CPUID_EBX,  1, 0x00000010, 0 },
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a5d757b9cfd..7ba5d819ebe3 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	p = fixup_pointer(&phys_base, physaddr);
 	*p += load_delta - sme_get_me_mask();
 
-	/* Encrypt the kernel (if SME is active) */
-	sme_encrypt_kernel();
+	/* Encrypt the kernel and related (if SME is active) */
+	sme_encrypt_kernel(bp);
 
 	/*
 	 * Return the SME encryption mask (if SME is active) to be used as a
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d985cef3984f..56d99be3706a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -56,7 +56,7 @@ struct idt_data {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
 	INTG(X86_TRAP_DB,		debug),
 	SYSG(X86_TRAP_BP,		int3),
 #ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
  * the traps which use them are reinitialized with IST after cpu_init() has
  * set up TSS.
  */
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
 	INTG(X86_TRAP_DE,		divide_error),
 	INTG(X86_TRAP_NMI,		nmi),
 	INTG(X86_TRAP_BR,		bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
 /*
  * The APIC and SMP idt entries
  */
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_SMP
 	INTG(RESCHEDULE_VECTOR,		reschedule_interrupt),
 	INTG(CALL_FUNCTION_VECTOR,	call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
 	INTG(X86_TRAP_PF,		page_fault),
 };
 
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
 	INTG(X86_TRAP_DB,	debug),
 	INTG(X86_TRAP_BP,	int3),
 };
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
  * The exceptions which use Interrupt stacks. They are setup after
  * cpu_init() when the TSS has been initialized.
  */
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
 	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
 	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
 	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 8da3e909e967..a539410c4ea9 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,6 +61,9 @@ void __init init_ISA_irqs(void)
 	struct irq_chip *chip = legacy_pic->chip;
 	int i;
 
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+	init_bsp_APIC();
+#endif
 	legacy_pic->init(0);
 
 	for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 145810b0edf6..68d7ab81c62f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -364,16 +364,6 @@ static void __init reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
-	/*
-	 * If SME is active, this memory will be marked encrypted by the
-	 * kernel when it is accessed (including relocation). However, the
-	 * ramdisk image was loaded decrypted by the bootloader, so make
-	 * sure that it is encrypted before accessing it. For SEV the
-	 * ramdisk will already be encrypted, so only do this for SME.
-	 */
-	if (sme_active())
-		sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
-
 	initrd_start = 0;
 
 	mapped_size = memblock_mem_size(max_pfn_mapped);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8ea117f8142e..e169e85db434 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void)
 		case INTEL_FAM6_KABYLAKE_DESKTOP:
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
-		case INTEL_FAM6_SKYLAKE_X:
 		case INTEL_FAM6_ATOM_DENVERTON:
 			crystal_khz = 25000;	/* 25.0 MHz */
 			break;
@@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void)
 		}
 	}
 
+	if (crystal_khz == 0)
+		return 0;
 	/*
 	 * TSC frequency determined by CPUID is a "hardware reported"
 	 * frequency and is the most accurate one so far we have. This
@@ -1315,6 +1316,12 @@ void __init tsc_init(void)
 		(unsigned long)cpu_khz / 1000,
 		(unsigned long)cpu_khz % 1000);
 
+	if (cpu_khz != tsc_khz) {
+		pr_info("Detected %lu.%03lu MHz TSC",
+			(unsigned long)tsc_khz / 1000,
+			(unsigned long)tsc_khz % 1000);
+	}
+
 	/* Sanitize TSC ADJUST before cyc2ns gets initialized */
 	tsc_store_and_check_tsc_adjust(true);
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 06fe3d51d385..b3e40773dce0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *	     faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
+static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info,
+		u32 *pkey)
 {
 	/* This is effectively an #ifdef */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return;
 
 	/* Fault not from Protection Keys: nothing to do */
-	if (si_code != SEGV_PKUERR)
+	if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV))
 		return;
 	/*
 	 * force_sig_info_fault() is called from a number of
@@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		lsb = PAGE_SHIFT;
 	info.si_addr_lsb = lsb;
 
-	fill_sig_info_pkey(si_code, &info, pkey);
+	fill_sig_info_pkey(si_signo, si_code, &info, pkey);
 
 	force_sig_info(si_signo, &info, tsk);
 }
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 47388f0c0e59..af6f2f9c6a26 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static __init void *early_alloc(size_t size, int nid)
+static __init void *early_alloc(size_t size, int nid, bool panic)
 {
-	return memblock_virt_alloc_try_nid_nopanic(size, size,
-		__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	if (panic)
+		return memblock_virt_alloc_try_nid(size, size,
+			__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	else
+		return memblock_virt_alloc_try_nid_nopanic(size, size,
+			__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
 }
 
 static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
@@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 		if (boot_cpu_has(X86_FEATURE_PSE) &&
 		    ((end - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(addr, PMD_SIZE)) {
-			p = early_alloc(PMD_SIZE, nid);
+			p = early_alloc(PMD_SIZE, nid, false);
 			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
 				return;
 			else if (p)
 				memblock_free(__pa(p), PMD_SIZE);
 		}
 
-		p = early_alloc(PAGE_SIZE, nid);
+		p = early_alloc(PAGE_SIZE, nid, true);
 		pmd_populate_kernel(&init_mm, pmd, p);
 	}
 
@@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 		if (!pte_none(*pte))
 			continue;
 
-		p = early_alloc(PAGE_SIZE, nid);
+		p = early_alloc(PAGE_SIZE, nid, true);
 		entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
 		set_pte_at(&init_mm, addr, pte, entry);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
 		if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
 		    ((end - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(addr, PUD_SIZE)) {
-			p = early_alloc(PUD_SIZE, nid);
+			p = early_alloc(PUD_SIZE, nid, false);
 			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
 				return;
 			else if (p)
 				memblock_free(__pa(p), PUD_SIZE);
 		}
 
-		p = early_alloc(PAGE_SIZE, nid);
+		p = early_alloc(PAGE_SIZE, nid, true);
 		pud_populate(&init_mm, pud, p);
 	}
 
@@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
 	unsigned long next;
 
 	if (p4d_none(*p4d)) {
-		void *p = early_alloc(PAGE_SIZE, nid);
+		void *p = early_alloc(PAGE_SIZE, nid, true);
 
 		p4d_populate(&init_mm, p4d, p);
 	}
@@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		p = early_alloc(PAGE_SIZE, nid);
+		p = early_alloc(PAGE_SIZE, nid, true);
 		pgd_populate(&init_mm, pgd, p);
 	}
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 391b13402e40..3ef362f598e3 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
 	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
-static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
-				 unsigned long end)
+struct sme_populate_pgd_data {
+	void	*pgtable_area;
+	pgd_t	*pgd;
+
+	pmdval_t pmd_flags;
+	pteval_t pte_flags;
+	unsigned long paddr;
+
+	unsigned long vaddr;
+	unsigned long vaddr_end;
+};
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
 {
 	unsigned long pgd_start, pgd_end, pgd_size;
 	pgd_t *pgd_p;
 
-	pgd_start = start & PGDIR_MASK;
-	pgd_end = end & PGDIR_MASK;
+	pgd_start = ppd->vaddr & PGDIR_MASK;
+	pgd_end = ppd->vaddr_end & PGDIR_MASK;
 
-	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
-	pgd_size *= sizeof(pgd_t);
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
 
-	pgd_p = pgd_base + pgd_index(start);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 
 	memset(pgd_p, 0, pgd_size);
 }
 
-#define PGD_FLAGS	_KERNPG_TABLE_NOENC
-#define P4D_FLAGS	_KERNPG_TABLE_NOENC
-#define PUD_FLAGS	_KERNPG_TABLE_NOENC
-#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+#define PGD_FLAGS		_KERNPG_TABLE_NOENC
+#define P4D_FLAGS		_KERNPG_TABLE_NOENC
+#define PUD_FLAGS		_KERNPG_TABLE_NOENC
+#define PMD_FLAGS		_KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE		(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC		PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC		(PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS		(__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC		PTE_FLAGS
+#define PTE_FLAGS_DEC_WP	((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+				 (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC		(PTE_FLAGS | _PAGE_ENC)
 
-static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
-				     unsigned long vaddr, pmdval_t pmd_val)
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
 {
 	pgd_t *pgd_p;
 	p4d_t *p4d_p;
 	pud_t *pud_p;
 	pmd_t *pmd_p;
 
-	pgd_p = pgd_base + pgd_index(vaddr);
+	pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
 	if (native_pgd_val(*pgd_p)) {
 		if (IS_ENABLED(CONFIG_X86_5LEVEL))
 			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
@@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 		pgd_t pgd;
 
 		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			p4d_p = pgtable_area;
+			p4d_p = ppd->pgtable_area;
 			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
-			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+			ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
 
 			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
 		} else {
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
 		}
@@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
 	}
 
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		p4d_p += p4d_index(vaddr);
+		p4d_p += p4d_index(ppd->vaddr);
 		if (native_p4d_val(*p4d_p)) {
 			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
 		} else {
 			p4d_t p4d;
 
-			pud_p = pgtable_area;
+			pud_p = ppd->pgtable_area;
 			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+			ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
 
 			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
 			native_set_p4d(p4d_p, p4d);
 		}
 	}
 
-	pud_p += pud_index(vaddr);
+	pud_p += pud_index(ppd->vaddr);
 	if (native_pud_val(*pud_p)) {
 		if (native_pud_val(*pud_p) & _PAGE_PSE)
-			goto out;
+			return NULL;
 
 		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
 	} else {
 		pud_t pud;
 
-		pmd_p = pgtable_area;
+		pmd_p = ppd->pgtable_area;
 		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
-		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+		ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
 
 		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
 		native_set_pud(pud_p, pud);
 	}
 
-	pmd_p += pmd_index(vaddr);
+	return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
 	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
-		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+		native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
+}
 
-out:
-	return pgtable_area;
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+	pmd_t *pmd_p;
+	pte_t *pte_p;
+
+	pmd_p = sme_prepare_pgd(ppd);
+	if (!pmd_p)
+		return;
+
+	pmd_p += pmd_index(ppd->vaddr);
+	if (native_pmd_val(*pmd_p)) {
+		if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+			return;
+
+		pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pmd_t pmd;
+
+		pte_p = ppd->pgtable_area;
+		memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+		ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+		pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+		native_set_pmd(pmd_p, pmd);
+	}
+
+	pte_p += pte_index(ppd->vaddr);
+	if (!native_pte_val(*pte_p))
+		native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd_large(ppd);
+
+		ppd->vaddr += PMD_PAGE_SIZE;
+		ppd->paddr += PMD_PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+	while (ppd->vaddr < ppd->vaddr_end) {
+		sme_populate_pgd(ppd);
+
+		ppd->vaddr += PAGE_SIZE;
+		ppd->paddr += PAGE_SIZE;
+	}
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+				   pmdval_t pmd_flags, pteval_t pte_flags)
+{
+	unsigned long vaddr_end;
+
+	ppd->pmd_flags = pmd_flags;
+	ppd->pte_flags = pte_flags;
+
+	/* Save original end value since we modify the struct value */
+	vaddr_end = ppd->vaddr_end;
+
+	/* If start is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+	__sme_map_range_pte(ppd);
+
+	/* Create PMD entries */
+	ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+	__sme_map_range_pmd(ppd);
+
+	/* If end is not 2MB aligned, create PTE entries */
+	ppd->vaddr_end = vaddr_end;
+	__sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+	__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
 }
 
 static unsigned long __init sme_pgtable_calc(unsigned long len)
 {
-	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long p4d_size, pud_size, pmd_size, pte_size;
 	unsigned long total;
 
 	/*
 	 * Perform a relatively simplistic calculation of the pagetable
-	 * entries that are needed. That mappings will be covered by 2MB
-	 * PMD entries so we can conservatively calculate the required
+	 * entries that are needed. Those mappings will be covered mostly
+	 * by 2MB PMD entries so we can conservatively calculate the required
 	 * number of P4D, PUD and PMD structures needed to perform the
-	 * mappings. Incrementing the count for each covers the case where
-	 * the addresses cross entries.
+	 * mappings.  For mappings that are not 2MB aligned, PTE mappings
+	 * would be needed for the start and end portion of the address range
+	 * that fall outside of the 2MB alignment.  This results in, at most,
+	 * two extra pages to hold PTE entries for each range that is mapped.
+	 * Incrementing the count for each covers the case where the addresses
+	 * cross entries.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
 		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
@@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	}
 	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
 	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+	pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
 
-	total = p4d_size + pud_size + pmd_size;
+	total = p4d_size + pud_size + pmd_size + pte_size;
 
 	/*
 	 * Now calculate the added pagetable structures needed to populate
@@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
 	return total;
 }
 
-void __init sme_encrypt_kernel(void)
+void __init sme_encrypt_kernel(struct boot_params *bp)
 {
 	unsigned long workarea_start, workarea_end, workarea_len;
 	unsigned long execute_start, execute_end, execute_len;
 	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long initrd_start, initrd_end, initrd_len;
+	struct sme_populate_pgd_data ppd;
 	unsigned long pgtable_area_len;
-	unsigned long paddr, pmd_flags;
 	unsigned long decrypted_base;
-	void *pgtable_area;
-	pgd_t *pgd;
 
 	if (!sme_active())
 		return;
 
 	/*
-	 * Prepare for encrypting the kernel by building new pagetables with
-	 * the necessary attributes needed to encrypt the kernel in place.
+	 * Prepare for encrypting the kernel and initrd by building new
+	 * pagetables with the necessary attributes needed to encrypt the
+	 * kernel in place.
 	 *
 	 *   One range of virtual addresses will map the memory occupied
-	 *   by the kernel as encrypted.
+	 *   by the kernel and initrd as encrypted.
 	 *
 	 *   Another range of virtual addresses will map the memory occupied
-	 *   by the kernel as decrypted and write-protected.
+	 *   by the kernel and initrd as decrypted and write-protected.
 	 *
 	 *     The use of write-protect attribute will prevent any of the
 	 *     memory from being cached.
@@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void)
 	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
 	kernel_len = kernel_end - kernel_start;
 
+	initrd_start = 0;
+	initrd_end = 0;
+	initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+		     ((unsigned long)bp->ext_ramdisk_size << 32);
+	if (initrd_len) {
+		initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+			       ((unsigned long)bp->ext_ramdisk_image << 32);
+		initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+		initrd_len = initrd_end - initrd_start;
+	}
+#endif
+
 	/* Set the encryption workarea to be immediately after the kernel */
 	workarea_start = kernel_end;
 
@@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void)
 	 */
 	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
 	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+	if (initrd_len)
+		pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
 
 	/* PUDs and PMDs needed in the current pagetables for the workarea */
 	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
 
 	/*
 	 * The total workarea includes the executable encryption area and
-	 * the pagetable area.
+	 * the pagetable area. The start of the workarea is already 2MB
+	 * aligned, align the end of the workarea on a 2MB boundary so that
+	 * we don't try to create/allocate PTE entries from the workarea
+	 * before it is mapped.
 	 */
 	workarea_len = execute_len + pgtable_area_len;
-	workarea_end = workarea_start + workarea_len;
+	workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
 
 	/*
 	 * Set the address to the start of where newly created pagetable
@@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void)
 	 * pagetables and when the new encrypted and decrypted kernel
 	 * mappings are populated.
 	 */
-	pgtable_area = (void *)execute_end;
+	ppd.pgtable_area = (void *)execute_end;
 
 	/*
 	 * Make sure the current pagetable structure has entries for
 	 * addressing the workarea.
 	 */
-	pgd = (pgd_t *)native_read_cr3_pa();
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = (pgd_t *)native_read_cr3_pa();
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
 
 	/*
 	 * A new pagetable structure is being built to allow for the kernel
-	 * to be encrypted. It starts with an empty PGD that will then be
-	 * populated with new PUDs and PMDs as the encrypted and decrypted
-	 * kernel mappings are created.
+	 * and initrd to be encrypted. It starts with an empty PGD that will
+	 * then be populated with new PUDs and PMDs as the encrypted and
+	 * decrypted kernel mappings are created.
 	 */
-	pgd = pgtable_area;
-	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
-	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
-
-	/* Add encrypted kernel (identity) mappings */
-	pmd_flags = PMD_FLAGS | _PAGE_ENC;
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.pgd = ppd.pgtable_area;
+	memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+	ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
 
 	/*
 	 * A different PGD index/entry must be used to get different
@@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void)
 	 * the base of the mapping.
 	 */
 	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	if (initrd_len) {
+		unsigned long check_base;
+
+		check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+		decrypted_base = max(decrypted_base, check_base);
+	}
 	decrypted_base <<= PGDIR_SHIFT;
 
+	/* Add encrypted kernel (identity) mappings */
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start;
+	ppd.vaddr_end = kernel_end;
+	sme_map_range_encrypted(&ppd);
+
 	/* Add decrypted, write-protected kernel (non-identity) mappings */
-	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
-	paddr = kernel_start;
-	while (paddr < kernel_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + pmd_flags);
-
-		paddr += PMD_PAGE_SIZE;
+	ppd.paddr = kernel_start;
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_map_range_decrypted_wp(&ppd);
+
+	if (initrd_len) {
+		/* Add encrypted initrd (identity) mappings */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start;
+		ppd.vaddr_end = initrd_end;
+		sme_map_range_encrypted(&ppd);
+		/*
+		 * Add decrypted, write-protected initrd (non-identity) mappings
+		 */
+		ppd.paddr = initrd_start;
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_map_range_decrypted_wp(&ppd);
 	}
 
 	/* Add decrypted workarea mappings to both kernel mappings */
-	paddr = workarea_start;
-	while (paddr < workarea_end) {
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr,
-						paddr + PMD_FLAGS);
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start;
+	ppd.vaddr_end = workarea_end;
+	sme_map_range_decrypted(&ppd);
 
-		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
-						paddr + decrypted_base,
-						paddr + PMD_FLAGS);
-
-		paddr += PMD_PAGE_SIZE;
-	}
+	ppd.paddr = workarea_start;
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_map_range_decrypted(&ppd);
 
 	/* Perform the encryption */
 	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
-			    kernel_len, workarea_start, (unsigned long)pgd);
+			    kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+	if (initrd_len)
+		sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+				    initrd_len, workarea_start,
+				    (unsigned long)ppd.pgd);
 
 	/*
 	 * At this point we are running encrypted.  Remove the mappings for
 	 * the decrypted areas - all that is needed for this is to remove
 	 * the PGD entry/entries.
 	 */
-	sme_clear_pgd(pgd, kernel_start + decrypted_base,
-		      kernel_end + decrypted_base);
+	ppd.vaddr = kernel_start + decrypted_base;
+	ppd.vaddr_end = kernel_end + decrypted_base;
+	sme_clear_pgd(&ppd);
+
+	if (initrd_len) {
+		ppd.vaddr = initrd_start + decrypted_base;
+		ppd.vaddr_end = initrd_end + decrypted_base;
+		sme_clear_pgd(&ppd);
+	}
 
-	sme_clear_pgd(pgd, workarea_start + decrypted_base,
-		      workarea_end + decrypted_base);
+	ppd.vaddr = workarea_start + decrypted_base;
+	ppd.vaddr_end = workarea_end + decrypted_base;
+	sme_clear_pgd(&ppd);
 
 	/* Flush the TLB - no globals so cr3 is enough */
 	native_write_cr3(__native_read_cr3());
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 730e6d541df1..01f682cf77a8 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute)
 
 	/*
 	 * Entry parameters:
-	 *   RDI - virtual address for the encrypted kernel mapping
-	 *   RSI - virtual address for the decrypted kernel mapping
-	 *   RDX - length of kernel
+	 *   RDI - virtual address for the encrypted mapping
+	 *   RSI - virtual address for the decrypted mapping
+	 *   RDX - length to encrypt
 	 *   RCX - virtual address of the encryption workarea, including:
 	 *     - stack page (PAGE_SIZE)
 	 *     - encryption routine page (PAGE_SIZE)
@@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute)
 	addq	$PAGE_SIZE, %rax	/* Workarea encryption routine */
 
 	push	%r12
-	movq	%rdi, %r10		/* Encrypted kernel */
-	movq	%rsi, %r11		/* Decrypted kernel */
-	movq	%rdx, %r12		/* Kernel length */
+	movq	%rdi, %r10		/* Encrypted area */
+	movq	%rsi, %r11		/* Decrypted area */
+	movq	%rdx, %r12		/* Area length */
 
 	/* Copy encryption routine into the workarea */
 	movq	%rax, %rdi				/* Workarea encryption routine */
@@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute)
 	rep	movsb
 
 	/* Setup registers for call */
-	movq	%r10, %rdi		/* Encrypted kernel */
-	movq	%r11, %rsi		/* Decrypted kernel */
+	movq	%r10, %rdi		/* Encrypted area */
+	movq	%r11, %rsi		/* Decrypted area */
 	movq	%r8, %rdx		/* Pagetables used for encryption */
-	movq	%r12, %rcx		/* Kernel length */
+	movq	%r12, %rcx		/* Area length */
 	movq	%rax, %r8		/* Workarea encryption routine */
 	addq	$PAGE_SIZE, %r8		/* Workarea intermediate copy buffer */
 
@@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute)
 
 ENTRY(__enc_copy)
 /*
- * Routine used to encrypt kernel.
+ * Routine used to encrypt memory in place.
  *   This routine must be run outside of the kernel proper since
  *   the kernel will be encrypted during the process. So this
  *   routine is defined here and then copied to an area outside
@@ -79,19 +79,19 @@ ENTRY(__enc_copy)
  *   during execution.
  *
  *   On entry the registers must be:
- *     RDI - virtual address for the encrypted kernel mapping
- *     RSI - virtual address for the decrypted kernel mapping
+ *     RDI - virtual address for the encrypted mapping
+ *     RSI - virtual address for the decrypted mapping
  *     RDX - address of the pagetables to use for encryption
- *     RCX - length of kernel
+ *     RCX - length of area
  *      R8 - intermediate copy buffer
  *
  *     RAX - points to this routine
  *
- * The kernel will be encrypted by copying from the non-encrypted
- * kernel space to an intermediate buffer and then copying from the
- * intermediate buffer back to the encrypted kernel space. The physical
- * addresses of the two kernel space mappings are the same which
- * results in the kernel being encrypted "in place".
+ * The area will be encrypted by copying from the non-encrypted
+ * memory space to an intermediate buffer and then copying from the
+ * intermediate buffer back to the encrypted memory space. The physical
+ * addresses of the two mappings are the same which results in the area
+ * being encrypted "in place".
  */
 	/* Enable the new page tables */
 	mov	%rdx, %cr3
@@ -103,47 +103,55 @@ ENTRY(__enc_copy)
 	orq	$X86_CR4_PGE, %rdx
 	mov	%rdx, %cr4
 
+	push	%r15
+	push	%r12
+
+	movq	%rcx, %r9		/* Save area length */
+	movq	%rdi, %r10		/* Save encrypted area address */
+	movq	%rsi, %r11		/* Save decrypted area address */
+
 	/* Set the PAT register PA5 entry to write-protect */
-	push	%rcx
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	push	%rdx			/* Save original PAT value */
+	mov	%rdx, %r15		/* Save original PAT value */
 	andl	$0xffff00ff, %edx	/* Clear PA5 */
 	orl	$0x00000500, %edx	/* Set PA5 to WP */
 	wrmsr
-	pop	%rdx			/* RDX contains original PAT value */
-	pop	%rcx
-
-	movq	%rcx, %r9		/* Save kernel length */
-	movq	%rdi, %r10		/* Save encrypted kernel address */
-	movq	%rsi, %r11		/* Save decrypted kernel address */
 
 	wbinvd				/* Invalidate any cache entries */
 
-	/* Copy/encrypt 2MB at a time */
+	/* Copy/encrypt up to 2MB at a time */
+	movq	$PMD_PAGE_SIZE, %r12
 1:
-	movq	%r11, %rsi		/* Source - decrypted kernel */
+	cmpq	%r12, %r9
+	jnb	2f
+	movq	%r9, %r12
+
+2:
+	movq	%r11, %rsi		/* Source - decrypted area */
 	movq	%r8, %rdi		/* Dest   - intermediate copy buffer */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r12, %rcx
 	rep	movsb
 
 	movq	%r8, %rsi		/* Source - intermediate copy buffer */
-	movq	%r10, %rdi		/* Dest   - encrypted kernel */
-	movq	$PMD_PAGE_SIZE, %rcx	/* 2MB length */
+	movq	%r10, %rdi		/* Dest   - encrypted area */
+	movq	%r12, %rcx
 	rep	movsb
 
-	addq	$PMD_PAGE_SIZE, %r11
-	addq	$PMD_PAGE_SIZE, %r10
-	subq	$PMD_PAGE_SIZE, %r9	/* Kernel length decrement */
+	addq	%r12, %r11
+	addq	%r12, %r10
+	subq	%r12, %r9		/* Kernel length decrement */
 	jnz	1b			/* Kernel length not zero? */
 
 	/* Restore PAT register */
-	push	%rdx			/* Save original PAT value */
 	movl	$MSR_IA32_CR_PAT, %ecx
 	rdmsr
-	pop	%rdx			/* Restore original PAT value */
+	mov	%r15, %rdx		/* Restore original PAT value */
 	wrmsr
 
+	pop	%r12
+	pop	%r15
+
 	ret
 .L__enc_copy_end:
 ENDPROC(__enc_copy)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8193b38a1cae..3c09122bf038 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4449,6 +4449,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	 * https://bugzilla.kernel.org/show_bug.cgi?id=121671
 	 */
 	{ "LITEON CX1-JB*-HP",	NULL,		ATA_HORKAGE_MAX_SEC_1024 },
+	{ "LITEON EP1-*",	NULL,		ATA_HORKAGE_MAX_SEC_1024 },
 
 	/* Devices we expect to fail diagnostics */
 
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 02d78f6cecbb..ba8acca036df 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -55,7 +55,7 @@ config BCMA_DRIVER_PCI
 
 config BCMA_DRIVER_PCI_HOSTMODE
 	bool "Driver for PCI core working in hostmode"
-	depends on MIPS && BCMA_DRIVER_PCI
+	depends on MIPS && BCMA_DRIVER_PCI && PCI_DRIVERS_LEGACY
 	help
 	  PCI core hostmode operation (external PCI bus).
 
diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index f9042bcc27a4..7b14d6280e44 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -152,14 +152,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask,
 {
 	unsigned long get_mask = 0;
 	unsigned long set_mask = 0;
-	int bit = 0;
 
-	while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio) {
-		if (gc->bgpio_dir & BIT(bit))
-			set_mask |= BIT(bit);
-		else
-			get_mask |= BIT(bit);
-	}
+	/* Make sure we first clear any bits that are zero when we read the register */
+	*bits &= ~*mask;
+
+	/* Exploit the fact that we know which directions are set */
+	set_mask = *mask & gc->bgpio_dir;
+	get_mask = *mask & ~gc->bgpio_dir;
 
 	if (set_mask)
 		*bits |= gc->read_reg(gc->reg_set) & set_mask;
@@ -176,13 +175,13 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio)
 
 /*
  * This only works if the bits in the GPIO register are in native endianness.
- * It is dirt simple and fast in this case. (Also the most common case.)
  */
 static int bgpio_get_multiple(struct gpio_chip *gc, unsigned long *mask,
 			      unsigned long *bits)
 {
-
-	*bits = gc->read_reg(gc->reg_dat) & *mask;
+	/* Make sure we first clear any bits that are zero when we read the register */
+	*bits &= ~*mask;
+	*bits |= gc->read_reg(gc->reg_dat) & *mask;
 	return 0;
 }
 
@@ -196,9 +195,12 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask,
 	unsigned long val;
 	int bit;
 
+	/* Make sure we first clear any bits that are zero when we read the register */
+	*bits &= ~*mask;
+
 	/* Create a mirrored mask */
-	bit = 0;
-	while ((bit = find_next_bit(mask, gc->ngpio, bit)) != gc->ngpio)
+	bit = -1;
+	while ((bit = find_next_bit(mask, gc->ngpio, bit + 1)) < gc->ngpio)
 		readmask |= bgpio_line2mask(gc, bit);
 
 	/* Read the register */
@@ -208,8 +210,8 @@ static int bgpio_get_multiple_be(struct gpio_chip *gc, unsigned long *mask,
 	 * Mirror the result into the "bits" result, this will give line 0
 	 * in bit 0 ... line 31 in bit 31 for a 32bit register.
 	 */
-	bit = 0;
-	while ((bit = find_next_bit(&val, gc->ngpio, bit)) != gc->ngpio)
+	bit = -1;
+	while ((bit = find_next_bit(&val, gc->ngpio, bit + 1)) < gc->ngpio)
 		*bits |= bgpio_line2mask(gc, bit);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 123585eeb87d..50f8443641b8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1211,23 +1211,6 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, enum pipe pipe)
 	     pipe_name(pipe));
 }
 
-static void assert_cursor(struct drm_i915_private *dev_priv,
-			  enum pipe pipe, bool state)
-{
-	bool cur_state;
-
-	if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-		cur_state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
-	else
-		cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
-
-	I915_STATE_WARN(cur_state != state,
-	     "cursor on pipe %c assertion failure (expected %s, current %s)\n",
-			pipe_name(pipe), onoff(state), onoff(cur_state));
-}
-#define assert_cursor_enabled(d, p) assert_cursor(d, p, true)
-#define assert_cursor_disabled(d, p) assert_cursor(d, p, false)
-
 void assert_pipe(struct drm_i915_private *dev_priv,
 		 enum pipe pipe, bool state)
 {
@@ -1255,77 +1238,25 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 			pipe_name(pipe), onoff(state), onoff(cur_state));
 }
 
-static void assert_plane(struct drm_i915_private *dev_priv,
-			 enum plane plane, bool state)
+static void assert_plane(struct intel_plane *plane, bool state)
 {
-	u32 val;
-	bool cur_state;
+	bool cur_state = plane->get_hw_state(plane);
 
-	val = I915_READ(DSPCNTR(plane));
-	cur_state = !!(val & DISPLAY_PLANE_ENABLE);
 	I915_STATE_WARN(cur_state != state,
-	     "plane %c assertion failure (expected %s, current %s)\n",
-			plane_name(plane), onoff(state), onoff(cur_state));
+			"%s assertion failure (expected %s, current %s)\n",
+			plane->base.name, onoff(state), onoff(cur_state));
 }
 
-#define assert_plane_enabled(d, p) assert_plane(d, p, true)
-#define assert_plane_disabled(d, p) assert_plane(d, p, false)
-
-static void assert_planes_disabled(struct drm_i915_private *dev_priv,
-				   enum pipe pipe)
-{
-	int i;
-
-	/* Primary planes are fixed to pipes on gen4+ */
-	if (INTEL_GEN(dev_priv) >= 4) {
-		u32 val = I915_READ(DSPCNTR(pipe));
-		I915_STATE_WARN(val & DISPLAY_PLANE_ENABLE,
-		     "plane %c assertion failure, should be disabled but not\n",
-		     plane_name(pipe));
-		return;
-	}
+#define assert_plane_enabled(p) assert_plane(p, true)
+#define assert_plane_disabled(p) assert_plane(p, false)
 
-	/* Need to check both planes against the pipe */
-	for_each_pipe(dev_priv, i) {
-		u32 val = I915_READ(DSPCNTR(i));
-		enum pipe cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
-			DISPPLANE_SEL_PIPE_SHIFT;
-		I915_STATE_WARN((val & DISPLAY_PLANE_ENABLE) && pipe == cur_pipe,
-		     "plane %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(i), pipe_name(pipe));
-	}
-}
-
-static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
-				    enum pipe pipe)
+static void assert_planes_disabled(struct intel_crtc *crtc)
 {
-	int sprite;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_plane *plane;
 
-	if (INTEL_GEN(dev_priv) >= 9) {
-		for_each_sprite(dev_priv, pipe, sprite) {
-			u32 val = I915_READ(PLANE_CTL(pipe, sprite));
-			I915_STATE_WARN(val & PLANE_CTL_ENABLE,
-			     "plane %d assertion failure, should be off on pipe %c but is still active\n",
-			     sprite, pipe_name(pipe));
-		}
-	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		for_each_sprite(dev_priv, pipe, sprite) {
-			u32 val = I915_READ(SPCNTR(pipe, PLANE_SPRITE0 + sprite));
-			I915_STATE_WARN(val & SP_ENABLE,
-			     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-			     sprite_name(pipe, sprite), pipe_name(pipe));
-		}
-	} else if (INTEL_GEN(dev_priv) >= 7) {
-		u32 val = I915_READ(SPRCTL(pipe));
-		I915_STATE_WARN(val & SPRITE_ENABLE,
-		     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(pipe), pipe_name(pipe));
-	} else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
-		u32 val = I915_READ(DVSCNTR(pipe));
-		I915_STATE_WARN(val & DVS_ENABLE,
-		     "sprite %c assertion failure, should be off on pipe %c but is still active\n",
-		     plane_name(pipe), pipe_name(pipe));
-	}
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane)
+		assert_plane_disabled(plane);
 }
 
 static void assert_vblank_disabled(struct drm_crtc *crtc)
@@ -1918,9 +1849,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
 
 	DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe));
 
-	assert_planes_disabled(dev_priv, pipe);
-	assert_cursor_disabled(dev_priv, pipe);
-	assert_sprites_disabled(dev_priv, pipe);
+	assert_planes_disabled(crtc);
 
 	/*
 	 * A pipe without a PLL won't actually be able to drive bits from
@@ -1989,9 +1918,7 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
 	 * Make sure planes won't keep trying to pump pixels to us,
 	 * or we might hang the display.
 	 */
-	assert_planes_disabled(dev_priv, pipe);
-	assert_cursor_disabled(dev_priv, pipe);
-	assert_sprites_disabled(dev_priv, pipe);
+	assert_planes_disabled(crtc);
 
 	reg = PIPECONF(cpu_transcoder);
 	val = I915_READ(reg);
@@ -2820,6 +2747,23 @@ intel_set_plane_visible(struct intel_crtc_state *crtc_state,
 		      crtc_state->active_planes);
 }
 
+static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
+					 struct intel_plane *plane)
+{
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(plane->base.state);
+
+	intel_set_plane_visible(crtc_state, plane_state, false);
+
+	if (plane->id == PLANE_PRIMARY)
+		intel_pre_disable_primary_noatomic(&crtc->base);
+
+	trace_intel_disable_plane(&plane->base, crtc);
+	plane->disable_plane(plane, crtc);
+}
+
 static void
 intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 			     struct intel_initial_plane_config *plane_config)
@@ -2877,12 +2821,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	 * simplest solution is to just disable the primary plane now and
 	 * pretend the BIOS never had it enabled.
 	 */
-	intel_set_plane_visible(to_intel_crtc_state(crtc_state),
-				to_intel_plane_state(plane_state),
-				false);
-	intel_pre_disable_primary_noatomic(&intel_crtc->base);
-	trace_intel_disable_plane(primary, intel_crtc);
-	intel_plane->disable_plane(intel_plane, intel_crtc);
+	intel_plane_disable_noatomic(intel_crtc, intel_plane);
 
 	return;
 
@@ -3385,6 +3324,31 @@ static void i9xx_disable_primary_plane(struct intel_plane *primary,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool i9xx_plane_get_hw_state(struct intel_plane *primary)
+{
+
+	struct drm_i915_private *dev_priv = to_i915(primary->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum plane plane = primary->plane;
+	enum pipe pipe = primary->pipe;
+	bool ret;
+
+	/*
+	 * Not 100% correct for planes that can move between pipes,
+	 * but that's only the case for gen2-4 which don't have any
+	 * display power wells.
+	 */
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(DSPCNTR(plane)) & DISPLAY_PLANE_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32
 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane)
 {
@@ -4866,7 +4830,8 @@ void hsw_enable_ips(struct intel_crtc *crtc)
 	 * a vblank wait.
 	 */
 
-	assert_plane_enabled(dev_priv, crtc->plane);
+	assert_plane_enabled(to_intel_plane(crtc->base.primary));
+
 	if (IS_BROADWELL(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
@@ -4899,7 +4864,8 @@ void hsw_disable_ips(struct intel_crtc *crtc)
 	if (!crtc->config->ips_enabled)
 		return;
 
-	assert_plane_enabled(dev_priv, crtc->plane);
+	assert_plane_enabled(to_intel_plane(crtc->base.primary));
+
 	if (IS_BROADWELL(dev_priv)) {
 		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
@@ -5899,6 +5865,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	enum intel_display_power_domain domain;
+	struct intel_plane *plane;
 	u64 domains;
 	struct drm_atomic_state *state;
 	struct intel_crtc_state *crtc_state;
@@ -5907,11 +5874,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	if (!intel_crtc->active)
 		return;
 
-	if (crtc->primary->state->visible) {
-		intel_pre_disable_primary_noatomic(crtc);
+	for_each_intel_plane_on_crtc(&dev_priv->drm, intel_crtc, plane) {
+		const struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
 
-		intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary));
-		crtc->primary->state->visible = false;
+		if (plane_state->base.visible)
+			intel_plane_disable_noatomic(intel_crtc, plane);
 	}
 
 	state = drm_atomic_state_alloc(crtc->dev);
@@ -9477,6 +9445,23 @@ static void i845_disable_cursor(struct intel_plane *plane,
 	i845_update_cursor(plane, NULL, NULL);
 }
 
+static bool i845_cursor_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(PIPE_A);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
 			   const struct intel_plane_state *plane_state)
 {
@@ -9670,6 +9655,28 @@ static void i9xx_disable_cursor(struct intel_plane *plane,
 	i9xx_update_cursor(plane, NULL, NULL);
 }
 
+static bool i9xx_cursor_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	/*
+	 * Not 100% correct for planes that can move between pipes,
+	 * but that's only the case for gen2-3 which don't have any
+	 * display power wells.
+	 */
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
 
 /* VESA 640x480x72Hz mode to set on the pipe */
 static const struct drm_display_mode load_detect_mode = {
@@ -13205,6 +13212,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = skl_update_plane;
 		primary->disable_plane = skl_disable_plane;
+		primary->get_hw_state = skl_plane_get_hw_state;
 	} else if (INTEL_GEN(dev_priv) >= 9) {
 		intel_primary_formats = skl_primary_formats;
 		num_formats = ARRAY_SIZE(skl_primary_formats);
@@ -13215,6 +13223,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = skl_update_plane;
 		primary->disable_plane = skl_disable_plane;
+		primary->get_hw_state = skl_plane_get_hw_state;
 	} else if (INTEL_GEN(dev_priv) >= 4) {
 		intel_primary_formats = i965_primary_formats;
 		num_formats = ARRAY_SIZE(i965_primary_formats);
@@ -13222,6 +13231,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = i9xx_update_primary_plane;
 		primary->disable_plane = i9xx_disable_primary_plane;
+		primary->get_hw_state = i9xx_plane_get_hw_state;
 	} else {
 		intel_primary_formats = i8xx_primary_formats;
 		num_formats = ARRAY_SIZE(i8xx_primary_formats);
@@ -13229,6 +13239,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = i9xx_update_primary_plane;
 		primary->disable_plane = i9xx_disable_primary_plane;
+		primary->get_hw_state = i9xx_plane_get_hw_state;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9)
@@ -13318,10 +13329,12 @@ intel_cursor_plane_create(struct drm_i915_private *dev_priv,
 	if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
 		cursor->update_plane = i845_update_cursor;
 		cursor->disable_plane = i845_disable_cursor;
+		cursor->get_hw_state = i845_cursor_get_hw_state;
 		cursor->check_plane = i845_check_cursor;
 	} else {
 		cursor->update_plane = i9xx_update_cursor;
 		cursor->disable_plane = i9xx_disable_cursor;
+		cursor->get_hw_state = i9xx_cursor_get_hw_state;
 		cursor->check_plane = i9xx_check_cursor;
 	}
 
@@ -14671,8 +14684,11 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 	DRM_DEBUG_KMS("disabling pipe %c due to force quirk\n",
 		      pipe_name(pipe));
 
-	assert_plane_disabled(dev_priv, PLANE_A);
-	assert_plane_disabled(dev_priv, PLANE_B);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_A)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_B)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(DSPCNTR(PLANE_C)) & DISPLAY_PLANE_ENABLE);
+	WARN_ON(I915_READ(CURCNTR(PIPE_A)) & CURSOR_MODE);
+	WARN_ON(I915_READ(CURCNTR(PIPE_B)) & CURSOR_MODE);
 
 	I915_WRITE(PIPECONF(pipe), 0);
 	POSTING_READ(PIPECONF(pipe));
@@ -14683,22 +14699,36 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 	POSTING_READ(DPLL(pipe));
 }
 
-static bool
-intel_check_plane_mapping(struct intel_crtc *crtc)
+static bool intel_plane_mapping_ok(struct intel_crtc *crtc,
+				   struct intel_plane *primary)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 val;
+	enum plane plane = primary->plane;
+	u32 val = I915_READ(DSPCNTR(plane));
 
-	if (INTEL_INFO(dev_priv)->num_pipes == 1)
-		return true;
+	return (val & DISPLAY_PLANE_ENABLE) == 0 ||
+		(val & DISPPLANE_SEL_PIPE_MASK) == DISPPLANE_SEL_PIPE(crtc->pipe);
+}
 
-	val = I915_READ(DSPCNTR(!crtc->plane));
+static void
+intel_sanitize_plane_mapping(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
 
-	if ((val & DISPLAY_PLANE_ENABLE) &&
-	    (!!(val & DISPPLANE_SEL_PIPE_MASK) == crtc->pipe))
-		return false;
+	if (INTEL_GEN(dev_priv) >= 4)
+		return;
 
-	return true;
+	for_each_intel_crtc(&dev_priv->drm, crtc) {
+		struct intel_plane *plane =
+			to_intel_plane(crtc->base.primary);
+
+		if (intel_plane_mapping_ok(crtc, plane))
+			continue;
+
+		DRM_DEBUG_KMS("%s attached to the wrong pipe, disabling plane\n",
+			      plane->base.name);
+		intel_plane_disable_noatomic(crtc, plane);
+	}
 }
 
 static bool intel_crtc_has_encoders(struct intel_crtc *crtc)
@@ -14754,33 +14784,15 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
 
 		/* Disable everything but the primary plane */
 		for_each_intel_plane_on_crtc(dev, crtc, plane) {
-			if (plane->base.type == DRM_PLANE_TYPE_PRIMARY)
-				continue;
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
 
-			trace_intel_disable_plane(&plane->base, crtc);
-			plane->disable_plane(plane, crtc);
+			if (plane_state->base.visible &&
+			    plane->base.type != DRM_PLANE_TYPE_PRIMARY)
+				intel_plane_disable_noatomic(crtc, plane);
 		}
 	}
 
-	/* We need to sanitize the plane -> pipe mapping first because this will
-	 * disable the crtc (and hence change the state) if it is wrong. Note
-	 * that gen4+ has a fixed plane -> pipe mapping.  */
-	if (INTEL_GEN(dev_priv) < 4 && !intel_check_plane_mapping(crtc)) {
-		bool plane;
-
-		DRM_DEBUG_KMS("[CRTC:%d:%s] wrong plane connection detected!\n",
-			      crtc->base.base.id, crtc->base.name);
-
-		/* Pipe has the wrong plane attached and the plane is active.
-		 * Temporarily change the plane mapping and disable everything
-		 * ...  */
-		plane = crtc->plane;
-		crtc->base.primary->state->visible = true;
-		crtc->plane = !plane;
-		intel_crtc_disable_noatomic(&crtc->base, ctx);
-		crtc->plane = plane;
-	}
-
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc->active && !intel_crtc_has_encoders(crtc))
@@ -14885,24 +14897,21 @@ void i915_redisable_vga(struct drm_i915_private *dev_priv)
 	intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
 }
 
-static bool primary_get_hw_state(struct intel_plane *plane)
-{
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-
-	return I915_READ(DSPCNTR(plane->plane)) & DISPLAY_PLANE_ENABLE;
-}
-
 /* FIXME read out full plane state for all planes */
 static void readout_plane_state(struct intel_crtc *crtc)
 {
-	struct intel_plane *primary = to_intel_plane(crtc->base.primary);
-	bool visible;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane *plane;
 
-	visible = crtc->active && primary_get_hw_state(primary);
+	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
+		bool visible = plane->get_hw_state(plane);
 
-	intel_set_plane_visible(to_intel_crtc_state(crtc->base.state),
-				to_intel_plane_state(primary->base.state),
-				visible);
+		intel_set_plane_visible(crtc_state, plane_state, visible);
+	}
 }
 
 static void intel_modeset_readout_hw_state(struct drm_device *dev)
@@ -15100,6 +15109,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 	/* HW state is read out, now we need to sanitize this mess. */
 	get_encoder_power_domains(dev_priv);
 
+	intel_sanitize_plane_mapping(dev_priv);
+
 	for_each_intel_encoder(dev, encoder) {
 		intel_sanitize_encoder(encoder);
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6c7f8bca574e..5d77f75a9f9c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -862,6 +862,7 @@ struct intel_plane {
 			     const struct intel_plane_state *plane_state);
 	void (*disable_plane)(struct intel_plane *plane,
 			      struct intel_crtc *crtc);
+	bool (*get_hw_state)(struct intel_plane *plane);
 	int (*check_plane)(struct intel_plane *plane,
 			   struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *state);
@@ -1924,6 +1925,7 @@ void skl_update_plane(struct intel_plane *plane,
 		      const struct intel_crtc_state *crtc_state,
 		      const struct intel_plane_state *plane_state);
 void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc);
+bool skl_plane_get_hw_state(struct intel_plane *plane);
 
 /* intel_tv.c */
 void intel_tv_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 4fcf80ca91dd..4a8a5d918a83 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -329,6 +329,26 @@ skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+bool
+skl_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static void
 chv_update_csc(struct intel_plane *plane, uint32_t format)
 {
@@ -506,6 +526,26 @@ vlv_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+vlv_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
@@ -646,6 +686,25 @@ ivb_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+ivb_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret =  I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static u32 g4x_sprite_ctl(const struct intel_crtc_state *crtc_state,
 			  const struct intel_plane_state *plane_state)
 {
@@ -777,6 +836,25 @@ g4x_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
+static bool
+g4x_plane_get_hw_state(struct intel_plane *plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	enum intel_display_power_domain power_domain;
+	enum pipe pipe = plane->pipe;
+	bool ret;
+
+	power_domain = POWER_DOMAIN_PIPE(pipe);
+	if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+		return false;
+
+	ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE;
+
+	intel_display_power_put(dev_priv, power_domain);
+
+	return ret;
+}
+
 static int
 intel_check_sprite_plane(struct intel_plane *plane,
 			 struct intel_crtc_state *crtc_state,
@@ -1232,6 +1310,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		intel_plane->get_hw_state = skl_plane_get_hw_state;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1242,6 +1321,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = skl_update_plane;
 		intel_plane->disable_plane = skl_disable_plane;
+		intel_plane->get_hw_state = skl_plane_get_hw_state;
 
 		plane_formats = skl_plane_formats;
 		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
@@ -1252,6 +1332,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = vlv_update_plane;
 		intel_plane->disable_plane = vlv_disable_plane;
+		intel_plane->get_hw_state = vlv_plane_get_hw_state;
 
 		plane_formats = vlv_plane_formats;
 		num_plane_formats = ARRAY_SIZE(vlv_plane_formats);
@@ -1267,6 +1348,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = ivb_update_plane;
 		intel_plane->disable_plane = ivb_disable_plane;
+		intel_plane->get_hw_state = ivb_plane_get_hw_state;
 
 		plane_formats = snb_plane_formats;
 		num_plane_formats = ARRAY_SIZE(snb_plane_formats);
@@ -1277,6 +1359,7 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv,
 
 		intel_plane->update_plane = g4x_update_plane;
 		intel_plane->disable_plane = g4x_disable_plane;
+		intel_plane->get_hw_state = g4x_plane_get_hw_state;
 
 		modifiers = i9xx_plane_format_modifiers;
 		if (IS_GEN6(dev_priv)) {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 0760b93e9d1f..baab93398e54 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -121,6 +121,7 @@ int nv41_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int nv44_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int nv50_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int g84_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
+int mcp77_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gf100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gk104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gk20a_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 435ff8662cfa..ef687414969e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1447,11 +1447,13 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 				args.nv50.ro = 0;
 				args.nv50.kind = mem->kind;
 				args.nv50.comp = mem->comp;
+				argc = sizeof(args.nv50);
 				break;
 			case NVIF_CLASS_MEM_GF100:
 				args.gf100.version = 0;
 				args.gf100.ro = 0;
 				args.gf100.kind = mem->kind;
+				argc = sizeof(args.gf100);
 				break;
 			default:
 				WARN_ON(1);
@@ -1459,7 +1461,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 			}
 
 			ret = nvif_object_map_handle(&mem->mem.object,
-						     &argc, argc,
+						     &args, argc,
 						     &handle, &length);
 			if (ret != 1)
 				return ret ? ret : -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 00eeaaffeae5..08e77cd55e6e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1251,7 +1251,7 @@ nvaa_chipset = {
 	.i2c = g94_i2c_new,
 	.imem = nv50_instmem_new,
 	.mc = g98_mc_new,
-	.mmu = g84_mmu_new,
+	.mmu = mcp77_mmu_new,
 	.mxm = nv50_mxm_new,
 	.pci = g94_pci_new,
 	.therm = g84_therm_new,
@@ -1283,7 +1283,7 @@ nvac_chipset = {
 	.i2c = g94_i2c_new,
 	.imem = nv50_instmem_new,
 	.mc = g98_mc_new,
-	.mmu = g84_mmu_new,
+	.mmu = mcp77_mmu_new,
 	.mxm = nv50_mxm_new,
 	.pci = g94_pci_new,
 	.therm = g84_therm_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
index 9646adec57cb..243f0a5c8a62 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
@@ -73,7 +73,8 @@ static int
 nvkm_bar_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_bar *bar = nvkm_bar(subdev);
-	bar->func->bar1.fini(bar);
+	if (bar->func->bar1.fini)
+		bar->func->bar1.fini(bar);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
index b10077d38839..35878fb538f2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
@@ -26,7 +26,6 @@ gk20a_bar_func = {
 	.dtor = gf100_bar_dtor,
 	.oneinit = gf100_bar_oneinit,
 	.bar1.init = gf100_bar_bar1_init,
-	.bar1.fini = gf100_bar_bar1_fini,
 	.bar1.wait = gf100_bar_bar1_wait,
 	.bar1.vmm = gf100_bar_bar1_vmm,
 	.flush = g84_bar_flush,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
index 352a65f9371c..67ee983bb026 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
@@ -4,6 +4,7 @@ nvkm-y += nvkm/subdev/mmu/nv41.o
 nvkm-y += nvkm/subdev/mmu/nv44.o
 nvkm-y += nvkm/subdev/mmu/nv50.o
 nvkm-y += nvkm/subdev/mmu/g84.o
+nvkm-y += nvkm/subdev/mmu/mcp77.o
 nvkm-y += nvkm/subdev/mmu/gf100.o
 nvkm-y += nvkm/subdev/mmu/gk104.o
 nvkm-y += nvkm/subdev/mmu/gk20a.o
@@ -22,6 +23,7 @@ nvkm-y += nvkm/subdev/mmu/vmmnv04.o
 nvkm-y += nvkm/subdev/mmu/vmmnv41.o
 nvkm-y += nvkm/subdev/mmu/vmmnv44.o
 nvkm-y += nvkm/subdev/mmu/vmmnv50.o
+nvkm-y += nvkm/subdev/mmu/vmmmcp77.o
 nvkm-y += nvkm/subdev/mmu/vmmgf100.o
 nvkm-y += nvkm/subdev/mmu/vmmgk104.o
 nvkm-y += nvkm/subdev/mmu/vmmgk20a.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c
new file mode 100644
index 000000000000..0527b50730d9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "mem.h"
+#include "vmm.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_mmu_func
+mcp77_mmu = {
+	.dma_bits = 40,
+	.mmu = {{ -1, -1, NVIF_CLASS_MMU_NV50}},
+	.mem = {{ -1,  0, NVIF_CLASS_MEM_NV50}, nv50_mem_new, nv50_mem_map },
+	.vmm = {{ -1, -1, NVIF_CLASS_VMM_NV50}, mcp77_vmm_new, false, 0x0200 },
+	.kind = nv50_mmu_kind,
+	.kind_sys = true,
+};
+
+int
+mcp77_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+{
+	return nvkm_mmu_new_(&mcp77_mmu, device, index, pmmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 6d8f61ea467a..da06e64d8a7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -95,6 +95,9 @@ struct nvkm_vmm_desc {
 	const struct nvkm_vmm_desc_func *func;
 };
 
+extern const struct nvkm_vmm_desc nv50_vmm_desc_12[];
+extern const struct nvkm_vmm_desc nv50_vmm_desc_16[];
+
 extern const struct nvkm_vmm_desc gk104_vmm_desc_16_12[];
 extern const struct nvkm_vmm_desc gk104_vmm_desc_16_16[];
 extern const struct nvkm_vmm_desc gk104_vmm_desc_17_12[];
@@ -169,6 +172,11 @@ int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32,
 		  const char *, struct nvkm_vmm **);
 int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
 
+int nv50_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
+void nv50_vmm_part(struct nvkm_vmm *, struct nvkm_memory *);
+int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
+void nv50_vmm_flush(struct nvkm_vmm *, int);
+
 int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *,
 		   struct nvkm_mmu *, u64, u64, void *, u32,
 		   struct lock_class_key *, const char *, struct nvkm_vmm **);
@@ -200,6 +208,8 @@ int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		 struct lock_class_key *, const char *, struct nvkm_vmm **);
 int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		 struct lock_class_key *, const char *, struct nvkm_vmm **);
+int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
+		  struct lock_class_key *, const char *, struct nvkm_vmm **);
 int g84_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		struct lock_class_key *, const char *, struct nvkm_vmm **);
 int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c
new file mode 100644
index 000000000000..e63d984cbfd4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2017 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "vmm.h"
+
+static const struct nvkm_vmm_func
+mcp77_vmm = {
+	.join = nv50_vmm_join,
+	.part = nv50_vmm_part,
+	.valid = nv50_vmm_valid,
+	.flush = nv50_vmm_flush,
+	.page_block = 1 << 29,
+	.page = {
+		{ 16, &nv50_vmm_desc_16[0], NVKM_VMM_PAGE_xVxx },
+		{ 12, &nv50_vmm_desc_12[0], NVKM_VMM_PAGE_xVHx },
+		{}
+	}
+};
+
+int
+mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc,
+	      struct lock_class_key *key, const char *name,
+	      struct nvkm_vmm **pvmm)
+{
+	return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size,
+			     argv, argc, key, name, pvmm);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
index 863a2edd9861..64f75d906202 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
@@ -32,7 +32,7 @@ static inline void
 nv50_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
 		 u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr)
 {
-	u64 next = addr | map->type, data;
+	u64 next = addr + map->type, data;
 	u32 pten;
 	int log2blk;
 
@@ -69,7 +69,7 @@ nv50_vmm_pgt_dma(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
 		VMM_SPAM(vmm, "DMAA %08x %08x PTE(s)", ptei, ptes);
 		nvkm_kmap(pt->memory);
 		while (ptes--) {
-			const u64 data = *map->dma++ | map->type;
+			const u64 data = *map->dma++ + map->type;
 			VMM_WO064(pt, vmm, ptei++ * 8, data);
 			map->type += map->ctag;
 		}
@@ -163,21 +163,21 @@ nv50_vmm_pgd = {
 	.pde = nv50_vmm_pgd_pde,
 };
 
-static const struct nvkm_vmm_desc
+const struct nvkm_vmm_desc
 nv50_vmm_desc_12[] = {
 	{ PGT, 17, 8, 0x1000, &nv50_vmm_pgt },
 	{ PGD, 11, 0, 0x0000, &nv50_vmm_pgd },
 	{}
 };
 
-static const struct nvkm_vmm_desc
+const struct nvkm_vmm_desc
 nv50_vmm_desc_16[] = {
 	{ PGT, 13, 8, 0x1000, &nv50_vmm_pgt },
 	{ PGD, 11, 0, 0x0000, &nv50_vmm_pgd },
 	{}
 };
 
-static void
+void
 nv50_vmm_flush(struct nvkm_vmm *vmm, int level)
 {
 	struct nvkm_subdev *subdev = &vmm->mmu->subdev;
@@ -223,7 +223,7 @@ nv50_vmm_flush(struct nvkm_vmm *vmm, int level)
 	mutex_unlock(&subdev->mutex);
 }
 
-static int
+int
 nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
 	       struct nvkm_vmm_map *map)
 {
@@ -321,7 +321,7 @@ nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
 	return 0;
 }
 
-static void
+void
 nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 {
 	struct nvkm_vmm_join *join;
@@ -335,7 +335,7 @@ nv50_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 	}
 }
 
-static int
+int
 nv50_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 {
 	const u32 pd_offset = vmm->mmu->func->vmm.pd_offset;
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
index dc332ea56f6c..3ecffa52c814 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
@@ -102,10 +102,13 @@ static int sun4i_tmds_determine_rate(struct clk_hw *hw,
 					goto out;
 				}
 
-				if (abs(rate - rounded / i) <
-				    abs(rate - best_parent / best_div)) {
+				if (!best_parent ||
+				    abs(rate - rounded / i / j) <
+				    abs(rate - best_parent / best_half /
+					best_div)) {
 					best_parent = rounded;
-					best_div = i;
+					best_half = i;
+					best_div = j;
 				}
 			}
 		}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 641294aef165..fcd58145d0da 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1863,7 +1863,7 @@ u32 vmw_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
  */
 int vmw_enable_vblank(struct drm_device *dev, unsigned int pipe)
 {
-	return -ENOSYS;
+	return -EINVAL;
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b8a09807c5de..3824595fece1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -266,8 +266,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = {
 	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_ldu_connector_destroy,
 	.reset = vmw_du_connector_reset,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.atomic_duplicate_state = vmw_du_connector_duplicate_state,
+	.atomic_destroy_state = vmw_du_connector_destroy_state,
 	.atomic_set_property = vmw_du_connector_atomic_set_property,
 	.atomic_get_property = vmw_du_connector_atomic_get_property,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index bc5f6026573d..63a4cd794b73 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -420,8 +420,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = {
 	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_sou_connector_destroy,
 	.reset = vmw_du_connector_reset,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.atomic_duplicate_state = vmw_du_connector_duplicate_state,
+	.atomic_destroy_state = vmw_du_connector_destroy_state,
 	.atomic_set_property = vmw_du_connector_atomic_set_property,
 	.atomic_get_property = vmw_du_connector_atomic_get_property,
 };
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 706164b4c5be..f7829a74140c 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -821,8 +821,12 @@ void i2c_unregister_device(struct i2c_client *client)
 {
 	if (!client)
 		return;
-	if (client->dev.of_node)
+
+	if (client->dev.of_node) {
 		of_node_clear_flag(client->dev.of_node, OF_POPULATED);
+		of_node_put(client->dev.of_node);
+	}
+
 	if (ACPI_COMPANION(&client->dev))
 		acpi_device_clear_enumerated(ACPI_COMPANION(&client->dev));
 	device_unregister(&client->dev);
diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 4bb9927afd01..a1082c04ac5c 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -397,16 +397,17 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 				   the underlying bus driver */
 		break;
 	case I2C_SMBUS_I2C_BLOCK_DATA:
+		if (data->block[0] > I2C_SMBUS_BLOCK_MAX) {
+			dev_err(&adapter->dev, "Invalid block %s size %d\n",
+				read_write == I2C_SMBUS_READ ? "read" : "write",
+				data->block[0]);
+			return -EINVAL;
+		}
+
 		if (read_write == I2C_SMBUS_READ) {
 			msg[1].len = data->block[0];
 		} else {
 			msg[0].len = data->block[0] + 1;
-			if (msg[0].len > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(&adapter->dev,
-					"Invalid block write size %d\n",
-					data->block[0]);
-				return -EINVAL;
-			}
 			for (i = 1; i <= data->block[0]; i++)
 				msgbuf0[i] = data->block[i];
 		}
diff --git a/drivers/input/misc/twl4030-vibra.c b/drivers/input/misc/twl4030-vibra.c
index 6c51d404874b..c37aea9ac272 100644
--- a/drivers/input/misc/twl4030-vibra.c
+++ b/drivers/input/misc/twl4030-vibra.c
@@ -178,12 +178,14 @@ static SIMPLE_DEV_PM_OPS(twl4030_vibra_pm_ops,
 			 twl4030_vibra_suspend, twl4030_vibra_resume);
 
 static bool twl4030_vibra_check_coexist(struct twl4030_vibra_data *pdata,
-			      struct device_node *node)
+			      struct device_node *parent)
 {
+	struct device_node *node;
+
 	if (pdata && pdata->coexist)
 		return true;
 
-	node = of_find_node_by_name(node, "codec");
+	node = of_get_child_by_name(parent, "codec");
 	if (node) {
 		of_node_put(node);
 		return true;
diff --git a/drivers/input/misc/twl6040-vibra.c b/drivers/input/misc/twl6040-vibra.c
index 5690eb7ff954..15e0d352c4cc 100644
--- a/drivers/input/misc/twl6040-vibra.c
+++ b/drivers/input/misc/twl6040-vibra.c
@@ -248,8 +248,7 @@ static int twl6040_vibra_probe(struct platform_device *pdev)
 	int vddvibr_uV = 0;
 	int error;
 
-	of_node_get(twl6040_core_dev->of_node);
-	twl6040_core_node = of_find_node_by_name(twl6040_core_dev->of_node,
+	twl6040_core_node = of_get_child_by_name(twl6040_core_dev->of_node,
 						 "vibra");
 	if (!twl6040_core_node) {
 		dev_err(&pdev->dev, "parent of node is missing?\n");
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 579b899add26..dbe57da8c1a1 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1250,29 +1250,32 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
 	case SS4_PACKET_ID_MULTI:
 		if (priv->flags & ALPS_BUTTONPAD) {
 			if (IS_SS4PLUS_DEV(priv->dev_id)) {
-				f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
-				f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+				f->mt[2].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
+				f->mt[3].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+				no_data_x = SS4_PLUS_MFPACKET_NO_AX_BL;
 			} else {
 				f->mt[2].x = SS4_BTL_MF_X_V2(p, 0);
 				f->mt[3].x = SS4_BTL_MF_X_V2(p, 1);
+				no_data_x = SS4_MFPACKET_NO_AX_BL;
 			}
+			no_data_y = SS4_MFPACKET_NO_AY_BL;
 
 			f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0);
 			f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1);
-			no_data_x = SS4_MFPACKET_NO_AX_BL;
-			no_data_y = SS4_MFPACKET_NO_AY_BL;
 		} else {
 			if (IS_SS4PLUS_DEV(priv->dev_id)) {
-				f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0);
-				f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+				f->mt[2].x = SS4_PLUS_STD_MF_X_V2(p, 0);
+				f->mt[3].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+				no_data_x = SS4_PLUS_MFPACKET_NO_AX;
 			} else {
-				f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
-				f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
+				f->mt[2].x = SS4_STD_MF_X_V2(p, 0);
+				f->mt[3].x = SS4_STD_MF_X_V2(p, 1);
+				no_data_x = SS4_MFPACKET_NO_AX;
 			}
+			no_data_y = SS4_MFPACKET_NO_AY;
+
 			f->mt[2].y = SS4_STD_MF_Y_V2(p, 0);
 			f->mt[3].y = SS4_STD_MF_Y_V2(p, 1);
-			no_data_x = SS4_MFPACKET_NO_AX;
-			no_data_y = SS4_MFPACKET_NO_AY;
 		}
 
 		f->first_mp = 0;
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index c80a7c76cb76..79b6d69d1486 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -141,10 +141,12 @@ enum SS4_PACKET_ID {
 #define SS4_TS_Z_V2(_b)		(s8)(_b[4] & 0x7F)
 
 
-#define SS4_MFPACKET_NO_AX	8160	/* X-Coordinate value */
-#define SS4_MFPACKET_NO_AY	4080	/* Y-Coordinate value */
-#define SS4_MFPACKET_NO_AX_BL	8176	/* Buttonless X-Coordinate value */
-#define SS4_MFPACKET_NO_AY_BL	4088	/* Buttonless Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX		8160	/* X-Coordinate value */
+#define SS4_MFPACKET_NO_AY		4080	/* Y-Coordinate value */
+#define SS4_MFPACKET_NO_AX_BL		8176	/* Buttonless X-Coord value */
+#define SS4_MFPACKET_NO_AY_BL		4088	/* Buttonless Y-Coord value */
+#define SS4_PLUS_MFPACKET_NO_AX		4080	/* SS4 PLUS, X */
+#define SS4_PLUS_MFPACKET_NO_AX_BL	4088	/* Buttonless SS4 PLUS, X */
 
 /*
  * enum V7_PACKET_ID - defines the packet type for V7
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index ee5466a374bf..cd9f61cb3fc6 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -173,6 +173,7 @@ static const char * const smbus_pnp_ids[] = {
 	"LEN0046", /* X250 */
 	"LEN004a", /* W541 */
 	"LEN200f", /* T450s */
+	"LEN2018", /* T460p */
 	NULL
 };
 
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 4f2bb5947a4e..141ea228aac6 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -230,8 +230,10 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id)
 		rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev,
 			"Failed to process interrupt request: %d\n", ret);
 
-	if (count)
+	if (count) {
 		kfree(attn_data.data);
+		attn_data.data = NULL;
+	}
 
 	if (!kfifo_is_empty(&drvdata->attn_fifo))
 		return rmi_irq_fn(irq, dev_id);
diff --git a/drivers/input/touchscreen/88pm860x-ts.c b/drivers/input/touchscreen/88pm860x-ts.c
index 7ed828a51f4c..3486d9403805 100644
--- a/drivers/input/touchscreen/88pm860x-ts.c
+++ b/drivers/input/touchscreen/88pm860x-ts.c
@@ -126,7 +126,7 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
 	int data, n, ret;
 	if (!np)
 		return -ENODEV;
-	np = of_find_node_by_name(np, "touch");
+	np = of_get_child_by_name(np, "touch");
 	if (!np) {
 		dev_err(&pdev->dev, "Can't find touch node\n");
 		return -EINVAL;
@@ -144,13 +144,13 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
 	if (data) {
 		ret = pm860x_reg_write(i2c, PM8607_GPADC_MISC1, data);
 		if (ret < 0)
-			return -EINVAL;
+			goto err_put_node;
 	}
 	/* set tsi prebias time */
 	if (!of_property_read_u32(np, "marvell,88pm860x-tsi-prebias", &data)) {
 		ret = pm860x_reg_write(i2c, PM8607_TSI_PREBIAS, data);
 		if (ret < 0)
-			return -EINVAL;
+			goto err_put_node;
 	}
 	/* set prebias & prechg time of pen detect */
 	data = 0;
@@ -161,10 +161,18 @@ static int pm860x_touch_dt_init(struct platform_device *pdev,
 	if (data) {
 		ret = pm860x_reg_write(i2c, PM8607_PD_PREBIAS, data);
 		if (ret < 0)
-			return -EINVAL;
+			goto err_put_node;
 	}
 	of_property_read_u32(np, "marvell,88pm860x-resistor-X", res_x);
+
+	of_node_put(np);
+
 	return 0;
+
+err_put_node:
+	of_node_put(np);
+
+	return -EINVAL;
 }
 #else
 #define pm860x_touch_dt_init(x, y, z)	(-1)
diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c
index 8d7f9c8f2771..9642f103b726 100644
--- a/drivers/input/touchscreen/of_touchscreen.c
+++ b/drivers/input/touchscreen/of_touchscreen.c
@@ -13,6 +13,7 @@
 #include <linux/input.h>
 #include <linux/input/mt.h>
 #include <linux/input/touchscreen.h>
+#include <linux/module.h>
 
 static bool touchscreen_get_prop_u32(struct device *dev,
 				     const char *property,
@@ -185,3 +186,6 @@ void touchscreen_report_pos(struct input_dev *input,
 	input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y);
 }
 EXPORT_SYMBOL(touchscreen_report_pos);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Device-tree helpers functions for touchscreen devices");
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 85140c9af581..8b941f814472 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -687,6 +687,20 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
 		return;
 	}
 
+	/* For i.MX53 eSDHCv3, SYSCTL.SDCLKFS may not be set to 0. */
+	if (is_imx53_esdhc(imx_data)) {
+		/*
+		 * According to the i.MX53 reference manual, if DLLCTRL[10] can
+		 * be set, then the controller is eSDHCv3, else it is eSDHCv2.
+		 */
+		val = readl(host->ioaddr + ESDHC_DLL_CTRL);
+		writel(val | BIT(10), host->ioaddr + ESDHC_DLL_CTRL);
+		temp = readl(host->ioaddr + ESDHC_DLL_CTRL);
+		writel(val, host->ioaddr + ESDHC_DLL_CTRL);
+		if (temp & BIT(10))
+			pre_div = 2;
+	}
+
 	temp = sdhci_readl(host, ESDHC_SYSTEM_CONTROL);
 	temp &= ~(ESDHC_CLOCK_IPGEN | ESDHC_CLOCK_HCKEN | ESDHC_CLOCK_PEREN
 		| ESDHC_CLOCK_MASK);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 18ff127020c0..dd161c5eea8e 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
 	void *cmd_head = pcan_usb_fd_cmd_buffer(dev);
 	int err = 0;
 	u8 *packet_ptr;
-	int i, n = 1, packet_len;
+	int packet_len;
 	ptrdiff_t cmd_len;
 
 	/* usb device unregistered? */
@@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
 	}
 
 	packet_ptr = cmd_head;
+	packet_len = cmd_len;
 
 	/* firmware is not able to re-assemble 512 bytes buffer in full-speed */
-	if ((dev->udev->speed != USB_SPEED_HIGH) &&
-	    (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) {
-		packet_len = PCAN_UFD_LOSPD_PKT_SIZE;
-		n += cmd_len / packet_len;
-	} else {
-		packet_len = cmd_len;
-	}
+	if (unlikely(dev->udev->speed != USB_SPEED_HIGH))
+		packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE);
 
-	for (i = 0; i < n; i++) {
+	do {
 		err = usb_bulk_msg(dev->udev,
 				   usb_sndbulkpipe(dev->udev,
 						   PCAN_USBPRO_EP_CMDOUT),
@@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
 		}
 
 		packet_ptr += packet_len;
-	}
+		cmd_len -= packet_len;
+
+		if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE)
+			packet_len = cmd_len;
+
+	} while (packet_len > 0);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 7892f2f0c6b5..2c2976a2dda6 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void fs_timeout(struct net_device *dev)
+static void fs_timeout_work(struct work_struct *work)
 {
-	struct fs_enet_private *fep = netdev_priv(dev);
+	struct fs_enet_private *fep = container_of(work, struct fs_enet_private,
+						   timeout_work);
+	struct net_device *dev = fep->ndev;
 	unsigned long flags;
 	int wake = 0;
 
@@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev)
 		phy_stop(dev->phydev);
 		(*fep->ops->stop)(dev);
 		(*fep->ops->restart)(dev);
-		phy_start(dev->phydev);
 	}
 
 	phy_start(dev->phydev);
@@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev)
 		netif_wake_queue(dev);
 }
 
+static void fs_timeout(struct net_device *dev)
+{
+	struct fs_enet_private *fep = netdev_priv(dev);
+
+	schedule_work(&fep->timeout_work);
+}
+
 /*-----------------------------------------------------------------------------
  *  generic link-change handler - should be sufficient for most cases
  *-----------------------------------------------------------------------------*/
@@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev)
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
 	napi_disable(&fep->napi);
+	cancel_work_sync(&fep->timeout_work);
 	phy_stop(dev->phydev);
 
 	spin_lock_irqsave(&fep->lock, flags);
@@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 
 	ndev->netdev_ops = &fs_enet_netdev_ops;
 	ndev->watchdog_timeo = 2 * HZ;
+	INIT_WORK(&fep->timeout_work, fs_timeout_work);
 	netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight);
 
 	ndev->ethtool_ops = &fs_ethtool_ops;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 92e06b37a199..195fae6aec4a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -125,6 +125,7 @@ struct fs_enet_private {
 	spinlock_t lock;	/* during all ops except TX pckt processing */
 	spinlock_t tx_lock;	/* during fs_start_xmit and fs_tx         */
 	struct fs_platform_info *fpi;
+	struct work_struct timeout_work;
 	const struct fs_ops *ops;
 	int rx_ring, tx_ring;
 	dma_addr_t ring_mem_addr;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 736df59c16f5..be2ce8dece4a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1280,6 +1280,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned char *dst;
 	u64 *handle_array;
 	int index = 0;
+	u8 proto = 0;
 	int ret = 0;
 
 	if (adapter->resetting) {
@@ -1368,17 +1369,18 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if (skb->protocol == htons(ETH_P_IP)) {
-		if (ip_hdr(skb)->version == 4)
-			tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
-		else if (ip_hdr(skb)->version == 6)
-			tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
-
-		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
-			tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
-		else if (ip_hdr(skb)->protocol != IPPROTO_TCP)
-			tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV4;
+		proto = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_IPV6;
+		proto = ipv6_hdr(skb)->nexthdr;
 	}
 
+	if (proto == IPPROTO_TCP)
+		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_TCP;
+	else if (proto == IPPROTO_UDP)
+		tx_crq.v1.flags1 |= IBMVNIC_TX_PROT_UDP;
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
 		hdrs += 2;
@@ -3357,7 +3359,11 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
 		return;
 	}
 
+	adapter->ip_offload_ctrl.len =
+	    cpu_to_be32(sizeof(adapter->ip_offload_ctrl));
 	adapter->ip_offload_ctrl.version = cpu_to_be32(INITIAL_VERSION_IOB);
+	adapter->ip_offload_ctrl.ipv4_chksum = buf->ipv4_chksum;
+	adapter->ip_offload_ctrl.ipv6_chksum = buf->ipv6_chksum;
 	adapter->ip_offload_ctrl.tcp_ipv4_chksum = buf->tcp_ipv4_chksum;
 	adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum;
 	adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 7f605221a686..a434fecfdfeb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -2463,7 +2463,6 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
 	return err;
 }
 
-#ifdef CONFIG_PM
 /**
  * fm10k_resume - Generic PM resume hook
  * @dev: generic device structure
@@ -2472,7 +2471,7 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
  * suspend or hibernation. This function does not need to handle lower PCIe
  * device state as the stack takes care of that for us.
  **/
-static int fm10k_resume(struct device *dev)
+static int __maybe_unused fm10k_resume(struct device *dev)
 {
 	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
 	struct net_device *netdev = interface->netdev;
@@ -2499,7 +2498,7 @@ static int fm10k_resume(struct device *dev)
  * system suspend or hibernation. This function does not need to handle lower
  * PCIe device state as the stack takes care of that for us.
  **/
-static int fm10k_suspend(struct device *dev)
+static int __maybe_unused fm10k_suspend(struct device *dev)
 {
 	struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
 	struct net_device *netdev = interface->netdev;
@@ -2511,8 +2510,6 @@ static int fm10k_suspend(struct device *dev)
 	return 0;
 }
 
-#endif /* CONFIG_PM */
-
 /**
  * fm10k_io_error_detected - called when PCI error is detected
  * @pdev: Pointer to PCI device
@@ -2643,11 +2640,9 @@ static struct pci_driver fm10k_driver = {
 	.id_table		= fm10k_pci_tbl,
 	.probe			= fm10k_probe,
 	.remove			= fm10k_remove,
-#ifdef CONFIG_PM
 	.driver = {
 		.pm		= &fm10k_pm_ops,
 	},
-#endif /* CONFIG_PM */
 	.sriov_configure	= fm10k_iov_configure,
 	.err_handler		= &fm10k_err_handler
 };
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 01ff5ba6796e..31891ae11c9b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -821,13 +821,18 @@ static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
 	int err;
 
-	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
-	if (err)
-		return err;
 	fib->lpm_tree = new_tree;
 	mlxsw_sp_lpm_tree_hold(new_tree);
+	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+	if (err)
+		goto err_tree_bind;
 	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
 	return 0;
+
+err_tree_bind:
+	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+	fib->lpm_tree = old_tree;
+	return err;
 }
 
 static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
@@ -868,11 +873,14 @@ err_tree_replace:
 	return err;
 
 no_replace:
-	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
-	if (err)
-		return err;
 	fib->lpm_tree = new_tree;
 	mlxsw_sp_lpm_tree_hold(new_tree);
+	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
+	if (err) {
+		mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
+		fib->lpm_tree = NULL;
+		return err;
+	}
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index ed58c746e4af..f5a7eb22d0f5 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -715,7 +715,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 		/* warning!!!! We are retrieving the virtual ptr in the sw_data
 		 * field as a 32bit value. Will not work on 64bit machines
 		 */
-		page = (struct page *)GET_SW_DATA0(desc);
+		page = (struct page *)GET_SW_DATA0(ndesc);
 
 		if (likely(dma_buff && buf_len && page)) {
 			dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 170a3e89b5af..698874684b4e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -679,6 +679,15 @@ static void tun_queue_purge(struct tun_file *tfile)
 	skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
+static void tun_cleanup_tx_ring(struct tun_file *tfile)
+{
+	if (tfile->tx_ring.queue) {
+		ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+		xdp_rxq_info_unreg(&tfile->xdp_rxq);
+		memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+	}
+}
+
 static void __tun_detach(struct tun_file *tfile, bool clean)
 {
 	struct tun_file *ntfile;
@@ -725,10 +734,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 			    tun->dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(tun->dev);
 		}
-		if (tun) {
-			ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
-			xdp_rxq_info_unreg(&tfile->xdp_rxq);
-		}
+		tun_cleanup_tx_ring(tfile);
 		sock_put(&tfile->sk);
 	}
 }
@@ -770,12 +776,14 @@ static void tun_detach_all(struct net_device *dev)
 		tun_queue_purge(tfile);
 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
 		sock_put(&tfile->sk);
+		tun_cleanup_tx_ring(tfile);
 	}
 	list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
 		tun_enable_queue(tfile);
 		tun_queue_purge(tfile);
 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
 		sock_put(&tfile->sk);
+		tun_cleanup_tx_ring(tfile);
 	}
 	BUG_ON(tun->numdisabled != 0);
 
@@ -3145,6 +3153,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 
 	sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
 
+	memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring));
+
 	return 0;
 }
 
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index d51d9abf7986..0657203ffb91 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -606,6 +606,7 @@ enum rtl8152_flags {
 	PHY_RESET,
 	SCHEDULE_NAPI,
 	GREEN_ETHERNET,
+	DELL_TB_RX_AGG_BUG,
 };
 
 /* Define these values to match your device */
@@ -1798,6 +1799,9 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 		dev_kfree_skb_any(skb);
 
 		remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
+
+		if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+			break;
 	}
 
 	if (!skb_queue_empty(&skb_head)) {
@@ -4133,6 +4137,9 @@ static void r8153_init(struct r8152 *tp)
 	/* rx aggregation */
 	ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
 	ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+	if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+		ocp_data |= RX_AGG_DISABLE;
+
 	ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
 
 	rtl_tally_reset(tp);
@@ -5207,6 +5214,12 @@ static int rtl8152_probe(struct usb_interface *intf,
 		netdev->hw_features &= ~NETIF_F_RXCSUM;
 	}
 
+	if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 &&
+	    udev->serial && !strcmp(udev->serial, "000001000000")) {
+		dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation");
+		set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+	}
+
 	netdev->ethtool_ops = &ops;
 	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
index 6a59d0609d30..9be0b051066a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
@@ -182,12 +182,9 @@ static int brcmf_c_process_clm_blob(struct brcmf_if *ifp)
 
 	err = request_firmware(&clm, clm_name, dev);
 	if (err) {
-		if (err == -ENOENT) {
-			brcmf_dbg(INFO, "continue with CLM data currently present in firmware\n");
-			return 0;
-		}
-		brcmf_err("request CLM blob file failed (%d)\n", err);
-		return err;
+		brcmf_info("no clm_blob available(err=%d), device may have limited channels available\n",
+			   err);
+		return 0;
 	}
 
 	chunk_buf = kzalloc(sizeof(*chunk_buf) + MAX_CHUNK_LEN - 1, GFP_KERNEL);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d53550e612bc..4276ebfff22b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -451,10 +451,13 @@ static void **nvme_pci_iod_list(struct request *req)
 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	int nseg = blk_rq_nr_phys_segments(req);
 	unsigned int avg_seg_size;
 
-	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
-			blk_rq_nr_phys_segments(req));
+	if (nseg == 0)
+		return false;
+
+	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
 
 	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
 		return false;
@@ -722,20 +725,19 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
 }
 
 static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
-		struct request *req, struct nvme_rw_command *cmd)
+		struct request *req, struct nvme_rw_command *cmd, int entries)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	int length = blk_rq_payload_bytes(req);
 	struct dma_pool *pool;
 	struct nvme_sgl_desc *sg_list;
 	struct scatterlist *sg = iod->sg;
-	int entries = iod->nents, i = 0;
 	dma_addr_t sgl_dma;
+	int i = 0;
 
 	/* setting the transfer type as SGL */
 	cmd->flags = NVME_CMD_SGL_METABUF;
 
-	if (length == sg_dma_len(sg)) {
+	if (entries == 1) {
 		nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
 		return BLK_STS_OK;
 	}
@@ -775,13 +777,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 		}
 
 		nvme_pci_sgl_set_data(&sg_list[i++], sg);
-
-		length -= sg_dma_len(sg);
 		sg = sg_next(sg);
-		entries--;
-	} while (length > 0);
+	} while (--entries > 0);
 
-	WARN_ON(entries > 0);
 	return BLK_STS_OK;
 }
 
@@ -793,6 +791,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	enum dma_data_direction dma_dir = rq_data_dir(req) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	blk_status_t ret = BLK_STS_IOERR;
+	int nr_mapped;
 
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
@@ -800,12 +799,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		goto out;
 
 	ret = BLK_STS_RESOURCE;
-	if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
-				DMA_ATTR_NO_WARN))
+	nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
+			DMA_ATTR_NO_WARN);
+	if (!nr_mapped)
 		goto out;
 
 	if (iod->use_sgl)
-		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
+		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
 
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index b4964b067aec..8f6e8e28996d 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -410,6 +410,10 @@ static struct phy *_of_phy_get(struct device_node *np, int index)
 	if (ret)
 		return ERR_PTR(-ENODEV);
 
+	/* This phy type handled by the usb-phy subsystem for now */
+	if (of_device_is_compatible(args.np, "usb-nop-xceiv"))
+		return ERR_PTR(-ENODEV);
+
 	mutex_lock(&phy_provider_mutex);
 	phy_provider = of_phy_provider_lookup(args.np);
 	if (IS_ERR(phy_provider) || !try_module_get(phy_provider->owner)) {
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index f48a2ee587a4..ee18428a051f 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -31,7 +31,7 @@ config SSB_BLOCKIO
 
 config SSB_PCIHOST_POSSIBLE
 	bool
-	depends on SSB && (PCI = y || PCI = SSB)
+	depends on SSB && (PCI = y || PCI = SSB) && PCI_DRIVERS_LEGACY
 	default y
 
 config SSB_PCIHOST
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 79375fc115d2..d67a72dcb92c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		 * safe because the task has stopped executing permanently.
 		 */
 		if (permitted && (task->flags & PF_DUMPCORE)) {
-			eip = KSTK_EIP(task);
-			esp = KSTK_ESP(task);
+			if (try_get_task_stack(task)) {
+				eip = KSTK_EIP(task);
+				esp = KSTK_ESP(task);
+				put_task_stack(task);
+			}
 		}
 	}
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 2272ded07496..631354acfa72 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -219,7 +219,7 @@
 /* Mark a function definition as prohibited from being cloned. */
 #define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
 
-#ifdef RANDSTRUCT_PLUGIN
+#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
 #endif
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 4178d2493547..5e335b6203f4 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -71,7 +71,7 @@ extern void delayacct_init(void);
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
-extern void __delayacct_blkio_end(void);
+extern void __delayacct_blkio_end(struct task_struct *);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
@@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void)
 		__delayacct_blkio_start();
 }
 
-static inline void delayacct_blkio_end(void)
+static inline void delayacct_blkio_end(struct task_struct *p)
 {
 	if (current->delays)
-		__delayacct_blkio_end();
+		__delayacct_blkio_end(p);
 	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
 
@@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 {}
 static inline void delayacct_blkio_start(void)
 {}
-static inline void delayacct_blkio_end(void)
+static inline void delayacct_blkio_end(struct task_struct *p)
 {}
 static inline int delayacct_add_tsk(struct taskstats *d,
 					struct task_struct *tsk)
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index bae807eb2933..853291714ae0 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -31,11 +31,17 @@
 #else
 #define MODULE_RANDSTRUCT_PLUGIN
 #endif
+#ifdef RETPOLINE
+#define MODULE_VERMAGIC_RETPOLINE "retpoline "
+#else
+#define MODULE_VERMAGIC_RETPOLINE ""
+#endif
 
 #define VERMAGIC_STRING 						\
 	UTS_RELEASE " "							\
 	MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT 			\
 	MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS	\
 	MODULE_ARCH_VERMAGIC						\
-	MODULE_RANDSTRUCT_PLUGIN
+	MODULE_RANDSTRUCT_PLUGIN					\
+	MODULE_VERMAGIC_RETPOLINE
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 25e723b0dfd4..887e28408f98 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -970,7 +970,7 @@ select_insn:
 		DST = tmp;
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		DST = do_div(tmp, (u32) SRC);
@@ -989,7 +989,7 @@ select_insn:
 		DST = div64_u64(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(SRC == 0))
+		if (unlikely((u32)SRC == 0))
 			return 0;
 		tmp = (u32) DST;
 		do_div(tmp, (u32) SRC);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2e7a43edf264..8063dffd91a8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1349,6 +1349,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno);
 }
 
+static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = cur_regs(env) + regno;
+
+	return reg->type == PTR_TO_CTX;
+}
+
 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 				   const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
@@ -1728,6 +1735,12 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 		return -EACCES;
 	}
 
+	if (is_ctx_reg(env, insn->dst_reg)) {
+		verbose(env, "BPF_XADD stores into R%d context is not allowed\n",
+			insn->dst_reg);
+		return -EACCES;
+	}
+
 	/* check whether atomic_add can read the memory */
 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 			       BPF_SIZE(insn->code), BPF_READ, -1);
@@ -2478,17 +2491,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
 	dst_reg = &regs[dst];
 
-	if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
-		print_verifier_state(env, state);
-		verbose(env,
-			"verifier internal error: known but bad sbounds\n");
-		return -EINVAL;
-	}
-	if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
-		print_verifier_state(env, state);
-		verbose(env,
-			"verifier internal error: known but bad ubounds\n");
-		return -EINVAL;
+	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
+	    smin_val > smax_val || umin_val > umax_val) {
+		/* Taint dst register if offset had invalid bounds derived from
+		 * e.g. dead branches.
+		 */
+		__mark_reg_unknown(dst_reg);
+		return 0;
 	}
 
 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
@@ -2680,6 +2689,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 	src_known = tnum_is_const(src_reg.var_off);
 	dst_known = tnum_is_const(dst_reg->var_off);
 
+	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
+	    smin_val > smax_val || umin_val > umax_val) {
+		/* Taint dst register if offset had invalid bounds derived from
+		 * e.g. dead branches.
+		 */
+		__mark_reg_unknown(dst_reg);
+		return 0;
+	}
+
 	if (!src_known &&
 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
 		__mark_reg_unknown(dst_reg);
@@ -4661,6 +4679,12 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
+			if (is_ctx_reg(env, insn->dst_reg)) {
+				verbose(env, "BPF_ST stores into R%d context is not allowed\n",
+					insn->dst_reg);
+				return -EACCES;
+			}
+
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
@@ -5330,6 +5354,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 	int i, cnt, delta = 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+			/* due to JIT bugs clear upper 32-bits of src register
+			 * before div/mod operation
+			 */
+			insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
+			insn_buf[1] = *insn;
+			cnt = 2;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			continue;
+		}
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
 			continue;
 		if (insn->src_reg == BPF_PSEUDO_CALL)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 2cf06c274e4c..7e4c44538119 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4447,6 +4447,7 @@ static struct cftype cgroup_base_files[] = {
 	},
 	{
 		.name = "cgroup.threads",
+		.flags = CFTYPE_NS_DELEGATABLE,
 		.release = cgroup_procs_release,
 		.seq_start = cgroup_threads_start,
 		.seq_next = cgroup_procs_next,
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 4a1c33416b6a..e2764d767f18 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -51,16 +51,16 @@ void __delayacct_tsk_init(struct task_struct *tsk)
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(u64 *start, u64 *total, u32 *count)
+static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
 {
 	s64 ns = ktime_get_ns() - *start;
 	unsigned long flags;
 
 	if (ns > 0) {
-		spin_lock_irqsave(&current->delays->lock, flags);
+		spin_lock_irqsave(lock, flags);
 		*total += ns;
 		(*count)++;
-		spin_unlock_irqrestore(&current->delays->lock, flags);
+		spin_unlock_irqrestore(lock, flags);
 	}
 }
 
@@ -69,17 +69,25 @@ void __delayacct_blkio_start(void)
 	current->delays->blkio_start = ktime_get_ns();
 }
 
-void __delayacct_blkio_end(void)
+/*
+ * We cannot rely on the `current` macro, as we haven't yet switched back to
+ * the process being woken.
+ */
+void __delayacct_blkio_end(struct task_struct *p)
 {
-	if (current->delays->flags & DELAYACCT_PF_SWAPIN)
-		/* Swapin block I/O */
-		delayacct_end(&current->delays->blkio_start,
-			&current->delays->swapin_delay,
-			&current->delays->swapin_count);
-	else	/* Other block I/O */
-		delayacct_end(&current->delays->blkio_start,
-			&current->delays->blkio_delay,
-			&current->delays->blkio_count);
+	struct task_delay_info *delays = p->delays;
+	u64 *total;
+	u32 *count;
+
+	if (p->delays->flags & DELAYACCT_PF_SWAPIN) {
+		total = &delays->swapin_delay;
+		count = &delays->swapin_count;
+	} else {
+		total = &delays->blkio_delay;
+		count = &delays->blkio_count;
+	}
+
+	delayacct_end(&delays->lock, &delays->blkio_start, total, count);
 }
 
 int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
@@ -153,8 +161,10 @@ void __delayacct_freepages_start(void)
 
 void __delayacct_freepages_end(void)
 {
-	delayacct_end(&current->delays->freepages_start,
-			&current->delays->freepages_delay,
-			&current->delays->freepages_count);
+	delayacct_end(
+		&current->delays->lock,
+		&current->delays->freepages_start,
+		&current->delays->freepages_delay,
+		&current->delays->freepages_count);
 }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 57d0b3657e16..8c5424dd5924 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1878,6 +1878,9 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 	struct futex_q *this, *next;
 	DEFINE_WAKE_Q(wake_q);
 
+	if (nr_wake < 0 || nr_requeue < 0)
+		return -EINVAL;
+
 	/*
 	 * When PI not supported: return -ENOSYS if requeue_pi is true,
 	 * consequently the compiler knows requeue_pi is always false past
@@ -2294,21 +2297,17 @@ static void unqueue_me_pi(struct futex_q *q)
 	spin_unlock(q->lock_ptr);
 }
 
-/*
- * Fixup the pi_state owner with the new owner.
- *
- * Must be called with hash bucket lock held and mm->sem held for non
- * private futexes.
- */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *newowner)
+				struct task_struct *argowner)
 {
-	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
 	u32 uval, uninitialized_var(curval), newval;
-	struct task_struct *oldowner;
+	struct task_struct *oldowner, *newowner;
+	u32 newtid;
 	int ret;
 
+	lockdep_assert_held(q->lock_ptr);
+
 	raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
 
 	oldowner = pi_state->owner;
@@ -2317,11 +2316,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 		newtid |= FUTEX_OWNER_DIED;
 
 	/*
-	 * We are here either because we stole the rtmutex from the
-	 * previous highest priority waiter or we are the highest priority
-	 * waiter but have failed to get the rtmutex the first time.
+	 * We are here because either:
+	 *
+	 *  - we stole the lock and pi_state->owner needs updating to reflect
+	 *    that (@argowner == current),
+	 *
+	 * or:
+	 *
+	 *  - someone stole our lock and we need to fix things to point to the
+	 *    new owner (@argowner == NULL).
 	 *
-	 * We have to replace the newowner TID in the user space variable.
+	 * Either way, we have to replace the TID in the user space variable.
 	 * This must be atomic as we have to preserve the owner died bit here.
 	 *
 	 * Note: We write the user space value _before_ changing the pi_state
@@ -2334,6 +2339,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	 * in the PID check in lookup_pi_state.
 	 */
 retry:
+	if (!argowner) {
+		if (oldowner != current) {
+			/*
+			 * We raced against a concurrent self; things are
+			 * already fixed up. Nothing to do.
+			 */
+			ret = 0;
+			goto out_unlock;
+		}
+
+		if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+			/* We got the lock after all, nothing to fix. */
+			ret = 0;
+			goto out_unlock;
+		}
+
+		/*
+		 * Since we just failed the trylock; there must be an owner.
+		 */
+		newowner = rt_mutex_owner(&pi_state->pi_mutex);
+		BUG_ON(!newowner);
+	} else {
+		WARN_ON_ONCE(argowner != current);
+		if (oldowner == current) {
+			/*
+			 * We raced against a concurrent self; things are
+			 * already fixed up. Nothing to do.
+			 */
+			ret = 0;
+			goto out_unlock;
+		}
+		newowner = argowner;
+	}
+
+	newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+
 	if (get_futex_value_locked(&uval, uaddr))
 		goto handle_fault;
 
@@ -2434,9 +2475,9 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		 * Got the lock. We might not be the anticipated owner if we
 		 * did a lock-steal - fix up the PI-state in that case:
 		 *
-		 * We can safely read pi_state->owner without holding wait_lock
-		 * because we now own the rt_mutex, only the owner will attempt
-		 * to change it.
+		 * Speculative pi_state->owner read (we don't hold wait_lock);
+		 * since we own the lock pi_state->owner == current is the
+		 * stable state, anything else needs more attention.
 		 */
 		if (q->pi_state->owner != current)
 			ret = fixup_pi_state_owner(uaddr, q, current);
@@ -2444,6 +2485,19 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 	}
 
 	/*
+	 * If we didn't get the lock; check if anybody stole it from us. In
+	 * that case, we need to fix up the uval to point to them instead of
+	 * us, otherwise bad things happen. [10]
+	 *
+	 * Another speculative read; pi_state->owner == current is unstable
+	 * but needs our attention.
+	 */
+	if (q->pi_state->owner == current) {
+		ret = fixup_pi_state_owner(uaddr, q, NULL);
+		goto out;
+	}
+
+	/*
 	 * Paranoia check. If we did not take the lock, then we should not be
 	 * the owner of the rt_mutex.
 	 */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 6f3dba6e4e9e..65cc0cb984e6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1290,6 +1290,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	return ret;
 }
 
+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+	int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+	/*
+	 * try_to_take_rt_mutex() sets the lock waiters bit
+	 * unconditionally. Clean this up.
+	 */
+	fixup_rt_mutex_waiters(lock);
+
+	return ret;
+}
+
 /*
  * Slow path try-lock function:
  */
@@ -1312,13 +1325,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
 	 */
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 
-	ret = try_to_take_rt_mutex(lock, current, NULL);
-
-	/*
-	 * try_to_take_rt_mutex() sets the lock waiters bit
-	 * unconditionally. Clean this up.
-	 */
-	fixup_rt_mutex_waiters(lock);
+	ret = __rt_mutex_slowtrylock(lock);
 
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
 
@@ -1505,6 +1512,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
 	return rt_mutex_slowtrylock(lock);
 }
 
+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+	return __rt_mutex_slowtrylock(lock);
+}
+
 /**
  * rt_mutex_timed_lock - lock a rt_mutex interruptible
  *			the timeout structure is provided
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 124e98ca0b17..68686b3ec3c1 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -148,6 +148,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
 				 struct rt_mutex_waiter *waiter);
 
 extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
 
 extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
 extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 644fa2e3d993..a7bf32aabfda 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2056,7 +2056,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	p->state = TASK_WAKING;
 
 	if (p->in_iowait) {
-		delayacct_blkio_end();
+		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
@@ -2069,7 +2069,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 #else /* CONFIG_SMP */
 
 	if (p->in_iowait) {
-		delayacct_blkio_end();
+		delayacct_blkio_end(p);
 		atomic_dec(&task_rq(p)->nr_iowait);
 	}
 
@@ -2122,7 +2122,7 @@ static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf)
 
 	if (!task_on_rq_queued(p)) {
 		if (p->in_iowait) {
-			delayacct_blkio_end();
+			delayacct_blkio_end(p);
 			atomic_dec(&rq->nr_iowait);
 		}
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 89a9e1b4264a..0bcf00e3ce48 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1696,7 +1696,7 @@ void run_local_timers(void)
 	hrtimer_run_queues();
 	/* Raise the softirq only if required. */
 	if (time_before(jiffies, base->clk)) {
-		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active)
+		if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
 			return;
 		/* CPU is awake, so check the deferrable base. */
 		base++;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0cddf60186da..5af2842dea96 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2579,8 +2579,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = RB_CTX_NORMAL;
 	else
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
-			pc & HARDIRQ_MASK ? RB_CTX_IRQ :
-			pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ;
+			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
 	if (unlikely(val & (1 << bit)))
 		return 1;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index ec0f9aa4e151..1b87157edbff 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2213,6 +2213,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
 {
 	struct trace_event_call *call, *p;
 	const char *last_system = NULL;
+	bool first = false;
 	int last_i;
 	int i;
 
@@ -2220,15 +2221,28 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
 		/* events are usually grouped together with systems */
 		if (!last_system || call->class->system != last_system) {
+			first = true;
 			last_i = 0;
 			last_system = call->class->system;
 		}
 
+		/*
+		 * Since calls are grouped by systems, the likelyhood that the
+		 * next call in the iteration belongs to the same system as the
+		 * previous call is high. As an optimization, we skip seaching
+		 * for a map[] that matches the call's system if the last call
+		 * was from the same system. That's what last_i is for. If the
+		 * call has the same system as the previous call, then last_i
+		 * will be the index of the first map[] that has a matching
+		 * system.
+		 */
 		for (i = last_i; i < len; i++) {
 			if (call->class->system == map[i]->system) {
 				/* Save the first system if need be */
-				if (!last_i)
+				if (first) {
 					last_i = i;
+					first = false;
+				}
 				update_event_printk(call, map[i]);
 			}
 		}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 43d18cb46308..f699122dab32 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -48,6 +48,7 @@
 #include <linux/moduleparam.h>
 #include <linux/uaccess.h>
 #include <linux/sched/isolation.h>
+#include <linux/nmi.h>
 
 #include "workqueue_internal.h"
 
@@ -4463,6 +4464,12 @@ void show_workqueue_state(void)
 			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
 				show_pwq(pwq);
 			spin_unlock_irqrestore(&pwq->pool->lock, flags);
+			/*
+			 * We could be printing a lot from atomic context, e.g.
+			 * sysrq-t -> show_workqueue_state(). Avoid triggering
+			 * hard lockup.
+			 */
+			touch_nmi_watchdog();
 		}
 	}
 
@@ -4490,6 +4497,12 @@ void show_workqueue_state(void)
 		pr_cont("\n");
 	next_pool:
 		spin_unlock_irqrestore(&pool->lock, flags);
+		/*
+		 * We could be printing a lot from atomic context, e.g.
+		 * sysrq-t -> show_workqueue_state(). Avoid triggering
+		 * hard lockup.
+		 */
+		touch_nmi_watchdog();
 	}
 
 	rcu_read_unlock_sched();
diff --git a/mm/memory.c b/mm/memory.c
index ca5674cbaff2..793004608332 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2857,8 +2857,11 @@ int do_swap_page(struct vm_fault *vmf)
 	int ret = 0;
 	bool vma_readahead = swap_use_vma_readahead();
 
-	if (vma_readahead)
+	if (vma_readahead) {
 		page = swap_readahead_detect(vmf, &swap_ra);
+		swapcache = page;
+	}
+
 	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
 		if (page)
 			put_page(page);
@@ -2889,9 +2892,12 @@ int do_swap_page(struct vm_fault *vmf)
 
 
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
-	if (!page)
+	if (!page) {
 		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
 					 vmf->address);
+		swapcache = page;
+	}
+
 	if (!page) {
 		struct swap_info_struct *si = swp_swap_info(entry);
 
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 8592543a0f15..270a8219ccd0 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -616,7 +616,6 @@ static void init_early_allocated_pages(void)
 {
 	pg_data_t *pgdat;
 
-	drain_all_pages(NULL);
 	for_each_online_pgdat(pgdat)
 		init_zones_in_node(pgdat);
 }
diff --git a/net/can/af_can.c b/net/can/af_can.c
index f22b886ed081..6da324550eec 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -721,20 +721,16 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (WARN_ONCE(dev->type != ARPHRD_CAN ||
-		      skb->len != CAN_MTU ||
-		      cfd->len > CAN_MAX_DLEN,
-		      "PF_CAN: dropped non conform CAN skbuf: "
-		      "dev type %d, len %d, datalen %d\n",
-		      dev->type, skb->len, cfd->len))
-		goto drop;
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
+		     cfd->len > CAN_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+			     dev->type, skb->len, cfd->len);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
-
-drop:
-	kfree_skb(skb);
-	return NET_RX_DROP;
 }
 
 static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -742,20 +738,16 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (WARN_ONCE(dev->type != ARPHRD_CAN ||
-		      skb->len != CANFD_MTU ||
-		      cfd->len > CANFD_MAX_DLEN,
-		      "PF_CAN: dropped non conform CAN FD skbuf: "
-		      "dev type %d, len %d, datalen %d\n",
-		      dev->type, skb->len, cfd->len))
-		goto drop;
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
+		     cfd->len > CANFD_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+			     dev->type, skb->len, cfd->len);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
-
-drop:
-	kfree_skb(skb);
-	return NET_RX_DROP;
 }
 
 /*
diff --git a/net/core/filter.c b/net/core/filter.c
index db2ee8c7e1bd..08491b3b8742 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -458,6 +458,10 @@ do_pass:
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
+			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
+			    fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+
 			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 02db7b122a73..559db9ea8d86 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1031,8 +1031,8 @@ ip_proto_again:
 out_good:
 	ret = true;
 
-	key_control->thoff = (u16)nhoff;
 out:
+	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
 	key_basic->n_proto = proto;
 	key_basic->ip_proto = ip_proto;
 
@@ -1040,7 +1040,6 @@ out:
 
 out_bad:
 	ret = false;
-	key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
 	goto out;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b7c4befe67ec..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1223,8 +1223,14 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		}
 
 		if (!rcu_access_pointer(fn->leaf)) {
-			atomic_inc(&rt->rt6i_ref);
-			rcu_assign_pointer(fn->leaf, rt);
+			if (fn->fn_flags & RTN_TL_ROOT) {
+				/* put back null_entry for root node */
+				rcu_assign_pointer(fn->leaf,
+					    info->nl_net->ipv6.ip6_null_entry);
+			} else {
+				atomic_inc(&rt->rt6i_ref);
+				rcu_assign_pointer(fn->leaf, rt);
+			}
 		}
 		fn = sn;
 	}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index db99446e0276..a88480193d77 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -352,11 +352,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
 
 	nt->dev = dev;
 	nt->net = dev_net(dev);
-	ip6gre_tnl_link_config(nt, 1);
 
 	if (register_netdevice(dev) < 0)
 		goto failed_free;
 
+	ip6gre_tnl_link_config(nt, 1);
+
 	/* Can use a lockless transmit, unless we generate output sequences */
 	if (!(nt->parms.o_flags & TUNNEL_SEQ))
 		dev->features |= NETIF_F_LLTX;
@@ -1709,7 +1710,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 
 static int ip6gre_tap_init(struct net_device *dev)
 {
-	struct ip6_tnl *tunnel;
 	int ret;
 
 	ret = ip6gre_tunnel_init_common(dev);
@@ -1718,10 +1718,6 @@ static int ip6gre_tap_init(struct net_device *dev)
 
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
-	tunnel = netdev_priv(dev);
-
-	ip6gre_tnl_link_config(tunnel, 1);
-
 	return 0;
 }
 
@@ -1872,12 +1868,16 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 
 	nt->dev = dev;
 	nt->net = dev_net(dev);
-	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
 	err = register_netdevice(dev);
 	if (err)
 		goto out;
 
+	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+	if (tb[IFLA_MTU])
+		ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+
 	dev_hold(dev);
 	ip6gre_tunnel_link(ign, nt);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 54cbf5b9864c..2ad445c1d27c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2424,6 +2424,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 	while (skb->len >= nlmsg_total_size(0)) {
 		int msglen;
 
+		memset(&extack, 0, sizeof(extack));
 		nlh = nlmsg_hdr(skb);
 		err = 0;
 
@@ -2438,7 +2439,6 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
 			goto ack;
 
-		memset(&extack, 0, sizeof(extack));
 		err = cb(skb, nlh, &extack);
 		if (err == -EINTR)
 			goto skip;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fc024fc3ec2f..988ad45d78b8 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -186,10 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	return 0;
 }
 
+static u32 cls_bpf_flags(u32 flags)
+{
+	return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
+}
+
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			   struct cls_bpf_prog *oldprog)
 {
-	if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+	if (prog && oldprog &&
+	    cls_bpf_flags(prog->gen_flags) !=
+	    cls_bpf_flags(oldprog->gen_flags))
 		return -EINVAL;
 
 	if (prog && tc_skip_hw(prog->gen_flags))
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index e07ee3ae0023..736719c8314e 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 
 	crypto_info = &ctx->crypto_send;
 	/* Currently we don't support set crypto info more than one time */
-	if (TLS_CRYPTO_INFO_READY(crypto_info))
+	if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+		rc = -EBUSY;
 		goto out;
+	}
 
 	rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
 	if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 	case TLS_CIPHER_AES_GCM_128: {
 		if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
 			rc = -EINVAL;
-			goto out;
+			goto err_crypto_info;
 		}
 		rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
 				    optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 	}
 	default:
 		rc = -EINVAL;
-		goto out;
+		goto err_crypto_info;
 	}
 
 	/* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
 	struct tls_context *ctx;
 	int rc = 0;
 
+	/* The TLS ulp is currently supported only for TCP sockets
+	 * in ESTABLISHED state.
+	 * Supporting sockets in LISTEN state will require us
+	 * to modify the accept implementation to clone rather then
+	 * share the ulp context.
+	 */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return -ENOTSUPP;
+
 	/* allocate tls context */
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9773571b6a34..61f394d369bf 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -681,18 +681,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 	}
 	default:
 		rc = -EINVAL;
-		goto out;
+		goto free_priv;
 	}
 
 	ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
 	ctx->tag_size = tag_size;
 	ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
 	ctx->iv_size = iv_size;
-	ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			  GFP_KERNEL);
+	ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
 	if (!ctx->iv) {
 		rc = -ENOMEM;
-		goto out;
+		goto free_priv;
 	}
 	memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
 	memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +739,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 
 	rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
 	if (!rc)
-		goto out;
+		return 0;
 
 free_aead:
 	crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +750,9 @@ free_rec_seq:
 free_iv:
 	kfree(ctx->iv);
 	ctx->iv = NULL;
+free_priv:
+	kfree(ctx->priv_ctx);
+	ctx->priv_ctx = NULL;
 out:
 	return rc;
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b48eb6d104c9..ab0c687d0c44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9835,7 +9835,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 	 */
 	if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss &&
 	    rdev->ops->get_station) {
-		struct station_info sinfo;
+		struct station_info sinfo = {};
 		u8 *mac_addr;
 
 		mac_addr = wdev->current_bss->pub.bssid;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7ca04a7de85a..05186a47878f 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1254,8 +1254,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
-	/* we are under RTNL - globally locked - so can use a static struct */
-	static struct station_info sinfo;
+	struct station_info sinfo = {};
 	u8 addr[ETH_ALEN];
 	int err;
 
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index cb8997ed0149..47cddf32aeba 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -265,12 +265,18 @@ else
 objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
 
+ifdef CONFIG_MODVERSIONS
+objtool_o = $(@D)/.tmp_$(@F)
+else
+objtool_o = $(@)
+endif
+
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
 # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
 # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file
 cmd_objtool = $(if $(patsubst y%,, \
 	$(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \
-	$(__objtool_obj) $(objtool_args) "$(@)";)
+	$(__objtool_obj) $(objtool_args) "$(objtool_o)";)
 objtool_obj = $(if $(patsubst y%,, \
 	$(OBJECT_FILES_NON_STANDARD_$(basetarget).o)$(OBJECT_FILES_NON_STANDARD)n), \
 	$(__objtool_obj))
@@ -286,16 +292,16 @@ objtool_dep = $(objtool_obj)					\
 define rule_cc_o_c
 	$(call echo-cmd,checksrc) $(cmd_checksrc)			  \
 	$(call cmd_and_fixdep,cc_o_c)					  \
-	$(cmd_modversions_c)						  \
 	$(cmd_checkdoc)							  \
 	$(call echo-cmd,objtool) $(cmd_objtool)				  \
+	$(cmd_modversions_c)						  \
 	$(call echo-cmd,record_mcount) $(cmd_record_mcount)
 endef
 
 define rule_as_o_S
 	$(call cmd_and_fixdep,as_o_S)					  \
-	$(cmd_modversions_S)						  \
-	$(call echo-cmd,objtool) $(cmd_objtool)
+	$(call echo-cmd,objtool) $(cmd_objtool)				  \
+	$(cmd_modversions_S)
 endef
 
 # List module undefined symbols (or empty line if not enabled)
diff --git a/scripts/decodecode b/scripts/decodecode
index 438120da1361..5ea071099330 100755
--- a/scripts/decodecode
+++ b/scripts/decodecode
@@ -59,6 +59,14 @@ disas() {
 		${CROSS_COMPILE}strip $1.o
 	fi
 
+	if [ "$ARCH" = "arm64" ]; then
+		if [ $width -eq 4 ]; then
+			type=inst
+		fi
+
+		${CROSS_COMPILE}strip $1.o
+	fi
+
 	${CROSS_COMPILE}objdump $OBJDUMPFLAGS -S $1.o | \
 		grep -v "/tmp\|Disassembly\|\.text\|^$" > $1.dis 2>&1
 }
diff --git a/scripts/gdb/linux/tasks.py b/scripts/gdb/linux/tasks.py
index 1bf949c43b76..f6ab3ccf698f 100644
--- a/scripts/gdb/linux/tasks.py
+++ b/scripts/gdb/linux/tasks.py
@@ -96,6 +96,8 @@ def get_thread_info(task):
         thread_info_addr = task.address + ia64_task_size
         thread_info = thread_info_addr.cast(thread_info_ptr_type)
     else:
+        if task.type.fields()[0].type == thread_info_type.get_type():
+            return task['thread_info']
         thread_info = task['stack'].cast(thread_info_ptr_type)
     return thread_info.dereference()
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 24460155c82c..c1c338661699 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -26,6 +26,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 
 #include "elf.h"
 #include "warn.h"
@@ -358,7 +359,8 @@ struct elf *elf_open(const char *name, int flags)
 
 	elf->fd = open(name, flags);
 	if (elf->fd == -1) {
-		perror("open");
+		fprintf(stderr, "objtool: Can't open '%s': %s\n",
+			name, strerror(errno));
 		goto err;
 	}
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 960179882a1c..38c0f850f315 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2594,6 +2594,29 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"context stores via ST",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_ST stores into R1 context is not allowed",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"context stores via XADD",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+				     BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"direct packet access: test1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -4313,7 +4336,8 @@ static struct bpf_test tests[] = {
 		.fixup_map1 = { 2 },
 		.errstr_unpriv = "R2 leaks addr into mem",
 		.result_unpriv = REJECT,
-		.result = ACCEPT,
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
 	},
 	{
 		"leak pointer into ctx 2",
@@ -4327,7 +4351,8 @@ static struct bpf_test tests[] = {
 		},
 		.errstr_unpriv = "R10 leaks addr into mem",
 		.result_unpriv = REJECT,
-		.result = ACCEPT,
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
 	},
 	{
 		"leak pointer into ctx 3",
@@ -6708,7 +6733,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
 		.fixup_map1 = { 4 },
-		.errstr = "unbounded min value",
+		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
 	},
 	{
@@ -8610,6 +8635,127 @@ static struct bpf_test tests[] = {
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
+		"check deducing bounds from const, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check deducing bounds from const, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check deducing bounds from const, 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 7",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"check deducing bounds from const, 8",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"check deducing bounds from const, 9",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 tried to subtract pointer from scalar",
+	},
+	{
+		"check deducing bounds from const, 10",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			/* Marks reg as unknown. */
+			BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+	},
+	{
 		"bpf_exit with invalid return code. test1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),