31 files changed, 378 insertions, 75 deletions
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3a510f4a6b68..77e2cefe47eb 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -64,17 +64,17 @@ config PPC_EMULATED_STATS
 	  emulated.
 
 config CODE_PATCHING_SELFTEST
-	bool "Run self-tests of the code-patching code."
+	bool "Run self-tests of the code-patching code"
 	depends on DEBUG_KERNEL
 	default n
 
 config FTR_FIXUP_SELFTEST
-	bool "Run self-tests of the feature-fixup code."
+	bool "Run self-tests of the feature-fixup code"
 	depends on DEBUG_KERNEL
 	default n
 
 config MSI_BITMAP_SELFTEST
-	bool "Run self-tests of the MSI bitmap code."
+	bool "Run self-tests of the MSI bitmap code"
 	depends on DEBUG_KERNEL
 	default n
 
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9e861b4378bd..2ff8b3df553d 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -33,7 +33,12 @@
 #define _PAGE_F_GIX_SHIFT	12
 #define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
 #define _PAGE_SPECIAL		0x10000 /* software: special page */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SOFT_DIRTY	0x20000 /* software: software dirty tracking */
+#else
+#define _PAGE_SOFT_DIRTY	0x00000
+#endif
 
 /*
  * THP pages can't be special. So use the _PAGE_SPECIAL
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 03c1a5a21c0c..b3a5badab69f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -158,12 +158,22 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
 					((type) << _PAGE_BIT_SWAP_TYPE) \
 					| ((offset) << PTE_RPN_SHIFT) })
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
 
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
-#define __swp_entry_to_pte(x)		__pte((x).val)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+#else
+#define _PAGE_SWP_SOFT_DIRTY	0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
@@ -176,8 +186,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
 	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
 }
-#else
-#define _PAGE_SWP_SOFT_DIRTY	0
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 8374afed9d0a..f8faaaeeca1e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -157,7 +157,8 @@
 #define OPAL_LEDS_GET_INDICATOR			114
 #define OPAL_LEDS_SET_INDICATOR			115
 #define OPAL_CEC_REBOOT2			116
-#define OPAL_LAST				116
+#define OPAL_CONSOLE_FLUSH			117
+#define OPAL_LAST				117
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 800115910e43..07a99e638449 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -35,6 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
 					__be64 *length);
+int64_t opal_console_flush(int64_t term_number);
 int64_t opal_rtc_read(__be32 *year_month_day,
 		      __be64 *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
@@ -262,6 +263,8 @@ extern int opal_resync_timebase(void);
 
 extern void opal_lpc_init(void);
 
+extern void opal_kmsg_init(void);
+
 extern int opal_event_request(unsigned int opal_event_nr);
 
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 70bd4381f8e6..546540b91095 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -16,6 +16,7 @@
 
 #ifdef CONFIG_PPC64
 
+#include <linux/string.h>
 #include <asm/types.h>
 #include <asm/lppaca.h>
 #include <asm/mmu.h>
@@ -131,7 +132,16 @@ struct paca_struct {
 	struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
-	mm_context_t context;
+#ifdef CONFIG_PPC_BOOK3S
+	mm_context_id_t mm_ctx_id;
+#ifdef CONFIG_PPC_MM_SLICES
+	u64 mm_ctx_low_slices_psize;
+	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+	u16 mm_ctx_user_psize;
+	u16 mm_ctx_sllp;
+#endif
+#endif
 
 	/*
 	 * then miscellaneous read-write fields
@@ -194,6 +204,23 @@ struct paca_struct {
 #endif
 };
 
+#ifdef CONFIG_PPC_BOOK3S
+static inline void copy_mm_to_paca(mm_context_t *context)
+{
+	get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+	memcpy(&get_paca()->mm_ctx_high_slices_psize,
+	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
+#else
+	get_paca()->mm_ctx_user_psize = context->user_psize;
+	get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+}
+#else
+static inline void copy_mm_to_paca(mm_context_t *context){}
+#endif
+
 extern struct paca_struct *paca;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 43686043e297..8dde19962a5b 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -43,5 +43,7 @@
 #define PPC_FEATURE2_TAR		0x04000000
 #define PPC_FEATURE2_VEC_CRYPTO		0x02000000
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
+#define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
+#define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 59dad113897b..c2d21d11c2d2 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -295,6 +295,8 @@ do {									\
 #define R_PPC64_TLSLD		108
 #define R_PPC64_TOCSAVE		109
 
+#define R_PPC64_ENTRY		118
+
 #define R_PPC64_REL16		249
 #define R_PPC64_REL16_LO	250
 #define R_PPC64_REL16_HI	251
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 221d584d089f..07cebc3514f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,14 +185,16 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
-	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_BOOK3S
+	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
-					    context.low_slices_psize));
+					    mm_ctx_low_slices_psize));
 	DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
-					    context.high_slices_psize));
+					    mm_ctx_high_slices_psize));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
+#endif
 
 #ifdef CONFIG_PPC_BOOK3E
 	DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
@@ -222,7 +224,7 @@ int main(void)
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
 #else
-	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp));
 #endif /* CONFIG_PPC_MM_SLICES */
 	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
 	DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 68384514506b..59663af9315f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			 */
 			break;
 
+		case R_PPC64_ENTRY:
+			/*
+			 * Optimize ELFv2 large code model entry point if
+			 * the TOC is within 2GB range of current location.
+			 */
+			value = my_r2(sechdrs, me) - (unsigned long)location;
+			if (value + 0x80008000 > 0xffffffff)
+				break;
+			/*
+			 * Check for the large code model prolog sequence:
+			 *	ld r2, ...(r12)
+			 *	add r2, r2, r12
+			 */
+			if ((((uint32_t *)location)[0] & ~0xfffc)
+			    != 0xe84c0000)
+				break;
+			if (((uint32_t *)location)[1] != 0x7c426214)
+				break;
+			/*
+			 * If found, replace it with:
+			 *	addis r2, r12, (.TOC.-func)@ha
+			 *	addi r2, r2, (.TOC.-func)@l
+			 */
+			((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
+			((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+			break;
+
 		case R_PPC64_REL16_HA:
 			/* Subtract location pointer */
 			value -= (unsigned long)location;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 92dea8df6b26..da5192590c44 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -389,6 +389,7 @@ static void __init prom_printf(const char *format, ...)
 			break;
 		}
 	}
+	va_end(args);
 }
 
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index ea43a347a104..4f24606afc3f 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -61,3 +61,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	save_context_stack(trace, tsk->thread.ksp, tsk, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void
+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	save_context_stack(trace, regs->gpr[1], current, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 4233dcccbaf7..ba59d5977f34 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -853,11 +853,11 @@ static unsigned int get_paca_psize(unsigned long addr)
 	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		lpsizes = get_paca()->context.low_slices_psize;
+		lpsizes = get_paca()->mm_ctx_low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
 		return (lpsizes >> (index * 4)) & 0xF;
 	}
-	hpsizes = get_paca()->context.high_slices_psize;
+	hpsizes = get_paca()->mm_ctx_high_slices_psize;
 	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
 	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
@@ -866,7 +866,7 @@ static unsigned int get_paca_psize(unsigned long addr)
 #else
 unsigned int get_paca_psize(unsigned long addr)
 {
-	return get_paca()->context.user_psize;
+	return get_paca()->mm_ctx_user_psize;
 }
 #endif
 
@@ -882,7 +882,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
 	copro_flush_all_slbs(mm);
 	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
-		get_paca()->context = mm->context;
+
+		copy_mm_to_paca(&mm->context);
 		slb_flush_and_rebolt();
 	}
 }
@@ -949,7 +950,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
 {
 	if (user_region) {
 		if (psize != get_paca_psize(ea)) {
-			get_paca()->context = mm->context;
+			copy_mm_to_paca(&mm->context);
 			slb_flush_and_rebolt();
 		}
 	} else if (get_paca()->vmalloc_sllp !=
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 515730e499fe..825b6873391f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -228,7 +228,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		asm volatile("slbie %0" : : "r" (slbie_data));
 
 	get_paca()->slb_cache_ptr = 0;
-	get_paca()->context = mm->context;
+	copy_mm_to_paca(&mm->context);
 
 	/*
 	 * preload some userspace segments into the SLB.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0f432a702870..42954f0b47ac 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -185,8 +185,7 @@ static void slice_flush_segments(void *parm)
 	if (mm != current->active_mm)
 		return;
 
-	/* update the paca copy of the context struct */
-	get_paca()->context = current->active_mm->context;
+	copy_mm_to_paca(&current->active_mm->context);
 
 	local_irq_save(flags);
 	slb_flush_and_rebolt();
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 76f5013c35e5..c3c9bbb3573a 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -84,6 +84,7 @@ static void __init bootx_printf(const char *format, ...)
 			break;
 		}
 	}
+	va_end(args);
 }
 #else /* CONFIG_BOOTX_TEXT */
 static void __init bootx_printf(const char *format, ...) {}
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index ee774e8a4837..f1516b5ecec9 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,6 +2,7 @@ obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o idle.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y			+= opal-kmsg.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 000000000000..6f1214d4de92
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,75 @@
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware.  The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output.  In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed.  This function does not actually dump the
+ * message, it just ensures that OPAL completely flushes the console buffer.
+ */
+static void force_opal_console_flush(struct kmsg_dumper *dumper,
+				     enum kmsg_dump_reason reason)
+{
+	int i;
+	int64_t ret;
+
+	/*
+	 * Outside of a panic context the pollers will continue to run,
+	 * so we don't need to do any special flushing.
+	 */
+	if (reason != KMSG_DUMP_PANIC)
+		return;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		ret = opal_console_flush(0);
+
+		if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER)
+			return;
+
+		/* Incrementally flush until there's nothing left */
+		while (opal_console_flush(0) != OPAL_SUCCESS);
+	} else {
+		/*
+		 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+		 * the console can still be flushed by calling the polling
+		 * function enough times to flush the buffer.  We don't know
+		 * how much output still needs to be flushed, but we can be
+		 * generous since the kernel is in panic and doesn't need
+		 * to do much else.
+		 */
+		printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
+		for (i = 0; i < 1024; i++) {
+			opal_poll_events(NULL);
+		}
+	}
+}
+
+static struct kmsg_dumper opal_kmsg_dumper = {
+	.dump = force_opal_console_flush
+};
+
+void __init opal_kmsg_init(void)
+{
+	int rc;
+
+	/* Add our dumper to the list */
+	rc = kmsg_dump_register(&opal_kmsg_dumper);
+	if (rc != 0)
+		pr_err("opal: kmsg_dump_register failed; returned %d\n", rc);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 1b149c92fca1..f8868864f373 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -50,7 +50,7 @@ unsigned long __init opal_get_boot_time(void)
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
-		else
+		else if (rc == OPAL_BUSY)
 			mdelay(10);
 	}
 	if (rc != OPAL_SUCCESS)
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index b7a464fef7a7..e45b88a5d7e0 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase,			OPAL_FLASH_ERASE);
 OPAL_CALL(opal_prd_msg,				OPAL_PRD_MSG);
 OPAL_CALL(opal_leds_get_ind,			OPAL_LEDS_GET_INDICATOR);
 OPAL_CALL(opal_leds_set_ind,			OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush,			OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aad0033d65d1..81f4a3ab8743 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -548,7 +548,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
 		goto out;
 
 	if ((regs->nip >= opal.base) &&
-			(regs->nip <= (opal.base + opal.size)))
+			(regs->nip < (opal.base + opal.size)))
 		recover_addr = find_recovery_address(regs->nip);
 
 	/*
@@ -748,6 +748,9 @@ static int __init opal_init(void)
 	opal_pdev_init(opal_node, "ibm,opal-flash");
 	opal_pdev_init(opal_node, "ibm,opal-prd");
 
+	/* Initialise OPAL kmsg dumper for flushing console on panic */
+	opal_kmsg_init();
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 323e1e58da93..573ae1994097 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1074,16 +1074,75 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	pnv_ioda_link_pe_by_weight(phb, pe);
 }
 
-static void pnv_ioda_setup_dev_PEs(struct pci_bus *bus)
+static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
+{
+	int pe_num, found_pe = false, rc;
+	long rid;
+	struct pnv_ioda_pe *pe;
+	struct pci_dev *gpu_pdev;
+	struct pci_dn *npu_pdn;
+	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	/*
+	 * Due to a hardware erratum PE#0 on the NPU is reserved for
+	 * error handling. This means we only have three PEs remaining
+	 * which need to be assigned to four links, implying some
+	 * links must share PEs.
+	 *
+	 * To achieve this we assign PEs such that NPUs linking the
+	 * same GPU get assigned the same PE.
+	 */
+	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
+	for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+		pe = &phb->ioda.pe_array[pe_num];
+		if (!pe->pdev)
+			continue;
+
+		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
+			/*
+			 * This device has the same peer GPU so should
+			 * be assigned the same PE as the existing
+			 * peer NPU.
+			 */
+			dev_info(&npu_pdev->dev,
+				"Associating to existing PE %d\n", pe_num);
+			pci_dev_get(npu_pdev);
+			npu_pdn = pci_get_pdn(npu_pdev);
+			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
+			npu_pdn->pcidev = npu_pdev;
+			npu_pdn->pe_number = pe_num;
+			pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
+			phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+			/* Map the PE to this link */
+			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
+					OpalPciBusAll,
+					OPAL_COMPARE_RID_DEVICE_NUMBER,
+					OPAL_COMPARE_RID_FUNCTION_NUMBER,
+					OPAL_MAP_PE);
+			WARN_ON(rc != OPAL_SUCCESS);
+			found_pe = true;
+			break;
+		}
+	}
+
+	if (!found_pe)
+		/*
+		 * Could not find an existing PE so allocate a new
+		 * one.
+		 */
+		return pnv_ioda_setup_dev_PE(npu_pdev);
+	else
+		return pe;
+}
+
+static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
 {
-	struct pci_bus *child;
 	struct pci_dev *pdev;
 
 	list_for_each_entry(pdev, &bus->devices, bus_list)
-		pnv_ioda_setup_dev_PE(pdev);
-
-	list_for_each_entry(child, &bus->children, node)
-		pnv_ioda_setup_dev_PEs(child);
+		pnv_ioda_setup_npu_PE(pdev);
 }
 
 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
@@ -1127,9 +1186,11 @@ static void pnv_pci_ioda_setup_PEs(void)
 		 * functions. PCI bus dependent PEs are required for the
 		 * remaining types of PHBs.
 		 */
-		if (phb->type == PNV_PHB_NPU)
-			pnv_ioda_setup_dev_PEs(hose->bus);
-		else
+		if (phb->type == PNV_PHB_NPU) {
+			/* PE#0 is needed for error reporting */
+			pnv_ioda_reserve_pe(phb, 0);
+			pnv_ioda_setup_npu_PEs(hose->bus);
+		} else
 			pnv_ioda_setup_PEs(hose->bus);
 	}
 }
@@ -1612,7 +1673,10 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 
 	/* Update peer npu devices */
 	if (pe->flags & PNV_IODA_PE_PEER)
-		for (i = 0; pe->peers[i]; i++) {
+		for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+			if (!pe->peers[i])
+				continue;
+
 			linked_npu_dev = pe->peers[i]->pdev;
 			if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
 				dma_set_mask(&linked_npu_dev->dev, dma_mask);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ff4e42d9d259..2f55c86df703 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1,8 +1,6 @@
 /*
  * Support PCI/PCIe on PowerNV platforms
  *
- * Currently supports only P5IOC2
- *
  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index 6982f603fadc..be2ac5ce349f 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -1,4 +1,5 @@
-ccflags-y := -Werror -Wno-unused-const-variable
+ccflags-y			:= $(call cc-disable-warning, unused-const-variable)
+ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
 
 cxl-y				+= main.o file.o irq.o fault.o native.o
 cxl-y				+= context.o sysfs.o debugfs.o pci.o trace.o
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a6543aefa299..ea3eeb7011e1 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -172,7 +172,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 
 	if (task) {
 		ctx->pid = get_task_pid(task, PIDTYPE_PID);
-		get_pid(ctx->pid);
+		ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
 		kernel = false;
 	}
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 6dde7a9d6a7e..262b88eac414 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -42,7 +42,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
 	spin_lock_init(&ctx->sste_lock);
 	ctx->afu = afu;
 	ctx->master = master;
-	ctx->pid = NULL; /* Set in start work ioctl */
+	ctx->pid = ctx->glpid = NULL; /* Set in start work ioctl */
 	mutex_init(&ctx->mapping_lock);
 	ctx->mapping = mapping;
 
@@ -217,7 +217,11 @@ int __detach_context(struct cxl_context *ctx)
 	WARN_ON(cxl_detach_process(ctx) &&
 		cxl_adapter_link_ok(ctx->afu->adapter));
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
+
+	/* release the reference to the group leader and mm handling pid */
 	put_pid(ctx->pid);
+	put_pid(ctx->glpid);
+
 	cxl_ctx_put();
 	return 0;
 }
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 25ae57fa79b0..a521bc72cec2 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -445,6 +445,9 @@ struct cxl_context {
 	unsigned int sst_size, sst_lru;
 
 	wait_queue_head_t wq;
+	/* pid of the group leader associated with the pid */
+	struct pid *glpid;
+	/* use mm context associated with this pid for ds faults */
 	struct pid *pid;
 	spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */
 	/* Only used in PR mode */
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 25a5418c55cb..81c3f75b7330 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -166,13 +166,92 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
 	cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
 }
 
+/*
+ * Returns the mm_struct corresponding to the context ctx via ctx->pid.
+ * In case the task has exited we use the task group leader accessible
+ * via ctx->glpid to find the next task in the thread group that has a
+ * valid mm_struct associated with it. If a task with a valid mm_struct
+ * is found, ctx->pid is updated to that task's pid for subsequent
+ * translations. If no valid mm_struct is found in the task group to
+ * service the fault, NULL is returned.
+ */
+static struct mm_struct *get_mem_context(struct cxl_context *ctx)
+{
+	struct task_struct *task = NULL;
+	struct mm_struct *mm = NULL;
+	struct pid *old_pid = ctx->pid;
+
+	if (old_pid == NULL) {
+		pr_warn("%s: Invalid context for pe=%d\n",
+			 __func__, ctx->pe);
+		return NULL;
+	}
+
+	task = get_pid_task(old_pid, PIDTYPE_PID);
+
+	/*
+	 * pid_alive may look racy but this saves us from costly
+	 * get_task_mm when the task is a zombie. In worst case
+	 * we may think a task is alive, which is about to die
+	 * but get_task_mm will return NULL.
+	 */
+	if (task != NULL && pid_alive(task))
+		mm = get_task_mm(task);
+
+	/* release the task struct that was taken earlier */
+	if (task)
+		put_task_struct(task);
+	else
+		pr_devel("%s: Context owning pid=%i for pe=%i dead\n",
+			__func__, pid_nr(old_pid), ctx->pe);
+
+	/*
+	 * If we couldn't find the mm context then use the group
+	 * leader to iterate over the task group and find a task
+	 * that gives us mm_struct.
+	 */
+	if (unlikely(mm == NULL && ctx->glpid != NULL)) {
+
+		rcu_read_lock();
+		task = pid_task(ctx->glpid, PIDTYPE_PID);
+		if (task)
+			do {
+				mm = get_task_mm(task);
+				if (mm) {
+					ctx->pid = get_task_pid(task,
+								PIDTYPE_PID);
+					break;
+				}
+				task = next_thread(task);
+			} while (task && !thread_group_leader(task));
+		rcu_read_unlock();
+
+		/* check if we switched pid */
+		if (ctx->pid != old_pid) {
+			if (mm)
+				pr_devel("%s:pe=%i switch pid %i->%i\n",
+					 __func__, ctx->pe, pid_nr(old_pid),
+					 pid_nr(ctx->pid));
+			else
+				pr_devel("%s:Cannot find mm for pid=%i\n",
+					 __func__, pid_nr(old_pid));
+
+			/* drop the reference to older pid */
+			put_pid(old_pid);
+		}
+	}
+
+	return mm;
+}
+
+
+
 void cxl_handle_fault(struct work_struct *fault_work)
 {
 	struct cxl_context *ctx =
 		container_of(fault_work, struct cxl_context, fault_work);
 	u64 dsisr = ctx->dsisr;
 	u64 dar = ctx->dar;
-	struct task_struct *task = NULL;
 	struct mm_struct *mm = NULL;
 
 	if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
@@ -195,17 +274,17 @@ void cxl_handle_fault(struct work_struct *fault_work)
 		"DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
 
 	if (!ctx->kernel) {
-		if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
-			pr_devel("cxl_handle_fault unable to get task %i\n",
-				 pid_nr(ctx->pid));
+
+		mm = get_mem_context(ctx);
+		/* indicates all the threads in the task group have exited */
+		if (mm == NULL) {
+			pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
+				 __func__, ctx->pe, pid_nr(ctx->pid));
 			cxl_ack_ae(ctx);
 			return;
-		}
-		if (!(mm = get_task_mm(task))) {
-			pr_devel("cxl_handle_fault unable to get mm %i\n",
-				 pid_nr(ctx->pid));
-			cxl_ack_ae(ctx);
-			goto out;
+		} else {
+			pr_devel("Handling page fault for pe=%d pid=%i\n",
+				 ctx->pe, pid_nr(ctx->pid));
 		}
 	}
 
@@ -218,33 +297,22 @@ void cxl_handle_fault(struct work_struct *fault_work)
 
 	if (mm)
 		mmput(mm);
-out:
-	if (task)
-		put_task_struct(task);
 }
 
 static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
 {
-	int rc;
-	struct task_struct *task;
 	struct mm_struct *mm;
 
-	if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
-		pr_devel("cxl_prefault_one unable to get task %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}
-	if (!(mm = get_task_mm(task))) {
+	mm = get_mem_context(ctx);
+	if (mm == NULL) {
 		pr_devel("cxl_prefault_one unable to get mm %i\n",
 			 pid_nr(ctx->pid));
-		put_task_struct(task);
 		return;
 	}
 
-	rc = cxl_fault_segment(ctx, mm, ea);
+	cxl_fault_segment(ctx, mm, ea);
 
 	mmput(mm);
-	put_task_struct(task);
 }
 
 static u64 next_segment(u64 ea, u64 vsid)
@@ -263,18 +331,13 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
 	struct copro_slb slb;
 	struct vm_area_struct *vma;
 	int rc;
-	struct task_struct *task;
 	struct mm_struct *mm;
 
-	if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
-		pr_devel("cxl_prefault_vma unable to get task %i\n",
-			 pid_nr(ctx->pid));
-		return;
-	}
-	if (!(mm = get_task_mm(task))) {
+	mm = get_mem_context(ctx);
+	if (mm == NULL) {
 		pr_devel("cxl_prefault_vm unable to get mm %i\n",
 			 pid_nr(ctx->pid));
-		goto out1;
+		return;
 	}
 
 	down_read(&mm->mmap_sem);
@@ -295,8 +358,6 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
 	up_read(&mm->mmap_sem);
 
 	mmput(mm);
-out1:
-	put_task_struct(task);
 }
 
 void cxl_prefault(struct cxl_context *ctx, u64 wed)
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 5cc14599837d..783337d22f36 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -201,8 +201,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	 * where a process (master, some daemon, etc) has opened the chardev on
 	 * behalf of another process, so the AFU's mm gets bound to the process
 	 * that performs this ioctl and not the process that opened the file.
+	 * Also we grab the PID of the group leader so that if the task that
+	 * has performed the attach operation exits the mm context of the
+	 * process is still accessible.
 	 */
-	ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID));
+	ctx->pid = get_task_pid(current, PIDTYPE_PID);
+	ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
 
 	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 85761d7eb333..4c1903f781fc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -138,6 +138,7 @@ static const struct pci_device_id cxl_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
 	{ PCI_DEVICE_CLASS(0x120000, ~0), },
 
 	{ }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 826470d7f000..96e2486a6fc4 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -263,7 +263,8 @@ if ($arch eq "x86_64") {
 
 } elsif ($arch eq "powerpc") {
     $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
-    $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
+    # See comment in the sparc64 section for why we use '\w'.
+    $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?\\w*?)>:";
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
 
     if ($bits == 64) {