Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 7
-rw-r--r--  arch/powerpc/include/asm/atomic.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 2
-rw-r--r--  arch/powerpc/include/asm/nmi.h | 11
-rw-r--r--  arch/powerpc/include/asm/opal-api.h | 9
-rw-r--r--  arch/powerpc/include/asm/reg.h | 2
-rw-r--r--  arch/powerpc/include/asm/smp.h | 2
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 3
-rw-r--r--  arch/powerpc/kernel/Makefile | 1
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 13
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 16
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 30
-rw-r--r--  arch/powerpc/kernel/fadump.c | 3
-rw-r--r--  arch/powerpc/kernel/kvm.c | 7
-rw-r--r--  arch/powerpc/kernel/mce_power.c | 56
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 12
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 3
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 19
-rw-r--r--  arch/powerpc/kernel/smp.c | 20
-rw-r--r--  arch/powerpc/kernel/watchdog.c | 386
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 2
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 180
-rw-r--r--  arch/powerpc/lib/sstep.c | 19
-rw-r--r--  arch/powerpc/mm/mmap.c | 28
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c | 20
-rw-r--r--  arch/powerpc/perf/isa207-common.c | 6
-rw-r--r--  arch/powerpc/perf/power9-events-list.h | 4
-rw-r--r--  arch/powerpc/perf/power9-pmu.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 22
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 19
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 11
31 files changed, 755 insertions(+), 164 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7177a3f4f418..36f858c37ca7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -82,7 +82,7 @@ config NR_IRQS
config NMI_IPI
bool
- depends on SMP && (DEBUGGER || KEXEC_CORE)
+ depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
default y
config STACKTRACE_SUPPORT
@@ -125,6 +125,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_SET_COHERENT_MASK
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
@@ -192,11 +193,13 @@ config PPC
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MOD_ARCH_SPECIFIC
- select HAVE_NMI if PERF_EVENTS
+ select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+ select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S)
select HAVE_OPROFILE
select HAVE_OPTPROBES if PPC64
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 2b90335194a7..a2cc8010cd72 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
{
long t1, t2;
@@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer", "memory");
- return t1;
+ return t1 != 0;
}
#endif /* __powerpc64__ */
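A user-space sketch (C11 <stdatomic.h>, not the kernel's larx/stcx. loop) of the inc-not-zero contract that the narrowed int return type above encodes: increment unless the value is zero, and report whether the increment happened.
#include <stdatomic.h>
#include <stdio.h>
/* Increment *v unless it is zero; return 1 if the increment happened,
 * 0 otherwise -- the same boolean contract as atomic64_inc_not_zero(). */
static int inc_not_zero(_Atomic long *v)
{
	long old = atomic_load(v);
	while (old != 0) {
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return 1;
	}
	return 0;
}
int main(void)
{
	_Atomic long ref = 1;
	printf("%d\n", inc_not_zero(&ref));	/* prints 1: ref went 1 -> 2 */
	atomic_store(&ref, 0);
	printf("%d\n", inc_not_zero(&ref));	/* prints 0: ref stays 0 */
	return 0;
}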
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 20b1485ff1e8..e2329db9d6f4 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
#else
struct page *page;
- page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+ page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL),
4);
if (!page)
return NULL;
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index ff1ccb375e60..6f8e79cd35d8 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -1,4 +1,15 @@
#ifndef _ASM_NMI_H
#define _ASM_NMI_H
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern void arch_touch_nmi_watchdog(void);
+
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+ bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+#else
+static inline void arch_touch_nmi_watchdog(void) {}
+#endif
+
#endif /* _ASM_NMI_H */
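A hedged usage sketch (the device, register and ready bit are hypothetical): long polling loops keep the new detector quiet by calling touch_nmi_watchdog() from <linux/nmi.h>, which is expected to end up in the arch_touch_nmi_watchdog() declared above.
#include <linux/nmi.h>
#include <linux/io.h>
#include <linux/delay.h>
/* Hypothetical driver helper: poll a status register for a long time
 * without tripping the hard lockup detector. */
static void wait_for_ready(void __iomem *status)
{
	while (!(readl(status) & 0x1)) {	/* assumed "ready" bit */
		touch_nmi_watchdog();
		udelay(10);
	}
}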
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index ef930ba500f9..3130a73652c7 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -876,6 +876,15 @@ struct OpalIoPhb4ErrorData {
enum {
OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
+
+ /* These two define the base MMU mode of the host on P9
+ *
+ * On P9 Nimbus DD2.0 and Cumulus (and later), KVM can still
+ * create hash guests in "radix" mode with care (full core
+ * switch only).
+ */
+ OPAL_REINIT_CPUS_MMU_HASH = (1 << 2),
+ OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3),
};
typedef struct oppanel_line {
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 7e50e47375d6..a3b6575c7842 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1303,7 +1303,7 @@ static inline void msr_check_and_clear(unsigned long bits)
" .llong 0\n" \
".previous" \
: "=r" (rval) \
- : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \
+ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
rval;})
#else
#define mftb() ({unsigned long rval; \
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ebddb2111d87..8ea98504f900 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -55,6 +55,8 @@ struct smp_ops_t {
int (*cpu_bootable)(unsigned int nr);
};
+extern void smp_flush_nmi_ipi(u64 delay_us);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
extern void smp_send_debugger_break(void);
extern void start_secondary_resume(void);
extern void smp_generic_give_timebase(void);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4cf57f2126e6..9c0e60ca1666 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -90,9 +90,6 @@
#define __put_user_inatomic(x, ptr) \
__put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
-
extern long __put_user_bad(void);
/*
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0845eebc5af3..4aa7c147e447 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
obj-$(CONFIG_VDSO32) += vdso32/
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 10cb2896b2ae..610955fe8b81 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -218,13 +218,20 @@ __init_tlb_power8:
ptesync
1: blr
+/*
+ * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process
+ * and once for partition scope to clear process and partition table entries.
+ */
__init_tlb_power9:
- li r6,POWER9_TLB_SETS_HASH
+ li r6,POWER9_TLB_SETS_HASH - 1
mtctr r6
li r7,0xc00 /* IS field = 0b11 */
+ li r8,0
ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
+ PPC_TLBIEL(7, 8, 2, 1, 0)
+ PPC_TLBIEL(7, 8, 2, 0, 0)
+2: addi r7,r7,0x1000
+ PPC_TLBIEL(7, 8, 0, 0, 0)
bdnz 2b
ptesync
1: blr
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 4c7656dc4e04..1df770e8cbe0 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -94,9 +94,6 @@ static void (*init_pmu_registers)(void);
static void cpufeatures_flush_tlb(void)
{
- unsigned long rb;
- unsigned int i, num_sets;
-
/*
* This is a temporary measure to keep equivalent TLB flush as the
* cputable based setup code.
@@ -105,24 +102,15 @@ static void cpufeatures_flush_tlb(void)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
- num_sets = POWER8_TLB_SETS;
+ __flush_tlb_power8(POWER8_TLB_SETS);
break;
case PVR_POWER9:
- num_sets = POWER9_TLB_SETS_HASH;
+ __flush_tlb_power9(POWER9_TLB_SETS_HASH);
break;
default:
- num_sets = 1;
pr_err("unknown CPU version for boot TLB flush\n");
break;
}
-
- asm volatile("ptesync" : : : "memory");
- rb = TLBIEL_INVAL_SET;
- for (i = 0; i < num_sets; i++) {
- asm volatile("tlbiel %0" : : "r" (rb));
- rb += 1 << TLBIEL_INVAL_SET_SHIFT;
- }
- asm volatile("ptesync" : : : "memory");
}
static void __restore_cpu_cpufeatures(void)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c18a5fbb4bb..e6d8354d79ef 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1314,6 +1314,31 @@ EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH)
+
+#define MASKED_DEC_HANDLER_LABEL 3f
+
+#define MASKED_DEC_HANDLER(_H) \
+3: /* soft-nmi */ \
+ std r12,PACA_EXGEN+EX_R12(r13); \
+ GET_SCRATCH0(r10); \
+ std r10,PACA_EXGEN+EX_R13(r13); \
+ EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+
+EXC_COMMON_BEGIN(soft_nmi_common)
+ mr r10,r1
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
+ system_reset, soft_nmi_interrupt,
+ ADD_NVGPRS;ADD_RECONCILE)
+ b ret_from_except
+
+#else
+#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
+#define MASKED_DEC_HANDLER(_H)
+#endif
/*
* An interrupt came in while soft-disabled. We set paca->irq_happened, then:
@@ -1336,7 +1361,7 @@ masked_##_H##interrupt: \
lis r10,0x7fff; \
ori r10,r10,0xffff; \
mtspr SPRN_DEC,r10; \
- b 2f; \
+ b MASKED_DEC_HANDLER_LABEL; \
1: cmpwi r10,PACA_IRQ_DBELL; \
beq 2f; \
cmpwi r10,PACA_IRQ_HMI; \
@@ -1351,7 +1376,8 @@ masked_##_H##interrupt: \
ld r11,PACA_EXGEN+EX_R11(r13); \
GET_SCRATCH0(r13); \
##_H##rfid; \
- b .
+ b .; \
+ MASKED_DEC_HANDLER(_H)
/*
* Real mode exceptions actually use this too, but alternate
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 3079518f2245..dc0c49cfd90a 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
phdr->p_offset = phdr->p_paddr;
- phdr->p_memsz = vmcoreinfo_max_size;
- phdr->p_filesz = vmcoreinfo_max_size;
+ phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;
/* Increment number of program headers. */
(elf->e_phnum)++;
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 9ad37f827a97..1086ea37c832 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -25,6 +25,7 @@
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <linux/nmi.h> /* hardlockup_detector_disable() */
#include <asm/reg.h>
#include <asm/sections.h>
@@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void)
static int __init kvm_guest_init(void)
{
+ /*
+ * The hardlockup detector is likely to get false positives in
+ * KVM guests, so disable it by default.
+ */
+ hardlockup_detector_disable();
+
if (!kvm_para_available())
goto free_tmp;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index d24e689e893f..b76ca198e09c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -53,6 +53,60 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action)
asm volatile("ptesync" : : : "memory");
}
+static void flush_tlb_300(unsigned int num_sets, unsigned int action)
+{
+ unsigned long rb;
+ unsigned int i;
+ unsigned int r;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ rb = TLBIEL_INVAL_SET;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ rb = TLBIEL_INVAL_SET_LPID;
+ break;
+ default:
+ BUG();
+ break;
+ }
+
+ asm volatile("ptesync" : : : "memory");
+
+ if (early_radix_enabled())
+ r = 1;
+ else
+ r = 0;
+
+ /*
+ * First flush table/PWC caches with set 0, then flush the
+ * rest of the sets, partition scope. Radix must then do it
+ * all again with process scope. Hash just has to flush
+ * process table.
+ */
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+ "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r));
+ for (i = 1; i < num_sets; i++) {
+ unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+ "r"(rb+set), "r"(0), "i"(2), "i"(0), "r"(r));
+ }
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+ "r"(rb), "r"(0), "i"(2), "i"(1), "r"(r));
+ if (early_radix_enabled()) {
+ for (i = 1; i < num_sets; i++) {
+ unsigned long set = i * (1<<TLBIEL_INVAL_SET_SHIFT);
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : :
+ "r"(rb+set), "r"(0), "i"(2), "i"(1), "r"(r));
+ }
+ }
+
+ asm volatile("ptesync" : : : "memory");
+}
+
/*
* Generic routines to flush TLB on POWER processors. These routines
* are used as flush_tlb hook in the cpu_spec.
@@ -79,7 +133,7 @@ void __flush_tlb_power9(unsigned int action)
else
num_sets = POWER9_TLB_SETS_HASH;
- flush_tlb_206(num_sets, action);
+ flush_tlb_300(num_sets, action);
}
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c119044cad0d..8ac0bd2bddb0 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -614,6 +614,18 @@ _GLOBAL(kexec_sequence)
li r0,0
std r0,16(r1)
+BEGIN_FTR_SECTION
+ /*
+ * This is the best time to turn AMR/IAMR off.
+ * key 0 is used in radix for supervisor<->user
+ * protection, but on hash key 0 is reserved
+ * ideally we want to enter with a clean state.
+ * NOTE, we rely on r0 being 0 from above.
+ */
+ mtspr SPRN_IAMR,r0
+ mtspr SPRN_AMOR,r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
/* save regs for local vars on new stack.
* yes, we won't go back, but ...
*/
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index dd8a04f3053a..613f79f03877 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -15,6 +15,9 @@
#undef DEBUG_PROM
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4640f6d64f8b..af23d4b576ec 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
- return ppc_proc_freq * watchdog_thresh;
-}
-
-/*
- * The hardlockup detector breaks PMU event based branches and is likely
- * to get false positives in KVM guests, so disable it by default.
- */
-static int __init disable_hardlockup_detector(void)
-{
- hardlockup_detector_disable();
-
- return 0;
-}
-early_initcall(disable_hardlockup_detector);
-#endif
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c6b8bace1766..997c88d54acf 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -435,13 +435,31 @@ static void do_smp_send_nmi_ipi(int cpu)
}
}
+void smp_flush_nmi_ipi(u64 delay_us)
+{
+ unsigned long flags;
+
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy_count) {
+ nmi_ipi_unlock_end(&flags);
+ udelay(1);
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ return;
+ }
+ nmi_ipi_lock_start(&flags);
+ }
+ nmi_ipi_unlock_end(&flags);
+}
+
/*
* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
* - fn is the target callback function.
* - delay_us > 0 is the delay before giving up waiting for targets to
* enter the handler, == 0 specifies indefinite delay.
*/
-static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
unsigned long flags;
int me = raw_smp_processor_id();
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644
index 000000000000..b67f8b03a32d
--- /dev/null
+++ b/arch/powerpc/kernel/watchdog.c
@@ -0,0 +1,386 @@
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/paca.h>
+
+/*
+ * The watchdog has a simple timer that runs on each CPU, once per timer
+ * period. This is the heartbeat.
+ *
+ * Then there are checks to see if the heartbeat has not triggered on a CPU
+ * for the panic timeout period. Currently the watchdog only supports an
+ * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ *
+ * This is not an NMI watchdog, but Linux uses that name for a generic
+ * watchdog in some cases, so NMI gets used in some places.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/*
+ * These are for the SMP checker. CPUs clear their pending bit in their
+ * heartbeat. If the bitmask becomes empty, the time is noted and the
+ * bitmask is refilled.
+ *
+ * All CPUs clear their bit in the pending mask every timer period.
+ * Once all have cleared, the time is noted and the bits are reset.
+ * If the time since all clear was greater than the panic timeout,
+ * we can panic with the list of stuck CPUs.
+ *
+ * This will work best with NMI IPIs for crash code so the stuck CPUs
+ * can be pulled out to get their backtraces.
+ */
+static unsigned long __wd_smp_lock;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+ /*
+ * Avoid locking layers if possible.
+ * This may be called from low level interrupt handlers at some
+ * point in future.
+ */
+ local_irq_save(*flags);
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
+ cpu_relax();
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+ clear_bit_unlock(0, &__wd_smp_lock);
+ local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+ pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id());
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+}
+
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ }
+}
+
+static void watchdog_smp_panic(int cpu, u64 tb)
+{
+ unsigned long flags;
+ int c;
+
+ wd_smp_lock(&flags);
+ /* Double check some things under lock */
+ if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ goto out;
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+ goto out;
+ if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ goto out;
+
+ pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+
+ /*
+ * Try to trigger the stuck CPUs.
+ */
+ for_each_cpu(c, &wd_smp_cpus_pending) {
+ if (c == cpu)
+ continue;
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ }
+ smp_flush_nmi_ipi(1000000);
+
+ /* Take the stuck CPUs out of the watch group */
+ for_each_cpu(c, &wd_smp_cpus_pending)
+ set_cpu_stuck(c, tb);
+
+out:
+ wd_smp_unlock(&flags);
+
+ printk_safe_flush();
+ /*
+ * printk_safe_flush() seems to require another print
+ * before anything actually goes out to console.
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(NULL, "Hard LOCKUP");
+}
+
+static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+{
+ if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+ if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+ unsigned long flags;
+
+ pr_emerg("Watchdog CPU:%d became unstuck\n", cpu);
+ wd_smp_lock(&flags);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+ wd_smp_unlock(&flags);
+ }
+ return;
+ }
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ unsigned long flags;
+
+ wd_smp_lock(&flags);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = tb;
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ }
+ wd_smp_unlock(&flags);
+ }
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+ u64 tb = get_tb();
+
+ per_cpu(wd_timer_tb, cpu) = tb;
+
+ wd_smp_clear_cpu_pending(cpu, tb);
+
+ if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+ watchdog_smp_panic(cpu, tb);
+}
+
+void soft_nmi_interrupt(struct pt_regs *regs)
+{
+ unsigned long flags;
+ int cpu = raw_smp_processor_id();
+ u64 tb;
+
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return;
+
+ nmi_enter();
+ tb = get_tb();
+ if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+
+ wd_smp_lock(&flags);
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+ wd_smp_unlock(&flags);
+ goto out;
+ }
+ set_cpu_stuck(cpu, tb);
+
+ pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ wd_smp_unlock(&flags);
+
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutself_cpu_backtrace();
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+ }
+ if (wd_panic_timeout_tb < 0x7fffffff)
+ mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+out:
+ nmi_exit();
+}
+
+static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
+{
+ t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
+ if (wd_timer_period_ms > 1000)
+ t->expires = __round_jiffies_up(t->expires, cpu);
+ add_timer_on(t, cpu);
+}
+
+static void wd_timer_fn(unsigned long data)
+{
+ struct timer_list *t = this_cpu_ptr(&wd_timer);
+ int cpu = smp_processor_id();
+
+ watchdog_timer_interrupt(cpu);
+
+ wd_timer_reset(cpu, t);
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+ int cpu = smp_processor_id();
+
+ watchdog_timer_interrupt(cpu);
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog_timer_on(unsigned int cpu)
+{
+ struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+ per_cpu(wd_timer_tb, cpu) = get_tb();
+
+ setup_pinned_timer(t, wd_timer_fn, 0);
+ wd_timer_reset(cpu, t);
+}
+
+static void stop_watchdog_timer_on(unsigned int cpu)
+{
+ struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
+
+ del_timer_sync(t);
+}
+
+static int start_wd_on_cpu(unsigned int cpu)
+{
+ if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+ WARN_ON(1);
+ return 0;
+ }
+
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return 0;
+
+ if (watchdog_suspended)
+ return 0;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return 0;
+
+ cpumask_set_cpu(cpu, &wd_cpus_enabled);
+ if (cpumask_weight(&wd_cpus_enabled) == 1) {
+ cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+ wd_smp_last_reset_tb = get_tb();
+ }
+ smp_wmb();
+ start_watchdog_timer_on(cpu);
+
+ return 0;
+}
+
+static int stop_wd_on_cpu(unsigned int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return 0; /* Can happen in CPU unplug case */
+
+ stop_watchdog_timer_on(cpu);
+
+ cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_clear_cpu_pending(cpu, get_tb());
+
+ return 0;
+}
+
+static void watchdog_calc_timeouts(void)
+{
+ wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
+
+ /* Have the SMP detector trigger a bit later */
+ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+ /* 2/5 is the factor that the perf based detector uses */
+ wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_nmi_reconfigure(void)
+{
+ int cpu;
+
+ watchdog_calc_timeouts();
+
+ for_each_cpu(cpu, &wd_cpus_enabled)
+ stop_wd_on_cpu(cpu);
+
+ for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+ start_wd_on_cpu(cpu);
+}
+
+/*
+ * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ */
+static int __init powerpc_watchdog_init(void)
+{
+ int err;
+
+ watchdog_calc_timeouts();
+
+ err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
+ start_wd_on_cpu, stop_wd_on_cpu);
+ if (err < 0)
+ pr_warn("Watchdog could not be initialized");
+
+ return 0;
+}
+arch_initcall(powerpc_watchdog_init);
+
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+ nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) {
+ if (cpu == smp_processor_id())
+ handle_backtrace_ipi(NULL);
+ else
+ smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000);
+ }
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+ nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
+}
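To make the timeout constants in watchdog_calc_timeouts() concrete, a standalone user-space check with assumed values (the default watchdog_thresh of 10 seconds and a 512 MHz timebase, i.e. ppc_tb_freq = 512000000); it only mirrors the arithmetic above and is not kernel code.
#include <stdio.h>
int main(void)
{
	unsigned long long ppc_tb_freq = 512000000ULL;	/* assumed 512 MHz timebase */
	unsigned long watchdog_thresh = 10;		/* default threshold, in seconds */
	unsigned long long wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq;
	unsigned long long wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
	unsigned long wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
	printf("per-CPU panic timeout : %llu timebase ticks (10 s)\n", wd_panic_timeout_tb);
	printf("SMP panic timeout     : %llu timebase ticks (15 s)\n", wd_smp_panic_timeout_tb);
	printf("heartbeat timer period: %lu ms\n", wd_timer_period_ms);
	return 0;
}
With those numbers each CPU heartbeats every 4 seconds, is declared stuck after 10 seconds without a heartbeat, and the cross-CPU check fires once 15 seconds pass without every CPU having cleared its pending bit.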
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 710e491206ed..8cb0190e2a73 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
}
if (!hpt)
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
|__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index f3917705c686..41cf5ae273cf 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -233,192 +233,192 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p)
static void test_basic_patching(void)
{
- extern unsigned int ftr_fixup_test1;
- extern unsigned int end_ftr_fixup_test1;
- extern unsigned int ftr_fixup_test1_orig;
- extern unsigned int ftr_fixup_test1_expected;
- int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+ extern unsigned int ftr_fixup_test1[];
+ extern unsigned int end_ftr_fixup_test1[];
+ extern unsigned int ftr_fixup_test1_orig[];
+ extern unsigned int ftr_fixup_test1_expected[];
+ int size = end_ftr_fixup_test1 - ftr_fixup_test1;
fixup.value = fixup.mask = 8;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
fixup.alt_start_off = fixup.alt_end_off = 0;
/* Sanity check */
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
patch_feature_section(~8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
}
static void test_alternative_patching(void)
{
- extern unsigned int ftr_fixup_test2;
- extern unsigned int end_ftr_fixup_test2;
- extern unsigned int ftr_fixup_test2_orig;
- extern unsigned int ftr_fixup_test2_alt;
- extern unsigned int ftr_fixup_test2_expected;
- int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+ extern unsigned int ftr_fixup_test2[];
+ extern unsigned int end_ftr_fixup_test2[];
+ extern unsigned int ftr_fixup_test2_orig[];
+ extern unsigned int ftr_fixup_test2_alt[];
+ extern unsigned int ftr_fixup_test2_expected[];
+ int size = end_ftr_fixup_test2 - ftr_fixup_test2;
fixup.value = fixup.mask = 0xF;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
/* Sanity check */
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
patch_feature_section(~0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
}
static void test_alternative_case_too_big(void)
{
- extern unsigned int ftr_fixup_test3;
- extern unsigned int end_ftr_fixup_test3;
- extern unsigned int ftr_fixup_test3_orig;
- extern unsigned int ftr_fixup_test3_alt;
- int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+ extern unsigned int ftr_fixup_test3[];
+ extern unsigned int end_ftr_fixup_test3[];
+ extern unsigned int ftr_fixup_test3_orig[];
+ extern unsigned int ftr_fixup_test3_alt[];
+ int size = end_ftr_fixup_test3 - ftr_fixup_test3;
fixup.value = fixup.mask = 0xC;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
/* Expect nothing to be patched, and the error returned to us */
check(patch_feature_section(0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(0, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(~0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
}
static void test_alternative_case_too_small(void)
{
- extern unsigned int ftr_fixup_test4;
- extern unsigned int end_ftr_fixup_test4;
- extern unsigned int ftr_fixup_test4_orig;
- extern unsigned int ftr_fixup_test4_alt;
- extern unsigned int ftr_fixup_test4_expected;
- int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+ extern unsigned int ftr_fixup_test4[];
+ extern unsigned int end_ftr_fixup_test4[];
+ extern unsigned int ftr_fixup_test4_orig[];
+ extern unsigned int ftr_fixup_test4_alt[];
+ extern unsigned int ftr_fixup_test4_expected[];
+ int size = end_ftr_fixup_test4 - ftr_fixup_test4;
unsigned long flag;
/* Check a high-bit flag */
flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
fixup.value = fixup.mask = flag;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
patch_feature_section(~flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
}
static void test_alternative_case_with_branch(void)
{
- extern unsigned int ftr_fixup_test5;
- extern unsigned int end_ftr_fixup_test5;
- extern unsigned int ftr_fixup_test5_expected;
- int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+ extern unsigned int ftr_fixup_test5[];
+ extern unsigned int end_ftr_fixup_test5[];
+ extern unsigned int ftr_fixup_test5_expected[];
+ int size = end_ftr_fixup_test5 - ftr_fixup_test5;
- check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+ check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
}
static void test_alternative_case_with_external_branch(void)
{
- extern unsigned int ftr_fixup_test6;
- extern unsigned int end_ftr_fixup_test6;
- extern unsigned int ftr_fixup_test6_expected;
- int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+ extern unsigned int ftr_fixup_test6[];
+ extern unsigned int end_ftr_fixup_test6[];
+ extern unsigned int ftr_fixup_test6_expected[];
+ int size = end_ftr_fixup_test6 - ftr_fixup_test6;
- check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+ check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
static void test_cpu_macros(void)
{
- extern u8 ftr_fixup_test_FTR_macros;
- extern u8 ftr_fixup_test_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FTR_macros_expected -
- &ftr_fixup_test_FTR_macros;
+ extern u8 ftr_fixup_test_FTR_macros[];
+ extern u8 ftr_fixup_test_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FTR_macros_expected -
+ ftr_fixup_test_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FTR_macros,
- &ftr_fixup_test_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FTR_macros,
+ ftr_fixup_test_FTR_macros_expected, size) == 0);
}
static void test_fw_macros(void)
{
#ifdef CONFIG_PPC64
- extern u8 ftr_fixup_test_FW_FTR_macros;
- extern u8 ftr_fixup_test_FW_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
- &ftr_fixup_test_FW_FTR_macros;
+ extern u8 ftr_fixup_test_FW_FTR_macros[];
+ extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+ ftr_fixup_test_FW_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FW_FTR_macros,
- &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FW_FTR_macros,
+ ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
#endif
}
static void test_lwsync_macros(void)
{
- extern u8 lwsync_fixup_test;
- extern u8 end_lwsync_fixup_test;
- extern u8 lwsync_fixup_test_expected_LWSYNC;
- extern u8 lwsync_fixup_test_expected_SYNC;
- unsigned long size = &end_lwsync_fixup_test -
- &lwsync_fixup_test;
+ extern u8 lwsync_fixup_test[];
+ extern u8 end_lwsync_fixup_test[];
+ extern u8 lwsync_fixup_test_expected_LWSYNC[];
+ extern u8 lwsync_fixup_test_expected_SYNC[];
+ unsigned long size = end_lwsync_fixup_test -
+ lwsync_fixup_test;
/* The fixups have already been done for us during boot */
if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_LWSYNC, size) == 0);
} else {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_SYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_SYNC, size) == 0);
}
}
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8a0882..ee33327686ae 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
case 19:
switch ((instr >> 1) & 0x3ff) {
case 0: /* mcrf */
- rd = (instr >> 21) & 0x1c;
- ra = (instr >> 16) & 0x1c;
+ rd = 7 - ((instr >> 23) & 0x7);
+ ra = 7 - ((instr >> 18) & 0x7);
+ rd *= 4;
+ ra *= 4;
val = (regs->ccr >> ra) & 0xf;
regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
goto instr_done;
@@ -964,6 +966,19 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
#endif
case 19: /* mfcr */
+ if ((instr >> 20) & 1) {
+ imm = 0xf0000000UL;
+ for (sh = 0; sh < 8; ++sh) {
+ if (instr & (0x80000 >> sh)) {
+ regs->gpr[rd] = regs->ccr & imm;
+ break;
+ }
+ imm >>= 4;
+ }
+
+ goto instr_done;
+ }
+
regs->gpr[rd] = regs->ccr;
regs->gpr[rd] &= 0xffffffffUL;
goto instr_done;
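The mcrf decode above can be checked outside the kernel: BF and BFA are 3-bit CR field numbers at instruction bits 25:23 and 20:18, and the CR image keeps CR0 in the most significant nibble, hence the 7 - field and * 4 arithmetic. A standalone sketch with an assumed CR value:
#include <stdio.h>
int main(void)
{
	/* mcrf cr1,cr2: primary opcode 19, BF = 1, BFA = 2, extended opcode 0 */
	unsigned int instr = (19u << 26) | (1u << 23) | (2u << 18);
	unsigned long ccr = 0x01230000UL;	/* assumed CR image: cr1 = 1, cr2 = 2, cr3 = 3 */
	int rd = 7 - ((instr >> 23) & 0x7);	/* destination CR field -> bit shift */
	int ra = 7 - ((instr >> 18) & 0x7);	/* source CR field -> bit shift */
	rd *= 4;
	ra *= 4;
	unsigned long val = (ccr >> ra) & 0xf;
	ccr = (ccr & ~(0xfUL << rd)) | (val << rd);
	printf("ccr = 0x%08lx\n", ccr);		/* 0x02230000: cr1 now holds cr2's value */
	return 0;
}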
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0ee6be4f1ba4..5d78b193fec4 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -34,16 +34,9 @@
/*
* Top of mmap area (just below the process stack).
*
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
+ * Leave at least a ~128 MB hole.
*/
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
+#define MIN_GAP (128*1024*1024)
#define MAX_GAP (TASK_SIZE/6*5)
static inline int mmap_is_legacy(void)
@@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void)
return rnd << PAGE_SHIFT;
}
+static inline unsigned long stack_maxrandom_size(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ return (1<<23);
+ else
+ return (1<<30);
+}
+
static inline unsigned long mmap_base(unsigned long rnd)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
+
+ /* Values close to RLIM_INFINITY can overflow. */
+ if (gap + pad > gap)
+ gap += pad;
if (gap < MIN_GAP)
gap = MIN_GAP;
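The effect of the new padding can be sanity-checked in user space. The sketch below assumes the 1 GB maximum stack randomisation of a 64-bit task, a 1 MB stack_guard_gap and the usual clamp to MAX_GAP; it is illustrative arithmetic only, not the kernel function.
#include <stdio.h>
#define MIN_GAP		(128UL * 1024 * 1024)
#define TASK_SIZE	(1UL << 46)			/* assumed 64-bit user address space size */
#define MAX_GAP		(TASK_SIZE / 6 * 5)
static unsigned long mmap_gap(unsigned long stack_rlimit)
{
	unsigned long gap = stack_rlimit;
	unsigned long pad = (1UL << 30) + (1UL << 20);	/* 1 GB randomisation + assumed 1 MB guard gap */
	/* Values close to RLIM_INFINITY can overflow -- same guard as above */
	if (gap + pad > gap)
		gap += pad;
	if (gap < MIN_GAP)
		gap = MIN_GAP;
	else if (gap > MAX_GAP)
		gap = MAX_GAP;
	return gap;
}
int main(void)
{
	printf("8 MB stack rlimit -> %lu MB gap below the mmap base\n", mmap_gap(8UL << 20) >> 20);
	printf("unlimited stack   -> %lu MB gap (clamped to MAX_GAP)\n", mmap_gap(~0UL) >> 20);
	return 0;
}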
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 71de2c6d88f3..abed1fe6992f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm)
rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
+ /*
+ * Order the above store with subsequent update of the PID
+ * register (at which point HW can start loading/caching
+ * the entry) and the corresponding load by the MMU from
+ * the L2 cache.
+ */
+ asm volatile("ptesync;isync" : : : "memory");
+
mm->context.npu_context = NULL;
return index;
@@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm)
mm->context.cop_lockp = NULL;
#endif /* CONFIG_PPC_ICSWX */
- if (radix_enabled())
- process_tb[mm->context.id].prtb1 = 0;
- else
+ if (radix_enabled()) {
+ /*
+ * Radix doesn't have a valid bit in the process table
+ * entries. However we know that at least P9 implementation
+ * will avoid caching an entry with an invalid RTS field,
+ * and 0 is invalid. So this will do.
+ */
+ process_tb[mm->context.id].prtb0 = 0;
+ } else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
__destroy_context(mm->context.id);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 8125160be7bc..3f3aa9a7063a 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -90,13 +90,15 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
* MMCRA[SDAR_MODE] will be set to 0b01
* For rest
* MMCRA[SDAR_MODE] will be set from event code.
+ * If sdar_mode from event is zero, default to 0b01. Hardware
+ * requires that we set a non-zero value.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
- else if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
+ else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event))
*mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
- else if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ else
*mmcra |= MMCRA_SDAR_MODE_TLB;
} else
*mmcra |= MMCRA_SDAR_MODE_TLB;
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 80204e064362..50689180a6c1 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -51,8 +51,12 @@ EVENT(PM_DTLB_MISS, 0x300fc)
EVENT(PM_ITLB_MISS, 0x400fc)
/* Run_Instructions */
EVENT(PM_RUN_INST_CMPL, 0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa)
/* Run_cycles */
EVENT(PM_RUN_CYC, 0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT, 0x200f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index f17435e4a489..2280cf87ff9c 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -107,6 +107,8 @@ extern struct attribute_group isa207_pmu_format_group;
/* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
{ PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
};
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index d8af9bc0489f..9558d725a99b 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -605,6 +605,24 @@ static const match_table_t spufs_tokens = {
{ Opt_err, NULL },
};
+static int spufs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct spufs_sb_info *sbi = spufs_get_sb_info(root->d_sb);
+ struct inode *inode = root->d_inode;
+
+ if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, inode->i_uid));
+ if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, inode->i_gid));
+ if ((inode->i_mode & S_IALLUGO) != 0775)
+ seq_printf(m, ",mode=%o", inode->i_mode);
+ if (sbi->debug)
+ seq_puts(m, ",debug");
+ return 0;
+}
+
static int
spufs_parse_options(struct super_block *sb, char *options, struct inode *root)
{
@@ -724,11 +742,9 @@ spufs_fill_super(struct super_block *sb, void *data, int silent)
.destroy_inode = spufs_destroy_inode,
.statfs = simple_statfs,
.evict_inode = spufs_evict_inode,
- .show_options = generic_show_options,
+ .show_options = spufs_show_options,
};
- save_mount_options(sb, data);
-
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 59684b4af4d1..9b87abb178f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -59,6 +59,8 @@ static struct task_struct *kopald_tsk;
void opal_configure_cores(void)
{
+ u64 reinit_flags = 0;
+
/* Do the actual re-init. This will clobber all FPRs, VRs, etc...
*
* It will preserve non volatile GPRs and HSPRG0/1. It will
@@ -66,11 +68,24 @@ void opal_configure_cores(void)
* but it might clobber a bunch.
*/
#ifdef __BIG_ENDIAN__
- opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+ reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
#else
- opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+ reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
#endif
+ /*
+ * POWER9 always supports running hash:
+ * ie. Host hash supports hash guests
+ * Host radix supports hash/radix guests
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
+ if (early_radix_enabled())
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
+ }
+
+ opal_reinit_cpus(reinit_flags);
+
/* Restore some bits */
if (cur_cpu_spec->cpu_restore)
cur_cpu_spec->cpu_restore();
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 2dc7e5fb86c3..897aa1400eb8 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -225,6 +225,8 @@ static void pnv_kexec_wait_secondaries_down(void)
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
+ u64 reinit_flags;
+
if (xive_enabled())
xive_kexec_teardown_cpu(secondary);
else
@@ -254,8 +256,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
* We might be running as little-endian - now that interrupts
* are disabled, reset the HILE bit to big-endian so we don't
* take interrupts in the wrong endian later
+ *
+ * We reinit to enable both radix and hash on P9 to ensure
+ * the mode used by the next kernel is always supported.
*/
- opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+ reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
+ OPAL_REINIT_CPUS_MMU_HASH;
+ opal_reinit_cpus(reinit_flags);
}
}
#endif /* CONFIG_KEXEC_CORE */