summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- arch/powerpc/Kconfig.debug | 6
-rw-r--r-- arch/powerpc/include/asm/book3s/64/hash.h | 5
-rw-r--r-- arch/powerpc/include/asm/book3s/64/pgtable.h | 18
-rw-r--r-- arch/powerpc/include/asm/opal-api.h | 3
-rw-r--r-- arch/powerpc/include/asm/opal.h | 3
-rw-r--r-- arch/powerpc/include/asm/paca.h | 29
-rw-r--r-- arch/powerpc/include/uapi/asm/cputable.h | 2
-rw-r--r-- arch/powerpc/include/uapi/asm/elf.h | 2
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c | 10
-rw-r--r-- arch/powerpc/kernel/module_64.c | 27
-rw-r--r-- arch/powerpc/kernel/prom_init.c | 1
-rw-r--r-- arch/powerpc/kernel/stacktrace.c | 7
-rw-r--r-- arch/powerpc/mm/hash_utils_64.c | 11
-rw-r--r-- arch/powerpc/mm/slb.c | 2
-rw-r--r-- arch/powerpc/mm/slice.c | 3
-rw-r--r-- arch/powerpc/platforms/powermac/bootx_init.c | 1
-rw-r--r-- arch/powerpc/platforms/powernv/Makefile | 1
-rw-r--r-- arch/powerpc/platforms/powernv/opal-kmsg.c | 75
-rw-r--r-- arch/powerpc/platforms/powernv/opal-rtc.c | 2
-rw-r--r-- arch/powerpc/platforms/powernv/opal-wrappers.S | 1
-rw-r--r-- arch/powerpc/platforms/powernv/opal.c | 5
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c | 84
-rw-r--r-- arch/powerpc/platforms/powernv/pci.c | 2
-rw-r--r-- drivers/misc/cxl/Makefile | 3
-rw-r--r-- drivers/misc/cxl/api.c | 2
-rw-r--r-- drivers/misc/cxl/context.c | 6
-rw-r--r-- drivers/misc/cxl/cxl.h | 3
-rw-r--r-- drivers/misc/cxl/fault.c | 129
-rw-r--r-- drivers/misc/cxl/file.c | 6
-rw-r--r-- drivers/misc/cxl/pci.c | 1
-rwxr-xr-x scripts/recordmcount.pl | 3
31 files changed, 378 insertions, 75 deletions
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3a510f4a6b68..77e2cefe47eb 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -64,17 +64,17 @@ config PPC_EMULATED_STATS
emulated.
config CODE_PATCHING_SELFTEST
- bool "Run self-tests of the code-patching code."
+ bool "Run self-tests of the code-patching code"
depends on DEBUG_KERNEL
default n
config FTR_FIXUP_SELFTEST
- bool "Run self-tests of the feature-fixup code."
+ bool "Run self-tests of the feature-fixup code"
depends on DEBUG_KERNEL
default n
config MSI_BITMAP_SELFTEST
- bool "Run self-tests of the MSI bitmap code."
+ bool "Run self-tests of the MSI bitmap code"
depends on DEBUG_KERNEL
default n
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9e861b4378bd..2ff8b3df553d 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -33,7 +33,12 @@
#define _PAGE_F_GIX_SHIFT 12
#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
#define _PAGE_SPECIAL 0x10000 /* software: special page */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */
+#else
+#define _PAGE_SOFT_DIRTY 0x00000
+#endif
/*
* THP pages can't be special. So use the _PAGE_SPECIAL
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 03c1a5a21c0c..b3a5badab69f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -158,12 +158,22 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define __swp_entry(type, offset) ((swp_entry_t) { \
((type) << _PAGE_BIT_SWAP_TYPE) \
| ((offset) << PTE_RPN_SHIFT) })
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
-#define __swp_entry_to_pte(x) __pte((x).val)
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+#else
+#define _PAGE_SWP_SOFT_DIRTY 0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
@@ -176,8 +186,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
}
-#else
-#define _PAGE_SWP_SOFT_DIRTY 0
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 8374afed9d0a..f8faaaeeca1e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -157,7 +157,8 @@
#define OPAL_LEDS_GET_INDICATOR 114
#define OPAL_LEDS_SET_INDICATOR 115
#define OPAL_CEC_REBOOT2 116
-#define OPAL_LAST 116
+#define OPAL_CONSOLE_FLUSH 117
+#define OPAL_LAST 117
/* Device tree flags */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 800115910e43..07a99e638449 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -35,6 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length,
uint8_t *buffer);
int64_t opal_console_write_buffer_space(int64_t term_number,
__be64 *length);
+int64_t opal_console_flush(int64_t term_number);
int64_t opal_rtc_read(__be32 *year_month_day,
__be64 *hour_minute_second_millisecond);
int64_t opal_rtc_write(uint32_t year_month_day,
@@ -262,6 +263,8 @@ extern int opal_resync_timebase(void);
extern void opal_lpc_init(void);
+extern void opal_kmsg_init(void);
+
extern int opal_event_request(unsigned int opal_event_nr);
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 70bd4381f8e6..546540b91095 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -16,6 +16,7 @@
#ifdef CONFIG_PPC64
+#include <linux/string.h>
#include <asm/types.h>
#include <asm/lppaca.h>
#include <asm/mmu.h>
@@ -131,7 +132,16 @@ struct paca_struct {
struct tlb_core_data tcd;
#endif /* CONFIG_PPC_BOOK3E */
- mm_context_t context;
+#ifdef CONFIG_PPC_BOOK3S
+ mm_context_id_t mm_ctx_id;
+#ifdef CONFIG_PPC_MM_SLICES
+ u64 mm_ctx_low_slices_psize;
+ unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+ u16 mm_ctx_user_psize;
+ u16 mm_ctx_sllp;
+#endif
+#endif
/*
* then miscellaneous read-write fields
@@ -194,6 +204,23 @@ struct paca_struct {
#endif
};
+#ifdef CONFIG_PPC_BOOK3S
+static inline void copy_mm_to_paca(mm_context_t *context)
+{
+ get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+ get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_high_slices_psize,
+ &context->high_slices_psize, SLICE_ARRAY_SIZE);
+#else
+ get_paca()->mm_ctx_user_psize = context->user_psize;
+ get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+}
+#else
+static inline void copy_mm_to_paca(mm_context_t *context){}
+#endif
+
extern struct paca_struct *paca;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 43686043e297..8dde19962a5b 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -43,5 +43,7 @@
#define PPC_FEATURE2_TAR 0x04000000
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
#define PPC_FEATURE2_HTM_NOSC 0x01000000
+#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */
+#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
#endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 59dad113897b..c2d21d11c2d2 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -295,6 +295,8 @@ do { \
#define R_PPC64_TLSLD 108
#define R_PPC64_TOCSAVE 109
+#define R_PPC64_ENTRY 118
+
#define R_PPC64_REL16 249
#define R_PPC64_REL16_LO 250
#define R_PPC64_REL16_HI 251
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 221d584d089f..07cebc3514f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,14 +185,16 @@ int main(void)
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
- DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_BOOK3S
+ DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id));
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
- context.low_slices_psize));
+ mm_ctx_low_slices_psize));
DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
- context.high_slices_psize));
+ mm_ctx_high_slices_psize));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
+#endif
#ifdef CONFIG_PPC_BOOK3E
DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
@@ -222,7 +224,7 @@ int main(void)
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
#else
- DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+ DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp));
#endif /* CONFIG_PPC_MM_SLICES */
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 68384514506b..59663af9315f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*/
break;
+ case R_PPC64_ENTRY:
+ /*
+ * Optimize ELFv2 large code model entry point if
+ * the TOC is within 2GB range of current location.
+ */
+ value = my_r2(sechdrs, me) - (unsigned long)location;
+ if (value + 0x80008000 > 0xffffffff)
+ break;
+ /*
+ * Check for the large code model prolog sequence:
+ * ld r2, ...(r12)
+ * add r2, r2, r12
+ */
+ if ((((uint32_t *)location)[0] & ~0xfffc)
+ != 0xe84c0000)
+ break;
+ if (((uint32_t *)location)[1] != 0x7c426214)
+ break;
+ /*
+ * If found, replace it with:
+ * addis r2, r12, (.TOC.-func)@ha
+ * addi r2, r12, (.TOC.-func)@l
+ */
+ ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
+ ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+ break;
+
case R_PPC64_REL16_HA:
/* Subtract location pointer */
value -= (unsigned long)location;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 92dea8df6b26..da5192590c44 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -389,6 +389,7 @@ static void __init prom_printf(const char *format, ...)
break;
}
}
+ va_end(args);
}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index ea43a347a104..4f24606afc3f 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -61,3 +61,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
save_context_stack(trace, tsk->thread.ksp, tsk, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void
+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+ save_context_stack(trace, regs->gpr[1], current, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 4233dcccbaf7..ba59d5977f34 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -853,11 +853,11 @@ static unsigned int get_paca_psize(unsigned long addr)
unsigned long index, mask_index;
if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->context.low_slices_psize;
+ lpsizes = get_paca()->mm_ctx_low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
return (lpsizes >> (index * 4)) & 0xF;
}
- hpsizes = get_paca()->context.high_slices_psize;
+ hpsizes = get_paca()->mm_ctx_high_slices_psize;
index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 0x1;
return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
@@ -866,7 +866,7 @@ static unsigned int get_paca_psize(unsigned long addr)
#else
unsigned int get_paca_psize(unsigned long addr)
{
- return get_paca()->context.user_psize;
+ return get_paca()->mm_ctx_user_psize;
}
#endif
@@ -882,7 +882,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
copro_flush_all_slbs(mm);
if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
- get_paca()->context = mm->context;
+
+ copy_mm_to_paca(&mm->context);
slb_flush_and_rebolt();
}
}
@@ -949,7 +950,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
{
if (user_region) {
if (psize != get_paca_psize(ea)) {
- get_paca()->context = mm->context;
+ copy_mm_to_paca(&mm->context);
slb_flush_and_rebolt();
}
} else if (get_paca()->vmalloc_sllp !=
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 515730e499fe..825b6873391f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -228,7 +228,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
asm volatile("slbie %0" : : "r" (slbie_data));
get_paca()->slb_cache_ptr = 0;
- get_paca()->context = mm->context;
+ copy_mm_to_paca(&mm->context);
/*
* preload some userspace segments into the SLB.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0f432a702870..42954f0b47ac 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -185,8 +185,7 @@ static void slice_flush_segments(void *parm)
if (mm != current->active_mm)
return;
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
+ copy_mm_to_paca(&current->active_mm->context);
local_irq_save(flags);
slb_flush_and_rebolt();
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 76f5013c35e5..c3c9bbb3573a 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -84,6 +84,7 @@ static void __init bootx_printf(const char *format, ...)
break;
}
}
+ va_end(args);
}
#else /* CONFIG_BOOTX_TEXT */
static void __init bootx_printf(const char *format, ...) {}
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index ee774e8a4837..f1516b5ecec9 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,6 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o
obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y += opal-kmsg.o
obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 000000000000..6f1214d4de92
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,75 @@
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware. The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output. In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed. This function does not actually dump the
+ * message; it only ensures that OPAL completely flushes the console buffer.
+ */
+static void force_opal_console_flush(struct kmsg_dumper *dumper,
+ enum kmsg_dump_reason reason)
+{
+ int i;
+ int64_t ret;
+
+ /*
+ * Outside of a panic context the pollers will continue to run,
+ * so we don't need to do any special flushing.
+ */
+ if (reason != KMSG_DUMP_PANIC)
+ return;
+
+ if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+ ret = opal_console_flush(0);
+
+ if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER)
+ return;
+
+ /* Incrementally flush until there's nothing left */
+ while (opal_console_flush(0) != OPAL_SUCCESS);
+ } else {
+ /*
+ * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+ * the console can still be flushed by calling the polling
+ * function enough times to flush the buffer. We don't know
+ * how much output still needs to be flushed, but we can be
+ * generous since the kernel is in panic and doesn't need
+ * to do much else.
+ */
+ printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
+ for (i = 0; i < 1024; i++) {
+ opal_poll_events(NULL);
+ }
+ }
+}
+
+static struct kmsg_dumper opal_kmsg_dumper = {
+ .dump = force_opal_console_flush
+};
+
+void __init opal_kmsg_init(void)
+{
+ int rc;
+
+ /* Add our dumper to the list */
+ rc = kmsg_dump_register(&opal_kmsg_dumper);
+ if (rc != 0)
+ pr_err("opal: kmsg_dump_register failed; returned %d\n", rc);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 1b149c92fca1..f8868864f373 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -50,7 +50,7 @@ unsigned long __init opal_get_boot_time(void)
rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
if (rc == OPAL_BUSY_EVENT)
opal_poll_events(NULL);
- else
+ else if (rc == OPAL_BUSY)
mdelay(10);
}
if (rc != OPAL_SUCCESS)
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index b7a464fef7a7..e45b88a5d7e0 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aad0033d65d1..81f4a3ab8743 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -548,7 +548,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
goto out;
if ((regs->nip >= opal.base) &&
- (regs->nip <= (opal.base + opal.size)))
+ (regs->nip < (opal.base + opal.size)))
recover_addr = find_recovery_address(regs->nip);
/*
@@ -748,6 +748,9 @@ static int __init opal_init(void)
opal_pdev_init(opal_node, "ibm,opal-flash");
opal_pdev_init(opal_node, "ibm,opal-prd");
+ /* Initialise OPAL kmsg dumper for flushing console on panic */
+ opal_kmsg_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 323e1e58da93..573ae1994097 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1074,16 +1074,75 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pnv_ioda_link_pe_by_weight(phb, pe);
}
-static void pnv_ioda_setup_dev_PEs(struct pci_bus *bus)
+static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
+{
+ int pe_num, found_pe = false, rc;
+ long rid;
+ struct pnv_ioda_pe *pe;
+ struct pci_dev *gpu_pdev;
+ struct pci_dn *npu_pdn;
+ struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ /*
+ * Due to a hardware erratum, PE#0 on the NPU is reserved for
+ * error handling. This means we only have three PEs remaining
+ * which need to be assigned to four links, implying some
+ * links must share PEs.
+ *
+ * To achieve this we assign PEs such that NPUs linking the
+ * same GPU get assigned the same PE.
+ */
+ gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
+ for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+ pe = &phb->ioda.pe_array[pe_num];
+ if (!pe->pdev)
+ continue;
+
+ if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
+ /*
+ * This device has the same peer GPU so should
+ * be assigned the same PE as the existing
+ * peer NPU.
+ */
+ dev_info(&npu_pdev->dev,
+ "Associating to existing PE %d\n", pe_num);
+ pci_dev_get(npu_pdev);
+ npu_pdn = pci_get_pdn(npu_pdev);
+ rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
+ npu_pdn->pcidev = npu_pdev;
+ npu_pdn->pe_number = pe_num;
+ pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
+ phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+ /* Map the PE to this link */
+ rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
+ OpalPciBusAll,
+ OPAL_COMPARE_RID_DEVICE_NUMBER,
+ OPAL_COMPARE_RID_FUNCTION_NUMBER,
+ OPAL_MAP_PE);
+ WARN_ON(rc != OPAL_SUCCESS);
+ found_pe = true;
+ break;
+ }
+ }
+
+ if (!found_pe)
+ /*
+ * Could not find an existing PE so allocate a new
+ * one.
+ */
+ return pnv_ioda_setup_dev_PE(npu_pdev);
+ else
+ return pe;
+}
+
+static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
{
- struct pci_bus *child;
struct pci_dev *pdev;
list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_dev_PE(pdev);
-
- list_for_each_entry(child, &bus->children, node)
- pnv_ioda_setup_dev_PEs(child);
+ pnv_ioda_setup_npu_PE(pdev);
}
static void pnv_ioda_setup_PEs(struct pci_bus *bus)
@@ -1127,9 +1186,11 @@ static void pnv_pci_ioda_setup_PEs(void)
* functions. PCI bus dependent PEs are required for the
* remaining types of PHBs.
*/
- if (phb->type == PNV_PHB_NPU)
- pnv_ioda_setup_dev_PEs(hose->bus);
- else
+ if (phb->type == PNV_PHB_NPU) {
+ /* PE#0 is needed for error reporting */
+ pnv_ioda_reserve_pe(phb, 0);
+ pnv_ioda_setup_npu_PEs(hose->bus);
+ } else
pnv_ioda_setup_PEs(hose->bus);
}
}
@@ -1612,7 +1673,10 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
/* Update peer npu devices */
if (pe->flags & PNV_IODA_PE_PEER)
- for (i = 0; pe->peers[i]; i++) {
+ for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+ if (!pe->peers[i])
+ continue;
+
linked_npu_dev = pe->peers[i]->pdev;
if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
dma_set_mask(&linked_npu_dev->dev, dma_mask);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ff4e42d9d259..2f55c86df703 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1,8 +1,6 @@
/*
* Support PCI/PCIe on PowerNV platforms
*
- * Currently supports only P5IOC2
- *
* Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
*
* This program is free software; you can redistribute it and/or
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index 6982f603fadc..be2ac5ce349f 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -1,4 +1,5 @@
-ccflags-y := -Werror -Wno-unused-const-variable
+ccflags-y := $(call cc-disable-warning, unused-const-variable)
+ccflags-$(CONFIG_PPC_WERROR) += -Werror
cxl-y += main.o file.o irq.o fault.o native.o
cxl-y += context.o sysfs.o debugfs.o pci.o trace.o
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a6543aefa299..ea3eeb7011e1 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -172,7 +172,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
if (task) {
ctx->pid = get_task_pid(task, PIDTYPE_PID);
- get_pid(ctx->pid);
+ ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
kernel = false;
}
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 6dde7a9d6a7e..262b88eac414 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -42,7 +42,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
spin_lock_init(&ctx->sste_lock);
ctx->afu = afu;
ctx->master = master;
- ctx->pid = NULL; /* Set in start work ioctl */
+ ctx->pid = ctx->glpid = NULL; /* Set in start work ioctl */
mutex_init(&ctx->mapping_lock);
ctx->mapping = mapping;
@@ -217,7 +217,11 @@ int __detach_context(struct cxl_context *ctx)
WARN_ON(cxl_detach_process(ctx) &&
cxl_adapter_link_ok(ctx->afu->adapter));
flush_work(&ctx->fault_work); /* Only needed for dedicated process */
+
+ /* release the references to the group leader and mm handling pids */
put_pid(ctx->pid);
+ put_pid(ctx->glpid);
+
cxl_ctx_put();
return 0;
}
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 25ae57fa79b0..a521bc72cec2 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -445,6 +445,9 @@ struct cxl_context {
unsigned int sst_size, sst_lru;
wait_queue_head_t wq;
+ /* pid of the group leader associated with the pid */
+ struct pid *glpid;
+ /* use mm context associated with this pid for ds faults */
struct pid *pid;
spinlock_t lock; /* Protects pending_irq_mask, pending_fault and fault_addr */
/* Only used in PR mode */
diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
index 25a5418c55cb..81c3f75b7330 100644
--- a/drivers/misc/cxl/fault.c
+++ b/drivers/misc/cxl/fault.c
@@ -166,13 +166,92 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
}
+/*
+ * Returns the mm_struct corresponding to the context ctx via ctx->pid.
+ * If that task has exited, we use the task group leader accessible
+ * via ctx->glpid to find the next task in the thread group that has a
+ * valid mm_struct associated with it. If a task with a valid mm_struct
+ * is found, ctx->pid is updated to refer to that task for subsequent
+ * translations. If no valid mm_struct is found in the task group to
+ * service the fault, NULL is returned.
+ */
+static struct mm_struct *get_mem_context(struct cxl_context *ctx)
+{
+ struct task_struct *task = NULL;
+ struct mm_struct *mm = NULL;
+ struct pid *old_pid = ctx->pid;
+
+ if (old_pid == NULL) {
+ pr_warn("%s: Invalid context for pe=%d\n",
+ __func__, ctx->pe);
+ return NULL;
+ }
+
+ task = get_pid_task(old_pid, PIDTYPE_PID);
+
+ /*
+ * The pid_alive() check may look racy, but it saves us a costly
+ * get_task_mm() when the task is a zombie. In the worst case
+ * we treat a task that is about to die as alive, in which
+ * case get_task_mm() will return NULL.
+ */
+ if (task != NULL && pid_alive(task))
+ mm = get_task_mm(task);
+
+ /* release the task struct that was taken earlier */
+ if (task)
+ put_task_struct(task);
+ else
+ pr_devel("%s: Context owning pid=%i for pe=%i dead\n",
+ __func__, pid_nr(old_pid), ctx->pe);
+
+ /*
+ * If we couldn't find the mm context then use the group
+ * leader to iterate over the task group and find a task
+ * that gives us mm_struct.
+ */
+ if (unlikely(mm == NULL && ctx->glpid != NULL)) {
+
+ rcu_read_lock();
+ task = pid_task(ctx->glpid, PIDTYPE_PID);
+ if (task)
+ do {
+ mm = get_task_mm(task);
+ if (mm) {
+ ctx->pid = get_task_pid(task,
+ PIDTYPE_PID);
+ break;
+ }
+ task = next_thread(task);
+ } while (task && !thread_group_leader(task));
+ rcu_read_unlock();
+
+ /* check if we switched pid */
+ if (ctx->pid != old_pid) {
+ if (mm)
+ pr_devel("%s:pe=%i switch pid %i->%i\n",
+ __func__, ctx->pe, pid_nr(old_pid),
+ pid_nr(ctx->pid));
+ else
+ pr_devel("%s:Cannot find mm for pid=%i\n",
+ __func__, pid_nr(old_pid));
+
+ /* drop the reference to older pid */
+ put_pid(old_pid);
+ }
+ }
+
+ return mm;
+}
+
+
+
void cxl_handle_fault(struct work_struct *fault_work)
{
struct cxl_context *ctx =
container_of(fault_work, struct cxl_context, fault_work);
u64 dsisr = ctx->dsisr;
u64 dar = ctx->dar;
- struct task_struct *task = NULL;
struct mm_struct *mm = NULL;
if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
@@ -195,17 +274,17 @@ void cxl_handle_fault(struct work_struct *fault_work)
"DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
if (!ctx->kernel) {
- if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
- pr_devel("cxl_handle_fault unable to get task %i\n",
- pid_nr(ctx->pid));
+
+ mm = get_mem_context(ctx);
+ /* indicates all the threads in the task group have exited */
+ if (mm == NULL) {
+ pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
+ __func__, ctx->pe, pid_nr(ctx->pid));
cxl_ack_ae(ctx);
return;
- }
- if (!(mm = get_task_mm(task))) {
- pr_devel("cxl_handle_fault unable to get mm %i\n",
- pid_nr(ctx->pid));
- cxl_ack_ae(ctx);
- goto out;
+ } else {
+ pr_devel("Handling page fault for pe=%d pid=%i\n",
+ ctx->pe, pid_nr(ctx->pid));
}
}
@@ -218,33 +297,22 @@ void cxl_handle_fault(struct work_struct *fault_work)
if (mm)
mmput(mm);
-out:
- if (task)
- put_task_struct(task);
}
static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
{
- int rc;
- struct task_struct *task;
struct mm_struct *mm;
- if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
- pr_devel("cxl_prefault_one unable to get task %i\n",
- pid_nr(ctx->pid));
- return;
- }
- if (!(mm = get_task_mm(task))) {
+ mm = get_mem_context(ctx);
+ if (mm == NULL) {
pr_devel("cxl_prefault_one unable to get mm %i\n",
pid_nr(ctx->pid));
- put_task_struct(task);
return;
}
- rc = cxl_fault_segment(ctx, mm, ea);
+ cxl_fault_segment(ctx, mm, ea);
mmput(mm);
- put_task_struct(task);
}
static u64 next_segment(u64 ea, u64 vsid)
@@ -263,18 +331,13 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
struct copro_slb slb;
struct vm_area_struct *vma;
int rc;
- struct task_struct *task;
struct mm_struct *mm;
- if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) {
- pr_devel("cxl_prefault_vma unable to get task %i\n",
- pid_nr(ctx->pid));
- return;
- }
- if (!(mm = get_task_mm(task))) {
+ mm = get_mem_context(ctx);
+ if (mm == NULL) {
pr_devel("cxl_prefault_vm unable to get mm %i\n",
pid_nr(ctx->pid));
- goto out1;
+ return;
}
down_read(&mm->mmap_sem);
@@ -295,8 +358,6 @@ static void cxl_prefault_vma(struct cxl_context *ctx)
up_read(&mm->mmap_sem);
mmput(mm);
-out1:
- put_task_struct(task);
}
void cxl_prefault(struct cxl_context *ctx, u64 wed)
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 5cc14599837d..783337d22f36 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -201,8 +201,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
* where a process (master, some daemon, etc) has opened the chardev on
* behalf of another process, so the AFU's mm gets bound to the process
* that performs this ioctl and not the process that opened the file.
+ * Also, we grab the PID of the group leader so that, if the task that
+ * has performed the attach operation exits, the mm context of the
+ * process is still accessible.
*/
- ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID));
+ ctx->pid = get_task_pid(current, PIDTYPE_PID);
+ ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 85761d7eb333..4c1903f781fc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -138,6 +138,7 @@ static const struct pci_device_id cxl_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
+ { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
{ PCI_DEVICE_CLASS(0x120000, ~0), },
{ }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 826470d7f000..96e2486a6fc4 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -263,7 +263,8 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "powerpc") {
$local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)";
- $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:";
+ # See comment in the sparc64 section for why we use '\w'.
+ $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?\\w*?)>:";
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$";
if ($bits == 64) {