From ea678ac627e01daf5b4f1da24bf1d0c500e10898 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:00 +0530 Subject: powerpc64/ftrace: Add a field in paca to disable ftrace in unsafe code paths We have some C code that we call into from real mode where we cannot take any exceptions. Though the C functions themselves are mostly safe, if these functions are traced, there is a possibility that we may take an exception. For instance, in certain conditions, the ftrace code uses WARN(), which uses a 'trap' to do its job. For such scenarios, introduce a new field in paca 'ftrace_enabled', which is checked on ftrace entry before continuing. This field can then be set to zero to disable/pause ftrace, and set to a non-zero value to resume ftrace. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/setup_64.c | 3 +++ arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 14 ++++++++++++++ arch/powerpc/kernel/trace/ftrace_64_pg.S | 4 ++++ 4 files changed, 22 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6bee65f3cfd3..262c44a90ea1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -180,6 +180,7 @@ int main(void) OFFSET(PACAKMSR, paca_struct, kernel_msr); OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); + OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); #ifdef CONFIG_PPC_BOOK3S OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b78f142a4148..313136006d1c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -252,6 +252,9 @@ static void cpu_ready_for_interrupts(void) /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; + + /* We are now ok to enable 
ftrace */ + get_paca()->ftrace_enabled = 1; } unsigned long spr_default_dscr = 0; diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 3f3e81852422..ae1cbe783ab6 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -47,6 +47,12 @@ _GLOBAL(ftrace_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_10GPRS(2, r1) + + /* Ok to continue? */ + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace + SAVE_10GPRS(12, r1) SAVE_10GPRS(22, r1) @@ -168,6 +174,14 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr +ftrace_no_trace: + mflr r3 + mtctr r3 + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE + mtlr r0 + bctr + #ifdef CONFIG_LIVEPATCH /* * This function runs in the mcount context, between two functions. As diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index f095358da96e..b7ba51a0f3b6 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -16,6 +16,10 @@ #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL_TOC(ftrace_caller) + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beqlr + /* Taken from output of objdump from lib64/glibc */ mflr r3 ld r11, 0(r1) -- cgit v1.2.3 From d103978636c27fce216bbc8bb289981047b71bd4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:03 +0530 Subject: powerpc64/ftrace: Delay enabling ftrace on secondary cpus On the boot cpu, though we enable paca->ftrace_enabled in early_setup() (via cpu_ready_for_interrupts()), we don't start tracing until much later since ftrace is not initialized yet and since we only support DYNAMIC_FTRACE on powerpc. However, it is possible that ftrace has been initialized by the time some of the secondary cpus start up. In this case, we will try to trace some of the early boot code which can cause problems. 
To address this, move setting paca->ftrace_enabled from cpu_ready_for_interrupts() to early_setup() for the boot cpu, and towards the end of start_secondary() for secondary cpus. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 10 +++++++--- arch/powerpc/kernel/smp.c | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 313136006d1c..7a7ce8ad455e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -252,9 +252,6 @@ static void cpu_ready_for_interrupts(void) /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; - - /* We are now ok to enable ftrace */ - get_paca()->ftrace_enabled = 1; } unsigned long spr_default_dscr = 0; @@ -349,6 +346,13 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); + /* + * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it + * will only actually get enabled on the boot cpu much later once + * ftrace itself has been initialized. + */ + this_cpu_enable_ftrace(); + DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9ca7148b5881..9e711cdbe384 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,6 +59,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -1066,6 +1067,9 @@ void start_secondary(void *unused) local_irq_enable(); + /* We can enable ftrace for secondary cpus now */ + this_cpu_enable_ftrace(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); BUG(); -- cgit v1.2.3 From 424ef0160f439feb2a1a6e796a281e2bfa7b6997 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:04 +0530 Subject: powerpc64/ftrace: Disable ftrace during hotplug Disable ftrace when a cpu is about to go offline. 
When the cpu is woken up, ftrace will get enabled in start_secondary(). Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9e711cdbe384..c96f8fbc1942 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1166,6 +1166,8 @@ int __cpu_disable(void) if (!smp_ops->cpu_disable) return -ENOSYS; + this_cpu_disable_ftrace(); + err = smp_ops->cpu_disable(); if (err) return err; @@ -1184,6 +1186,12 @@ void __cpu_die(unsigned int cpu) void cpu_die(void) { + /* + * Disable on the down path. This will be re-enabled by + * start_secondary() via start_secondary_resume() below + */ + this_cpu_disable_ftrace(); + if (ppc_md.cpu_die) ppc_md.cpu_die(); -- cgit v1.2.3 From 88b1a8547f4c00e55d54e081fc15e3980debf5c1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:06 +0530 Subject: powerpc64/kexec: Hard disable ftrace before switching to the new kernel If function_graph tracer is enabled during kexec, we see the below exception in the simulator: root@(none):/# kexec -e kvm: exiting hardware virtualization kexec_core: Starting new kernel [ 19.262020070,5] OPAL: Switch to big-endian OS kexec: Starting switchover sequence. Interrupt to 0xC000000000004380 from 0xC000000000004380 ** Execution stopped: Continuous Interrupt, Instruction caused exception, ** Now that we have a more effective way to completely disable ftrace on ppc64, let's also use that before switching to a new kernel during kexec. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 2694d078741d..936c7e2d421e 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -98,12 +98,14 @@ void machine_kexec(struct kimage *image) int save_ftrace_enabled; save_ftrace_enabled = __ftrace_enabled_save(); + this_cpu_disable_ftrace(); if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); else default_machine_kexec(image); + this_cpu_enable_ftrace(); __ftrace_enabled_restore(save_ftrace_enabled); /* Fall back to normal restart if we're still alive. */ -- cgit v1.2.3 From 250122baed29d90c643be8809d75274336b98fb0 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:07 +0530 Subject: powerpc64/module: Tighten detection of mcount call sites with -mprofile-kernel For R_PPC64_REL24 relocations, we suppress emitting instructions for TOC load/restore in the relocation stub if the relocation is for _mcount() call when using -mprofile-kernel ABI. To detect this, we check if the preceding instructions are per the standard set of instructions emitted by gcc: either the two instruction sequence of 'mflr r0; std r0,16(r1)', or the more optimized variant of a single 'mflr r0'. This is not sufficient since nothing prevents users from hand coding sequences involving a 'mflr r0' followed by a 'bl'. For removing the toc save instruction from the stub, we additionally check if the symbol is "_mcount". Add the same check here as well. Also rename is_early_mcount_callsite() to is_mprofile_mcount_callsite() since that is what is being checked. The use of "early" is misleading since there is nothing involving this function that qualifies as early. Fixes: 153086644fd1f ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI") Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/module_64.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index a2636c250b7b..8413be31d6a4 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -463,8 +463,11 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, } #ifdef CC_USING_MPROFILE_KERNEL -static bool is_early_mcount_callsite(u32 *instruction) +static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) { + if (strcmp("_mcount", name)) + return false; + /* * Check if this is one of the -mprofile-kernel sequences. */ @@ -496,8 +499,7 @@ static void squash_toc_save_inst(const char *name, unsigned long addr) #else static void squash_toc_save_inst(const char *name, unsigned long addr) { } -/* without -mprofile-kernel, mcount calls are never early */ -static bool is_early_mcount_callsite(u32 *instruction) +static bool is_mprofile_mcount_callsite(const char *name, u32 *instruction) { return false; } @@ -505,11 +507,11 @@ static bool is_early_mcount_callsite(u32 *instruction) /* We expect a noop next: if it is, replace it with instruction to restore r2. */ -static int restore_r2(u32 *instruction, struct module *me) +static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; - if (is_early_mcount_callsite(prev_insn)) + if (is_mprofile_mcount_callsite(name, prev_insn)) return 1; /* @@ -650,7 +652,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, value = stub_for_addr(sechdrs, value, me); if (!value) return -ENOENT; - if (!restore_r2((u32 *)location + 1, me)) + if (!restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; squash_toc_save_inst(strtab + sym->st_name, value); -- cgit v1.2.3 From 9ef404236438bf4934386dc2aa34ba7f0a9e1934 Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Thu, 19 Apr 2018 12:34:08 +0530 Subject: powerpc64/ftrace: Use the generic version of ftrace_replace_code() Our implementation matches that of the generic version, which also handles FTRACE_UPDATE_MODIFY_CALL. So, remove our implementation in favor of the generic version. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/trace/ftrace.c | 36 ------------------------------------ 1 file changed, 36 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4741fe112f05..80667128db3d 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -485,42 +485,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) -{ - unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR; - int ret; - - ret = ftrace_update_record(rec, enable); - - switch (ret) { - case FTRACE_UPDATE_IGNORE: - return 0; - case FTRACE_UPDATE_MAKE_CALL: - return ftrace_make_call(rec, ftrace_addr); - case FTRACE_UPDATE_MAKE_NOP: - return ftrace_make_nop(NULL, rec, ftrace_addr); - } - - return 0; -} - -void ftrace_replace_code(int enable) -{ - struct ftrace_rec_iter *iter; - struct dyn_ftrace *rec; - int ret; - - for (iter = ftrace_rec_iter_start(); iter; - iter = ftrace_rec_iter_next(iter)) { - rec = ftrace_rec_iter_record(iter); - ret = __ftrace_replace_code(rec, enable); - if (ret) { - ftrace_bug(ret, rec); - return; - } - } -} - /* * Use the default ftrace_modify_all_code, but without * stop_machine(). -- cgit v1.2.3 From ae30cc05bed2fd7eb05e4fb53f412783f05ccb7b Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 19 Apr 2018 12:34:09 +0530 Subject: powerpc64/ftrace: Implement support for ftrace_regs_caller() With -mprofile-kernel, we always save the full register state in ftrace_caller(). 
While this works, this is inefficient if we're not interested in the register state, such as when we're using the function tracer. Rename the existing ftrace_caller() as ftrace_regs_caller() and provide a simpler implementation for ftrace_caller() that is used when registers are not required to be saved. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ftrace.h | 2 - arch/powerpc/include/asm/module.h | 3 + arch/powerpc/kernel/module_64.c | 28 +++- arch/powerpc/kernel/trace/ftrace.c | 184 +++++++++++++++++++++++-- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 71 +++++++++- 5 files changed, 262 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3b5e85a72e10..f0806a2fd451 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -49,8 +49,6 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE -# define FTRACE_ADDR ((unsigned long)ftrace_caller) -# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 4f6573934792..18f7214d68b7 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -53,6 +53,9 @@ struct mod_arch_specific { #ifdef CONFIG_DYNAMIC_FTRACE unsigned long toc; unsigned long tramp; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + unsigned long tramp_regs; +#endif #endif /* For module function descriptor dereference */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 8413be31d6a4..f7667e2ebfcb 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -280,6 +280,10 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, #ifdef CONFIG_DYNAMIC_FTRACE /* make the trampoline to the 
ftrace_caller */ relocs++; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* an additional one for ftrace_regs_caller */ + relocs++; +#endif #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); @@ -765,7 +769,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * via the paca (in r13). The target (ftrace_caller()) is responsible for * saving and restoring the toc before returning. */ -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, + struct module *me, unsigned long addr) { struct ppc64_stub_entry *entry; unsigned int i, num_stubs; @@ -792,9 +797,10 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module memcpy(entry->jump, stub_insns, sizeof(stub_insns)); /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = (unsigned long)ftrace_caller - kernel_toc_addr(); + reladdr = addr - kernel_toc_addr(); if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name); + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); return 0; } @@ -802,22 +808,30 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module entry->jump[2] |= PPC_LO(reladdr); /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. 
*/ - entry->funcdata = func_desc((unsigned long)ftrace_caller); + entry->funcdata = func_desc(addr); entry->magic = STUB_MAGIC; return (unsigned long)entry; } #else -static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, + struct module *me, unsigned long addr) { - return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me); + return stub_for_addr(sechdrs, addr, me); } #endif int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.toc = my_r2(sechdrs, mod); - mod->arch.tramp = create_ftrace_stub(sechdrs, mod); + mod->arch.tramp = create_ftrace_stub(sechdrs, mod, + (unsigned long)ftrace_caller); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + mod->arch.tramp_regs = create_ftrace_stub(sechdrs, mod, + (unsigned long)ftrace_regs_caller); + if (!mod->arch.tramp_regs) + return -ENOENT; +#endif if (!mod->arch.tramp) return -ENOENT; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 80667128db3d..79d2924e75d5 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -357,6 +357,8 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned int op[2]; void *ip = (void *)rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; /* read where this goes */ if (probe_kernel_read(op, ip, sizeof(op))) @@ -368,19 +370,44 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { + /* If we never set up ftrace trampoline(s), then bail */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else + if (!mod->arch.tramp) { +#endif pr_err("No ftrace trampoline\n"); return -EINVAL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (rec->flags & FTRACE_FL_REGS) + tramp = 
mod->arch.tramp_regs; + else +#endif + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + /* Ensure branch is within 24 bits */ - if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + if (!create_branch(ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -388,14 +415,6 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) -{ - return ftrace_make_call(rec, addr); -} -#endif - #else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) @@ -472,6 +491,137 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) #endif /* CONFIG_MODULES */ } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_MODULES +static int +__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned int op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; + + /* If we never set up ftrace trampolines, then bail */ + if (!mod->arch.tramp || !mod->arch.tramp_regs) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + /* read where this goes */ + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if 
(!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %x\n", op); + return -EINVAL; + } + + /* let's find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((unsigned int *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; + } + + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + /* Ensure branch is within 24 bits */ + if (!create_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("Branch out of range\n"); + return -EINVAL; + } + + if (patch_branch((unsigned int *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#endif + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned int old, new; + + /* + * If the calling address is more than 24 bits away, + * then we had to use a trampoline to make the call. 
+ * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { + /* within range */ + old = ftrace_call_replace(ip, old_addr, 1); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } + +#ifdef CONFIG_MODULES + /* + * Out of range jumps are called from modules. + */ + if (!rec->arch.mod) { + pr_err("No module loaded\n"); + return -EINVAL; + } + + return __ftrace_modify_call(rec, old_addr, addr); +#else + /* We should not get here without modules */ + return -EINVAL; +#endif /* CONFIG_MODULES */ +} +#endif + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -482,6 +632,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func) new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + old = *(unsigned int *)&ftrace_regs_call; + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + } +#endif + return ret; } diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index ae1cbe783ab6..ed9d7a46c3af 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -20,8 +20,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE /* * - * ftrace_caller() is the function that replaces _mcount() when ftrace is - * active. + * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() + * when ftrace is active. * * We arrive here after a function A calls function B, and we are the trace * function for B. When we enter r1 points to A's stack frame, B has not yet @@ -37,7 +37,7 @@ * Our job is to save the register state into a struct pt_regs (on the stack) * and then arrange for the ftrace function to be called. 
*/ -_GLOBAL(ftrace_caller) +_GLOBAL(ftrace_regs_caller) /* Save the original return address in A's stack frame */ std r0,LRSAVE(r1) @@ -100,8 +100,8 @@ _GLOBAL(ftrace_caller) addi r6, r1 ,STACK_FRAME_OVERHEAD /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: +.globl ftrace_regs_call +ftrace_regs_call: bl ftrace_stub nop @@ -162,6 +162,7 @@ ftrace_call: bne- livepatch_handler #endif +ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -182,6 +183,66 @@ ftrace_no_trace: mtlr r0 bctr +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0, LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1, -SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(3, r1) + + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace + + /* Get the _mcount() call site out of LR */ + mflr r7 + std r7, _NIP(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3, r2, function_trace_op@toc@ha + addi r3, r3, function_trace_op@toc@l + ld r5, 0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Set pt_regs to NULL */ + li r6, 0 + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(3,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Reload original LR */ + ld r0, LRSAVE(r1) + mtlr r0 + + /* Handle function_graph or go back */ + b ftrace_caller_common + #ifdef CONFIG_LIVEPATCH /* * This function runs in the mcount context, between two functions. 
As -- cgit v1.2.3 From 0c0c52306f4792a41d8a86e7c5d30cd4f442e532 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 27 Mar 2018 15:29:06 +1100 Subject: powerpc: Only support DYNAMIC_FTRACE not static We've had dynamic ftrace support for over 9 years since Steve first wrote it, all the distros use dynamic, and static is basically untested these days, so drop support for static ftrace. Signed-off-by: Michael Ellerman Acked-by: Steven Rostedt (VMware) Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 4 +--- arch/powerpc/kernel/trace/ftrace.c | 2 -- arch/powerpc/kernel/trace/ftrace_32.S | 20 ------------------ arch/powerpc/kernel/trace/ftrace_64.S | 29 -------------------------- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 3 --- arch/powerpc/kernel/trace/ftrace_64_pg.S | 2 -- 7 files changed, 2 insertions(+), 59 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c32a181a7cbb..ebb90f09e74f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -162,6 +162,7 @@ config PPC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_ATOMIC64 if PPC32 diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f0806a2fd451..fc3a2203c566 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -48,7 +48,6 @@ #else /* !__ASSEMBLY__ */ extern void _mcount(void); -#ifdef CONFIG_DYNAMIC_FTRACE static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,13 +57,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) struct dyn_arch_ftrace { struct module *mod; }; -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #ifdef 
CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#endif +#endif /* CONFIG_FUNCTION_TRACER */ #ifndef __ASSEMBLY__ #if defined(CONFIG_FTRACE_SYSCALLS) && defined(PPC64_ELF_ABI_v1) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 79d2924e75d5..c076a32093fd 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -662,7 +662,6 @@ int __init ftrace_dyn_arch_init(void) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_graph_call(void); extern void ftrace_graph_stub(void); @@ -691,7 +690,6 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_modify_code(ip, old, new); } -#endif /* CONFIG_DYNAMIC_FTRACE */ /* * Hook the return address and push it in the stack of return addrs diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index afef2c076282..2c29098f630f 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -14,7 +14,6 @@ #include #include -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) /* @@ -47,26 +46,7 @@ _GLOBAL(ftrace_graph_stub) MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr -#else -_GLOBAL(mcount) -_GLOBAL(_mcount) - - MCOUNT_SAVE_FRAME - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5, ftrace_trace_function) - lwz r5,0(r5) - - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - MCOUNT_RESTORE_FRAME - bctr -#endif EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S index e5ccea19821e..e25f77c10a72 100644 --- a/arch/powerpc/kernel/trace/ftrace_64.S +++ b/arch/powerpc/kernel/trace/ftrace_64.S @@ -14,7 +14,6 @@ #include #include -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) @@ -23,34 +22,6 @@ EXPORT_SYMBOL(_mcount) mtlr r0 bctr -#else /* 
CONFIG_DYNAMIC_FTRACE */ -_GLOBAL_TOC(_mcount) -EXPORT_SYMBOL(_mcount) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - - subi r3, r3, MCOUNT_INSN_SIZE - LOAD_REG_ADDR(r5,ftrace_trace_function) - ld r5,0(r5) - ld r5,0(r5) - mtctr r5 - bctrl - nop - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - b ftrace_graph_caller -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 -_GLOBAL(ftrace_stub) - blr -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) /* need to save return values */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index ed9d7a46c3af..9a5b5a513604 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -17,7 +17,6 @@ #include #include -#ifdef CONFIG_DYNAMIC_FTRACE /* * * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() @@ -311,8 +310,6 @@ livepatch_handler: blr #endif /* CONFIG_LIVEPATCH */ -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) stdu r1, -112(r1) diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index b7ba51a0f3b6..4c515c4023de 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -14,7 +14,6 @@ #include #include -#ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL_TOC(ftrace_caller) lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -43,7 +42,6 @@ _GLOBAL(ftrace_graph_stub) _GLOBAL(ftrace_stub) blr -#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) -- cgit v1.2.3 From b71a693d3db3abd1ddf7d29be967a1180c3ebb22 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 10 Apr 2018 19:11:16 +0530 Subject: powerpc/fadump: exclude memory holes while reserving memory in second kernel The second 
kernel, during early boot after the crash, reserves rest of the memory above boot memory size to make sure it does not touch any of the dump memory area. It uses memblock_reserve() that reserves the specified memory region irrespective of memory holes present within that region. There are chances where previous kernel would have hot removed some of its memory leaving memory holes behind. In such cases fadump kernel reports incorrect number of reserved pages through arch_reserved_kernel_pages() hook causing kernel to hang or panic. Fix this by excluding memory holes while reserving rest of the memory above boot memory size during second kernel boot after crash. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3c2c2688918f..bea8d5fe3b6e 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -335,6 +335,26 @@ static unsigned long get_fadump_area_size(void) return size; } +static void __init fadump_reserve_crash_area(unsigned long base, + unsigned long size) +{ + struct memblock_region *reg; + unsigned long mstart, mend, msize; + + for_each_memblock(memory, reg) { + mstart = max_t(unsigned long, base, reg->base); + mend = reg->base + reg->size; + mend = min(base + size, mend); + + if (mstart < mend) { + msize = mend - mstart; + memblock_reserve(mstart, msize); + pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n", + (msize >> 20), mstart); + } + } +} + int __init fadump_reserve_mem(void) { unsigned long base, size, memory_boundary; @@ -380,7 +400,8 @@ int __init fadump_reserve_mem(void) memory_boundary = memblock_end_of_DRAM(); if (fw_dump.dump_active) { - printk(KERN_INFO "Firmware-assisted dump is active.\n"); + pr_info("Firmware-assisted dump is 
active.\n"); + /* * If last boot has crashed then reserve all the memory * above boot_memory_size so that we don't touch it until @@ -389,11 +410,7 @@ int __init fadump_reserve_mem(void) */ base = fw_dump.boot_memory_size; size = memory_boundary - base; - memblock_reserve(base, size); - printk(KERN_INFO "Reserved %ldMB of memory at %ldMB " - "for saving crash dump\n", - (unsigned long)(size >> 20), - (unsigned long)(base >> 20)); + fadump_reserve_crash_area(base, size); fw_dump.fadumphdr_addr = be64_to_cpu(fdm_active->rmr_region.destination_address) + -- cgit v1.2.3 From 8597538712ebd90bc83dfb0b3b40398a0c53ad5b Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Tue, 10 Apr 2018 19:11:31 +0530 Subject: powerpc/fadump: Do not use hugepages when fadump is active FADump capture kernel boots in restricted memory environment preserving the context of previous kernel to save vmcore. Supporting hugepages in such environment makes things unnecessarily complicated, as hugepages need memory set aside for them. This means most of the capture kernel's memory is used in supporting hugepages. In most cases, this results in out-of-memory issues while booting FADump capture kernel. But hugepages are not of much use in capture kernel whose only job is to save vmcore. So, disabling hugepages support, when fadump is active, is a reliable solution for the out of memory issues. Introducing a flag variable to disable HugeTLB support when fadump is active. 
Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/page.h | 1 + arch/powerpc/kernel/fadump.c | 8 ++++++++ arch/powerpc/mm/hash_utils_64.c | 6 ++++-- arch/powerpc/mm/hugetlbpage.c | 7 +++++++ 4 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index dec9ce5ba8af..db7be0779d55 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -39,6 +39,7 @@ #ifndef __ASSEMBLY__ #ifdef CONFIG_HUGETLB_PAGE +extern bool hugetlb_disabled; extern unsigned int HPAGE_SHIFT; #else #define HPAGE_SHIFT PAGE_SHIFT diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index bea8d5fe3b6e..8ceabef40d3d 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -402,6 +402,14 @@ int __init fadump_reserve_mem(void) if (fw_dump.dump_active) { pr_info("Firmware-assisted dump is active.\n"); +#ifdef CONFIG_HUGETLB_PAGE + /* + * FADump capture kernel doesn't care much about hugepages. + * In fact, handling hugepages in capture kernel is asking for + * trouble. So, disable HugeTLB support when fadump is active. 
+ */ + hugetlb_disabled = true; +#endif /* * If last boot has crashed then reserve all the memory * above boot_memory_size so that we don't touch it until diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0bd3790d35df..5beeec6fbb9b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -572,8 +572,10 @@ static void __init htab_scan_page_sizes(void) } #ifdef CONFIG_HUGETLB_PAGE - /* Reserve 16G huge page memory sections for huge pages */ - of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); + if (!hugetlb_disabled) { + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); + } #endif /* CONFIG_HUGETLB_PAGE */ } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f1153f8254e3..2a4b1bf8bde6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -35,6 +35,8 @@ #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 +bool hugetlb_disabled = false; + unsigned int HPAGE_SHIFT; EXPORT_SYMBOL(HPAGE_SHIFT); @@ -651,6 +653,11 @@ static int __init hugetlbpage_init(void) { int psize; + if (hugetlb_disabled) { + pr_info("HugeTLB support is disabled!\n"); + return 0; + } + #if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; -- cgit v1.2.3 From 722cde76d68e8cc4f3de42e71c82fd40dea4f7b9 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 27 Apr 2018 11:53:18 +0530 Subject: powerpc/fadump: Unregister fadump on kexec down path. Unregister fadump on kexec down path otherwise the fadump registration in new kexec-ed kernel complains that fadump is already registered. This makes new kernel to continue using fadump registered by previous kernel which may lead to invalid vmcore generation. 
Hence this patch fixes this issue by un-registering fadump in fadump_cleanup() which is called during kexec path so that new kernel can register fadump with new valid values. Fixes: b500afff11f6 ("fadump: Invalidate registration and release reserved memory for general use.") Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8ceabef40d3d..07e8396d472b 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1180,6 +1180,9 @@ void fadump_cleanup(void) init_fadump_mem_struct(&fdm, be64_to_cpu(fdm_active->cpu_state_data.destination_address)); fadump_invalidate_dump(&fdm); + } else if (fw_dump.dump_registered) { + /* Un-register Firmware-assisted dump if it was registered. */ + fadump_unregister_dump(&fdm); } } -- cgit v1.2.3 From 7c18659dd498b25c6651ba83d4267ba7f9458c9c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 6 May 2018 13:23:46 +0200 Subject: powerpc/watchdog: fix typo 'can by' to 'can be' Signed-off-by: Wolfram Sang Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 6256dc3b0087..591f7c3af4ff 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -64,7 +64,7 @@ * means the CPU(s) with their bit still set in the pending mask have had * their heartbeat stop, and action is taken. * - * Some platforms implement true NMI IPIs, which can by used by the SMP + * Some platforms implement true NMI IPIs, which can be used by the SMP * watchdog to detect an unresponsive CPU and pull it out of its stuck * state with the NMI IPI, to get crash/debug data from it. 
This way the * SMP watchdog can detect hardware interrupts off lockups. -- cgit v1.2.3 From d2b04b0c78881ef1c051cc3a4a2c61b7bf91aa53 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Tue, 8 May 2018 09:05:14 +0200 Subject: powerpc/64/kexec: fix race in kexec when XIVE is shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kexec_state KEXEC_STATE_IRQS_OFF barrier is reached by all secondary CPUs before the kexec_cpu_down() operation is called on secondaries. This can raise conflicts and provoke errors in the XIVE hcalls when XIVE is shut down with H_INT_RESET on the primary CPU. To synchronize the kexec_cpu_down() operations and make sure the secondaries have completed their task before the primary starts doing the same, let's move the primary kexec_cpu_down() after the KEXEC_STATE_REAL_MODE barrier. This change of the ending sequence of kexec is mostly useful on the pseries platform but it impacts also the powernv, ps3 and 85xx platforms. powernv can be easily tested and fixed but some caution is required for the other two.
Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/machine_kexec_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 1044bf15d5ed..a0f6f45005bd 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -231,16 +231,16 @@ static void kexec_prepare_cpus(void) /* we are sure every CPU has IRQs off at this point */ kexec_all_irq_disabled = 1; - /* after we tell the others to go down */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - /* * Before removing MMU mappings make sure all CPUs have entered real * mode: */ kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); + /* after we tell the others to go down */ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + put_cpu(); } -- cgit v1.2.3 From 5a951c4e7e8df5d6df52bace1b4ff327885584d6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 17:25:59 +1000 Subject: powerpc/watchdog: don't update the watchdog timestamp if a lockup is detected The watchdog heartbeat timestamp is updated when the local heartbeat timer fires (or touch_nmi_watchdog() is called). This is an interesting data point, so don't overwrite it when the soft-NMI interrupt detects a hard lockup. That code came from a pre-merge version to prevent hard lockup messages flood, but that's taken care of with the stuck CPU logic now, so there is no reason to update the heartbeat timestamp here.
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 591f7c3af4ff..b2d3bdff03aa 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -245,8 +245,6 @@ void soft_nmi_interrupt(struct pt_regs *regs) tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { - per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); -- cgit v1.2.3 From 4e49226ea8e1810d2c14d9e92a290bad239b512b Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 17:26:00 +1000 Subject: powerpc/watchdog: provide more data in watchdog messages Provide timebase and timebase of last heartbeat in watchdog lockup messages. Also provide a stack trace of when a CPU becomes un-stuck, which can be useful -- it could be where irqs are re-enabled, so it may be the end of the critical section which is responsible for the latency which is useful information. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b2d3bdff03aa..1d82274f7e9f 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -111,7 +111,13 @@ static inline void wd_smp_unlock(unsigned long *flags) static void wd_lockup_ipi(struct pt_regs *regs) { - pr_emerg("CPU %d Hard LOCKUP\n", raw_smp_processor_id()); + int cpu = raw_smp_processor_id(); + u64 tb = get_tb(); + + pr_emerg("CPU %d Hard LOCKUP\n", cpu); + pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, per_cpu(wd_timer_tb, cpu), + tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000); print_modules(); print_irqtrace_events(current); if (regs) @@ -154,6 +160,9 @@ static void watchdog_smp_panic(int cpu, u64 tb) pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, wd_smp_last_reset_tb, + tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); if (!sysctl_hardlockup_all_cpu_backtrace) { /* @@ -194,10 +203,19 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) { if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { + struct pt_regs *regs = get_irq_regs(); unsigned long flags; - pr_emerg("CPU %d became unstuck\n", cpu); wd_smp_lock(&flags); + + pr_emerg("CPU %d became unstuck TB:%lld\n", + cpu, tb); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); } @@ -252,7 +270,11 @@ void soft_nmi_interrupt(struct pt_regs *regs) } set_cpu_stuck(cpu, tb); - pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); + 
pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", + cpu, (void *)regs->nip); + pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n", + cpu, tb, per_cpu(wd_timer_tb, cpu), + tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000); print_modules(); print_irqtrace_events(current); show_regs(regs); -- cgit v1.2.3 From df78d3f6148092d33a9a24c7a9cfac3d0220b484 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 4 May 2018 14:38:34 +0200 Subject: powerpc/livepatch: Implement reliable stack tracing for the consistency model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Power Architecture 64-Bit ELF V2 ABI" says in section 2.3.2.3: [...] There are several rules that must be adhered to in order to ensure reliable and consistent call chain backtracing: * Before a function calls any other function, it shall establish its own stack frame, whose size shall be a multiple of 16 bytes. – In instances where a function’s prologue creates a stack frame, the back-chain word of the stack frame shall be updated atomically with the value of the stack pointer (r1) when a back chain is implemented. (This must be supported as default by all ELF V2 ABI-compliant environments.) [...] – The function shall save the link register that contains its return address in the LR save doubleword of its caller’s stack frame before calling another function. To me this sounds like the equivalent of HAVE_RELIABLE_STACKTRACE. This patch may be unnecessarily limited to ppc64le, but OTOH the only user of this flag so far is livepatching, which is only implemented on PPCs with 64-LE, a.k.a. ELF ABI v2. Feel free to add other ppc variants, but so far only ppc64le got tested. This change also implements save_stack_trace_tsk_reliable() for ppc64le that checks for the above conditions, where possible.
Signed-off-by: Torsten Duwe Signed-off-by: Nicolai Stange Acked-by: Josh Poimboeuf Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/stacktrace.c | 119 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ebb90f09e74f..23247fa551e7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -221,6 +221,7 @@ config PPC select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE if PPC64 && CPU_LITTLE_ENDIAN select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index d534ed901538..26a50603177c 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -2,7 +2,7 @@ * Stack trace utility * * Copyright 2008 Christoph Hellwig, IBM Corp. - * + * Copyright 2018 SUSE Linux GmbH * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -11,11 +11,16 @@ */ #include +#include +#include #include #include +#include #include #include #include +#include +#include /* * Save stack-backtrace addresses into a stack_trace buffer. @@ -76,3 +81,115 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) save_context_stack(trace, regs->gpr[1], current, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_regs); + +#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE +int +save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace) +{ + unsigned long sp; + unsigned long stack_page = (unsigned long)task_stack_page(tsk); + unsigned long stack_end; + int graph_idx = 0; + + /* + * The last frame (unwinding first) may not yet have saved + * its LR onto the stack. 
+ */ + int firstframe = 1; + + if (tsk == current) + sp = current_stack_pointer(); + else + sp = tsk->thread.ksp; + + stack_end = stack_page + THREAD_SIZE; + if (!is_idle_task(tsk)) { + /* + * For user tasks, this is the SP value loaded on + * kernel entry, see "PACAKSAVE(r13)" in _switch() and + * system_call_common()/EXCEPTION_PROLOG_COMMON(). + * + * Likewise for non-swapper kernel threads, + * this also happens to be the top of the stack + * as setup by copy_thread(). + * + * Note that stack backlinks are not properly setup by + * copy_thread() and thus, a forked task() will have + * an unreliable stack trace until it's been + * _switch()'ed to for the first time. + */ + stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); + } else { + /* + * idle tasks have a custom stack layout, + * c.f. cpu_idle_thread_init(). + */ + stack_end -= STACK_FRAME_OVERHEAD; + } + + if (sp < stack_page + sizeof(struct thread_struct) || + sp > stack_end - STACK_FRAME_MIN_SIZE) { + return 1; + } + + for (;;) { + unsigned long *stack = (unsigned long *) sp; + unsigned long newsp, ip; + + /* sanity check: ABI requires SP to be aligned 16 bytes. */ + if (sp & 0xF) + return 1; + + /* Mark stacktraces with exception frames as unreliable. */ + if (sp <= stack_end - STACK_INT_FRAME_SIZE && + stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + return 1; + } + + newsp = stack[0]; + /* Stack grows downwards; unwinder may only go up. */ + if (newsp <= sp) + return 1; + + if (newsp != stack_end && + newsp > stack_end - STACK_FRAME_MIN_SIZE) { + return 1; /* invalid backlink, too far up. */ + } + + /* Examine the saved LR: it must point into kernel code. */ + ip = stack[STACK_FRAME_LR_SAVE]; + if (!firstframe && !__kernel_text_address(ip)) + return 1; + firstframe = 0; + + /* + * FIXME: IMHO these tests do not belong in + * arch-dependent code, they are generic. 
+ */ + ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL); + + /* + * Mark stacktraces with kretprobed functions on them + * as unreliable. + */ + if (ip == (unsigned long)kretprobe_trampoline) + return 1; + + if (!trace->skip) + trace->entries[trace->nr_entries++] = ip; + else + trace->skip--; + + if (newsp == stack_end) + break; + + if (trace->nr_entries >= trace->max_entries) + return -E2BIG; + + sp = newsp; + } + return 0; +} +EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable); +#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ -- cgit v1.2.3 From 3691d6145585f52a6292c158e72bcde59df8e0a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 May 2018 23:20:46 +1000 Subject: powerpc/syscalls: Switch trivial cases to SYSCALL_DEFINE Signed-off-by: Al Viro Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_32.c | 6 +++--- arch/powerpc/kernel/pci_64.c | 4 ++-- arch/powerpc/mm/subpage-prot.c | 4 +++- arch/powerpc/platforms/cell/spu_syscalls.c | 3 ++- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 85ad2f78b889..af36e46c3ed6 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -283,7 +284,8 @@ pci_bus_to_hose(int bus) * Note that the returned IO or memory base is a physical address */ -long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) +SYSCALL_DEFINE3(pciconfig_iobase, long, which, + unsigned long, bus, unsigned long, devfn) { struct pci_controller* hose; long result = -EOPNOTSUPP; @@ -307,5 +309,3 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) return result; } - - diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 15ce0306b092..dff28f903512 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -203,8 +203,8 @@ void 
pcibios_setup_phb_io_space(struct pci_controller *hose) #define IOBASE_ISA_IO 3 #define IOBASE_ISA_MEM 4 -long sys_pciconfig_iobase(long which, unsigned long in_bus, - unsigned long in_devfn) +SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, + unsigned long, in_devfn) { struct pci_controller* hose; struct pci_bus *tmp_bus, *bus = NULL; diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index f14a07c2fb90..9d16ee251fc0 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -185,7 +186,8 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, * in a 2-bit field won't allow writes to a page that is otherwise * write-protected. */ -long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) +SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, + unsigned long, len, u32 __user *, map) { struct mm_struct *mm = current->mm; struct subpage_prot_table *spt = &mm->context.spt; diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 5e6e0bad6db6..263413a34823 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -90,7 +91,7 @@ SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags, return ret; } -asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus) +SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus) { long ret; struct fd arg; -- cgit v1.2.3 From f3675644e172301e88354dc7bfca96c124301145 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 May 2018 23:20:47 +1000 Subject: powerpc/syscalls: signal_{32, 64} - switch to SYSCALL_DEFINE Signed-off-by: Al Viro [mpe: Fix sys_debug_setcontext() prototype to return long] Signed-off-by: Michael Ellerman --- 
arch/powerpc/include/asm/asm-prototypes.h | 14 ++++------- arch/powerpc/kernel/signal.h | 6 ++--- arch/powerpc/kernel/signal_32.c | 40 ++++++++++++++++++------------- arch/powerpc/kernel/signal_64.c | 15 ++++-------- 4 files changed, 35 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 068760d61e7e..6f661e3757c8 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -80,18 +80,12 @@ void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); /* signals, syscalls and interrupts */ -#ifdef CONFIG_PPC64 -int sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size, long r6, long r7, long r8, struct pt_regs *regs); -#else long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, - int ctx_size, int r6, int r7, int r8, struct pt_regs *regs); -int sys_debug_setcontext(struct ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg, - int r6, int r7, int r8, - struct pt_regs *regs); + long ctx_size); +#ifdef CONFIG_PPC32 +long sys_debug_setcontext(struct ucontext __user *ctx, + int ndbg, struct sig_dbg_op __user *dbg); int ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp); unsigned long __init early_init(unsigned long dt_ptr); diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index a6467f843acf..800433685888 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -49,10 +49,8 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ -extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs); -extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs); +extern 
long sys_rt_sigreturn(void); +extern long sys_sigreturn(void); static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 492f03451877..9cf8a03d3bc7 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -26,8 +26,8 @@ #include #include #include -#ifdef CONFIG_PPC64 #include +#ifdef CONFIG_PPC64 #include #else #include @@ -57,10 +57,6 @@ #ifdef CONFIG_PPC64 -#define sys_rt_sigreturn compat_sys_rt_sigreturn -#define sys_swapcontext compat_sys_swapcontext -#define sys_sigreturn compat_sys_sigreturn - #define old_sigaction old_sigaction32 #define sigcontext sigcontext32 #define mcontext mcontext32 @@ -1041,10 +1037,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp, } #endif -long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, int, ctx_size) +#else +SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, long, ctx_size) +#endif { + struct pt_regs *regs = current_pt_regs(); unsigned char tmp __maybe_unused; int ctx_has_vsx_region = 0; @@ -1132,10 +1133,14 @@ long sys_swapcontext(struct ucontext __user *old_ctx, return 0; } -long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs) +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +#else +SYSCALL_DEFINE0(rt_sigreturn) +#endif { struct rt_sigframe __user *rt_sf; + struct pt_regs *regs = current_pt_regs(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM struct ucontext __user *uc_transact; unsigned long msr_hi; @@ -1224,11 +1229,10 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, } #ifdef CONFIG_PPC32 -int sys_debug_setcontext(struct 
ucontext __user *ctx, - int ndbg, struct sig_dbg_op __user *dbg, - int r6, int r7, int r8, - struct pt_regs *regs) +SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, + int, ndbg, struct sig_dbg_op __user *, dbg) { + struct pt_regs *regs = current_pt_regs(); struct sig_dbg_op op; int i; unsigned char tmp __maybe_unused; @@ -1419,9 +1423,13 @@ badframe: /* * Do a signal return; undo the signal stack. */ -long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, - struct pt_regs *regs) +#ifdef CONFIG_PPC64 +COMPAT_SYSCALL_DEFINE0(sigreturn) +#else +SYSCALL_DEFINE0(sigreturn) +#endif { + struct pt_regs *regs = current_pt_regs(); struct sigframe __user *sf; struct sigcontext __user *sc; struct sigcontext sigctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 720117690822..83d51bf586c7 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -624,17 +625,14 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) /* * Handle {get,set,swap}_context operations */ -int sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - long ctx_size, long r6, long r7, long r8, struct pt_regs *regs) +SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, + struct ucontext __user *, new_ctx, long, ctx_size) { unsigned char tmp; sigset_t set; unsigned long new_msr = 0; int ctx_has_vsx_region = 0; - BUG_ON(regs != current->thread.regs); - if (new_ctx && get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR])) return -EFAULT; @@ -698,18 +696,15 @@ int sys_swapcontext(struct ucontext __user *old_ctx, * Do a signal return; undo the signal stack. 
*/ -int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7, unsigned long r8, - struct pt_regs *regs) +SYSCALL_DEFINE0(rt_sigreturn) { + struct pt_regs *regs = current_pt_regs(); struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; sigset_t set; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM unsigned long msr; #endif - BUG_ON(current->thread.regs != regs); - /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; -- cgit v1.2.3 From 4c392e6591e3257ebd08210e1ac0a175eefd0168 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 May 2018 23:20:48 +1000 Subject: powerpc/syscalls: switch rtas(2) to SYSCALL_DEFINE Signed-off-by: Al Viro [mpe: Update sys_ni.c for s/ppc_rtas/sys_rtas/] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/syscalls.h | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/kernel/rtas.c | 3 ++- arch/powerpc/kernel/systbl.S | 1 - arch/powerpc/kernel/systbl_chk.c | 1 - arch/powerpc/platforms/cell/spu_callbacks.c | 1 - kernel/sys_ni.c | 2 +- 7 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h index 1b90a3516a35..398171fdcd9f 100644 --- a/arch/powerpc/include/asm/syscalls.h +++ b/arch/powerpc/include/asm/syscalls.h @@ -16,7 +16,7 @@ asmlinkage long sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); asmlinkage long ppc64_personality(unsigned long personality); -asmlinkage int ppc_rtas(struct rtas_args __user *uargs); +asmlinkage long sys_rtas(struct rtas_args __user *uargs); #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_SYSCALLS_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index d61f9c96d916..b91701c0711a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ 
b/arch/powerpc/include/asm/systbl.h @@ -260,7 +260,7 @@ COMPAT_SYS_SPU(utimes) COMPAT_SYS_SPU(statfs64) COMPAT_SYS_SPU(fstatfs64) SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64) -PPC_SYS_SPU(rtas) +SYSCALL_SPU(rtas) OLDSYS(debug_setcontext) SYSCALL(ni_syscall) COMPAT_SYS(migrate_pages) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 3f1c4fcbe0aa..8afd146bc9c7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1050,7 +1051,7 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, } /* We assume to be passed big endian arguments */ -asmlinkage int ppc_rtas(struct rtas_args __user *uargs) +SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { struct rtas_args args; unsigned long flags; diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 7ccb7f81f8db..c7d5216d91d7 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -35,7 +35,6 @@ #endif #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) -#define PPC_SYS_SPU(func) PPC_SYS(func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) .section .rodata,"a" diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 55323a620cfe..28476e811644 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -31,7 +31,6 @@ #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) -#define PPC_SYS_SPU(func) PPC_SYS(func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) /* Just insert a marker for ni_syscalls */ diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index a494028b2cdf..d5bb8c8d769a 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -44,7 +44,6 @@ static void *spu_syscall_table[] = { 
#define SYSCALL_SPU(func) sys_##func, #define COMPAT_SYS_SPU(func) sys_##func, -#define PPC_SYS_SPU(func) ppc_##func, #define SYSX_SPU(f, f3264, f32) f, #include diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 9791364925dc..3751a511e2b8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -365,7 +365,7 @@ COND_SYSCALL(s390_pci_mmio_write); COND_SYSCALL_COMPAT(s390_ipc); /* powerpc */ -cond_syscall(ppc_rtas); +COND_SYSCALL(rtas); COND_SYSCALL(spu_run); COND_SYSCALL(spu_create); COND_SYSCALL(subpage_prot); -- cgit v1.2.3 From 454d7ef81ad2dc3be2bede61f0703f0e69f21dd3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 23:20:49 +1000 Subject: powerpc/syscalls: Add COMPAT_SPU_NEW() macro Currently the select system call is wired up with the SYSX_SPU() macro. The SYSX_SPU() is not handled by systbl_chk.c, which means the syscall number for select is not checked. That hides the fact that the syscall number for select is actually __NR__newselect not __NR_select. In a following patch we'd like to drop ppc32_select() which means select will become a regular COMPAT_SYS_SPU() syscall. But COMPAT_SYS_SPU() can't deal with the fact that the syscall number is actually __NR__newselect. We also can't just redefine __NR_select because that's still used for the old select call. So add a new COMPAT_SPU_NEW() that does the same thing as COMPAT_SYS_SPU() except it encodes that we're using the new number. 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/systbl.S | 1 + arch/powerpc/kernel/systbl_chk.c | 1 + arch/powerpc/platforms/cell/spu_callbacks.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index c7d5216d91d7..919a32746ede 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -35,6 +35,7 @@ #endif #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) +#define COMPAT_SPU_NEW(func) COMPAT_SYS(func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) .section .rodata,"a" diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 28476e811644..4653258722ac 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -31,6 +31,7 @@ #define SYSCALL_SPU(func) SYSCALL(func) #define COMPAT_SYS_SPU(func) COMPAT_SYS(func) +#define COMPAT_SPU_NEW(func) COMPAT_SYS(_new##func) #define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32) /* Just insert a marker for ni_syscalls */ diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index d5bb8c8d769a..8ae86200ef6c 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -44,6 +44,7 @@ static void *spu_syscall_table[] = { #define SYSCALL_SPU(func) sys_##func, #define COMPAT_SYS_SPU(func) sys_##func, +#define COMPAT_SPU_NEW(func) sys_##func, #define SYSX_SPU(f, f3264, f32) f, #include -- cgit v1.2.3 From 28b9c34aa60494c02aa80b5e2cf7210379c3716f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 2 May 2018 23:20:50 +1000 Subject: powerpc/syscalls: kill ppc32_select() it had always been pointless - compat_sys_select() sign-extends the first argument just fine on its own. 
Signed-off-by: Al Viro [mpe: Use COMPAT_SPU_NEW() to keep systbl_chk.sh happy] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/kernel/sys_ppc32.c | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index b91701c0711a..cdf528089a63 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -147,7 +147,7 @@ SYSCALL_SPU(setfsuid) SYSCALL_SPU(setfsgid) SYSCALL_SPU(llseek) COMPAT_SYS_SPU(getdents) -SYSX_SPU(sys_select,ppc32_select,sys_select) +COMPAT_SPU_NEW(select) SYSCALL_SPU(flock) SYSCALL_SPU(msync) COMPAT_SYS_SPU(readv) diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index c11c73373691..bdf58ba1a94b 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -52,15 +52,6 @@ #include #include - -asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - compat_uptr_t tvp_x) -{ - /* sign extend n */ - return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); -} - unsigned long compat_sys_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) -- cgit v1.2.3 From 53da14d0833a663cb98ef111ed91c575f2da9236 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 30 Apr 2018 13:27:36 +1000 Subject: powerpc: Make it clearer that systbl check errors are errors If the systbl_chk.sh checks fail we print a message, but with no indication that it's an error. That makes it hard to find in build logs with eg. grep. So prefix any output with "Error:". 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/systbl_chk.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh index 31b6e7c358ca..f2e356c2a345 100644 --- a/arch/powerpc/kernel/systbl_chk.sh +++ b/arch/powerpc/kernel/systbl_chk.sh @@ -16,7 +16,7 @@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file /^START_TABLE/ { num = 0; next; } /^END_TABLE/ { if (num != $2) { - printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n", + printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n", $2, num - 1; exit(1); } @@ -25,7 +25,7 @@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file { if (num == -1) next; if (($1 != -1) && ($1 != num)) { - printf "Syscall %s out of order (expected %s)\n", + printf "Error: Syscall %s out of order (expected %s)\n", $1, num; exit(1); }; -- cgit v1.2.3 From 89c190627257a38d5e4d7cb3e5382f0e6e089f7c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 May 2018 23:42:27 +1000 Subject: powerpc/prom: Drop support for old FDT versions In commit e6a6928c3ea1 ("of/fdt: Convert FDT functions to use libfdt") (Apr 2014), the generic flat device tree code dropped support for flat device tree's older than version 0x10 (16). We still have code in our CPU scanning to cope with flat device tree versions earlier than 2, which can now never trigger, so drop it. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9dbed488aba1..05e7fb47a7a4 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -332,25 +332,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * NOTE: This must match the parsing done in smp_setup_cpu_maps. 
*/ for (i = 0; i < nthreads; i++) { - /* - * version 2 of the kexec param format adds the phys cpuid of - * booted proc. - */ - if (fdt_version(initial_boot_params) >= 2) { - if (be32_to_cpu(intserv[i]) == - fdt_boot_cpuid_phys(initial_boot_params)) { - found = boot_cpu_count; - found_thread = i; - } - } else { - /* - * Check if it's the boot-cpu, set it's hw index now, - * unfortunately this format did not support booting - * off secondary threads. - */ - if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) - found = boot_cpu_count; + if (be32_to_cpu(intserv[i]) == + fdt_boot_cpuid_phys(initial_boot_params)) { + found = boot_cpu_count; + found_thread = i; } #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ -- cgit v1.2.3 From 9f9eae5ce717f497812dfc1bda5219bc589b455d Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 28 Mar 2018 21:58:11 +0200 Subject: powerpc/kvm: Prefer fault_in_pages_readable function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Directly use fault_in_pages_readable instead of manual __get_user code. 
Fix warning treated as error with W=1: arch/powerpc/kernel/kvm.c:675:6: error: variable ‘tmp’ set but not used [-Werror=unused-but-set-variable] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ad37f827a97..683b5b3805bd 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -672,14 +673,13 @@ static void kvm_use_magic_page(void) { u32 *p; u32 *start, *end; - u32 tmp; u32 features; /* Tell the host to map the magic page to -4096 on all CPUs */ on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } -- cgit v1.2.3 From 24c78586cc6798028205e12c34febf0337bcbded Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Apr 2018 19:08:16 +0200 Subject: powerpc: Avoid an unnecessary test and branch in longjmp() Doing the test at exit of the function avoids an unnecessary test and branch inside longjmp(). Semantics are unchanged. 
Signed-off-by: Christophe Leroy Reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 384357cb8bc0..c60b70da1e4b 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -85,10 +85,7 @@ _GLOBAL(setjmp) blr _GLOBAL(longjmp) - PPC_LCMPI r4,0 - bne 1f - li r4,1 -1: PPC_LL r13,4*SZL(r3) + PPC_LL r13,4*SZL(r3) PPC_LL r14,5*SZL(r3) PPC_LL r15,6*SZL(r3) PPC_LL r16,7*SZL(r3) @@ -113,7 +110,9 @@ _GLOBAL(longjmp) PPC_LL r1,SZL(r3) PPC_LL r2,2*SZL(r3) mtlr r0 - mr r3,r4 + mr. r3, r4 + bnelr + li r3, 1 blr _GLOBAL(current_stack_pointer) -- cgit v1.2.3 From a1f3ae3fe8a1883c339f1bc89d1c941b3809e084 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Apr 2018 19:08:18 +0200 Subject: powerpc/32: Use stmw/lmw for registers save/restore in asm arch/powerpc/Makefile activates -mmultiple on BE PPC32 configs in order to use multiple word instructions in functions entry/exit. The patch does the same for the asm parts, for consistency. On processors like the 8xx on which insn fetching is pretty slow, this speeds up registers save/restore. 
Signed-off-by: Christophe Leroy [mpe: PPC32 is BE only, so drop the endian checks] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 6 ++---- arch/powerpc/kernel/misc.S | 10 ++++++++++ arch/powerpc/kernel/ppc_save_regs.S | 4 ++++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 13f7f4c0e1ea..75ece56dcd62 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -80,10 +80,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #else #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \ - SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \ - REST_10GPRS(22, base) +#define SAVE_NVGPRS(base) stmw 13, GPR0+4*13(base) +#define REST_NVGPRS(base) lmw 13, GPR0+4*13(base) #endif #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index c60b70da1e4b..2f18fc1368d0 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -60,6 +60,10 @@ _GLOBAL(setjmp) PPC_STL r0,0(r3) PPC_STL r1,SZL(r3) PPC_STL r2,2*SZL(r3) +#ifdef CONFIG_PPC32 + mfcr r12 + stmw r12, 3*SZL(r3) +#else mfcr r0 PPC_STL r0,3*SZL(r3) PPC_STL r13,4*SZL(r3) @@ -81,10 +85,15 @@ _GLOBAL(setjmp) PPC_STL r29,20*SZL(r3) PPC_STL r30,21*SZL(r3) PPC_STL r31,22*SZL(r3) +#endif li r3,0 blr _GLOBAL(longjmp) +#ifdef CONFIG_PPC32 + lmw r12, 3*SZL(r3) + mtcrf 0x38, r12 +#else PPC_LL r13,4*SZL(r3) PPC_LL r14,5*SZL(r3) PPC_LL r15,6*SZL(r3) @@ -106,6 +115,7 @@ _GLOBAL(longjmp) PPC_LL r31,22*SZL(r3) PPC_LL r0,3*SZL(r3) mtcrf 0x38,r0 +#endif PPC_LL r0,0(r3) PPC_LL r1,SZL(r3) PPC_LL r2,2*SZL(r3) diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 
1b1787d52896..8afbe213d729 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -25,6 +25,9 @@ */ _GLOBAL(ppc_save_regs) PPC_STL r0,0*SZL(r3) +#ifdef CONFIG_PPC32 + stmw r2, 2*SZL(r3) +#else PPC_STL r2,2*SZL(r3) PPC_STL r3,3*SZL(r3) PPC_STL r4,4*SZL(r3) @@ -55,6 +58,7 @@ _GLOBAL(ppc_save_regs) PPC_STL r29,29*SZL(r3) PPC_STL r30,30*SZL(r3) PPC_STL r31,31*SZL(r3) +#endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) PPC_STL r4,1*SZL(r3) -- cgit v1.2.3 From ba01b058a52abcb0539d94ae976ef1c357e06730 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 May 2018 10:31:17 +0100 Subject: powerpc/rtas: Fix spelling mistake "Discharching" -> "Discharging" Trivial fix to spelling mistake in battery_charging array. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas-proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb070d8cad07..11ef978e95db 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -528,7 +528,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m, "EPOW power off" }; const char * battery_cyclestate[] = { "None", "In progress", "Requested" }; - const char * battery_charging[] = { "Charging", "Discharching", + const char * battery_charging[] = { "Charging", "Discharging", "No current flow" }; const char * ibm_drconnector[] = { "Empty", "Present", "Unusable", "Exchange" }; -- cgit v1.2.3 From cd6ef7eebf171bfcba7dc2df719c2a4958775040 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 17 May 2018 15:37:14 +1000 Subject: powerpc/ptrace: Fix enforcement of DAWR constraints Back when we first introduced the DAWR, in commit 4ae7ebe9522a ("powerpc: Change hardware breakpoint to allow longer ranges"), we screwed up the constraint making it a 1024 byte boundary rather than a 512. 
This makes the check overly permissive. Fortunately GDB is the only real user and it always did the right thing, so we never noticed. This fixes the constraint to 512 bytes. Fixes: 4ae7ebe9522a ("powerpc: Change hardware breakpoint to allow longer ranges") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/hw_breakpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 4c1012b80d3b..80547dad37da 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -178,8 +178,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) if (cpu_has_feature(CPU_FTR_DAWR)) { length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ - if ((bp->attr.bp_addr >> 10) != - ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10)) + if ((bp->attr.bp_addr >> 9) != + ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 9)) return -EINVAL; } if (info->len > -- cgit v1.2.3 From 4f7c06e26ec9cf7fe9f0c54dc90079b6a4f4b2c3 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 17 May 2018 15:37:15 +1000 Subject: powerpc/ptrace: Fix setting 512B aligned breakpoints with PTRACE_SET_DEBUGREG In commit e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when validating DAWR region end") we fixed setting the DAWR end point to its max value via PPC_PTRACE_SETHWDEBUG. Unfortunately we broke PTRACE_SET_DEBUGREG when setting a 512 byte aligned breakpoint. PTRACE_SET_DEBUGREG currently sets the length of the breakpoint to zero (memset() in hw_breakpoint_init()). This worked with arch_validate_hwbkpt_settings() before the above patch was applied but is now broken if the breakpoint is 512 byte aligned. This sets the length of the breakpoint to 8 bytes when using PTRACE_SET_DEBUGREG. 
Fixes: e2a800beaca1 ("powerpc/hw_brk: Fix off by one error when validating DAWR region end") Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d23cf632edf0..0f63dd5972e9 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2443,6 +2443,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Create a new breakpoint request if one doesn't exist already */ hw_breakpoint_init(&attr); attr.bp_addr = hw_brk.address; + attr.bp_len = 8; arch_bp_generic_fields(hw_brk.type, &attr.bp_type); -- cgit v1.2.3 From d1c7211281c5e1799f00b2228157530e0f7a671c Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:44 +0800 Subject: powerpc: Export msr_check_and_set() to modules PR KVM will need to reuse msr_check_and_set(). This patch exports this API for reuse. Signed-off-by: Simon Guo Reviewed-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1237f13fed51..25db000fa5b3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -154,6 +154,7 @@ unsigned long msr_check_and_set(unsigned long bits) return newmsr; } +EXPORT_SYMBOL_GPL(msr_check_and_set); void __msr_check_and_clear(unsigned long bits) { -- cgit v1.2.3 From eacbb218fbbab5923775059f7232a9622dc47b2a Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Wed, 23 May 2018 15:01:46 +0800 Subject: powerpc: Export tm_enable()/tm_disable/tm_abort() APIs This patch exports tm_enable()/tm_disable/tm_abort() APIs, which will be used for PR KVM transactional memory logic. 
Signed-off-by: Simon Guo Reviewed-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 3 +++ arch/powerpc/include/asm/tm.h | 2 -- arch/powerpc/kernel/tm.S | 12 ++++++++++++ arch/powerpc/mm/hash_utils_64.c | 1 + 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d9713ad62e3c..dfdcb2374c28 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -141,4 +141,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); void pnv_power9_force_smt4_catch(void); void pnv_power9_force_smt4_release(void); +void tm_enable(void); +void tm_disable(void); +void tm_abort(uint8_t cause); #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index b1658c97047c..e94f6db5e367 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -10,12 +10,10 @@ #ifndef __ASSEMBLY__ -extern void tm_enable(void); extern void tm_reclaim(struct thread_struct *thread, uint8_t cause); extern void tm_reclaim_current(uint8_t cause); extern void tm_recheckpoint(struct thread_struct *thread); -extern void tm_abort(uint8_t cause); extern void tm_save_sprs(struct thread_struct *thread); extern void tm_restore_sprs(struct thread_struct *thread); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index b92ac8e711db..ff12f47a96b6 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_VSX /* See fpu.S, this is borrowed from there */ @@ -55,6 +56,16 @@ _GLOBAL(tm_enable) or r4, r4, r3 mtmsrd r4 1: blr +EXPORT_SYMBOL_GPL(tm_enable); + +_GLOBAL(tm_disable) + mfmsr r4 + li r3, MSR_TM >> 32 + sldi r3, r3, 32 + andc r4, r4, r3 + mtmsrd r4 + blr +EXPORT_SYMBOL_GPL(tm_disable); 
_GLOBAL(tm_save_sprs) mfspr r0, SPRN_TFHAR @@ -78,6 +89,7 @@ _GLOBAL(tm_restore_sprs) _GLOBAL(tm_abort) TABORT(R3) blr +EXPORT_SYMBOL_GPL(tm_abort); /* void tm_reclaim(struct thread_struct *thread, * uint8_t cause) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0bd3790d35df..1bd8b4c1aab8 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -64,6 +64,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) -- cgit v1.2.3 From e4ccb1dae6bdef228d729c076c38161ef6e7ca34 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 24 May 2018 11:02:06 +0000 Subject: powerpc/8xx: fix invalid register expression in head_8xx.S New binutils generate the following warning AS arch/powerpc/kernel/head_8xx.o arch/powerpc/kernel/head_8xx.S: Assembler messages: arch/powerpc/kernel/head_8xx.S:916: Warning: invalid register expression This patch fixes it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/head_8xx.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d8670a37d70c..6cab07e76732 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -913,7 +913,7 @@ start_here: tovirt(r6,r6) lis r5, abatron_pteptrs@h ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(r0) /* Must match your Abatron config file */ + stw r5, 0xf0(0) /* Must match your Abatron config file */ tophys(r5,r5) stw r6, 0(r5) -- cgit v1.2.3 From eae5f709a4d738c52b6ab636981755d76349ea9e Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 6 Apr 2018 22:12:19 +0200 Subject: powerpc: Add __printf verification to prom_printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __printf is useful to verify format and arguments. 
Fix arg mismatch reported by gcc, remove the following warnings (with W=1): arch/powerpc/kernel/prom_init.c:1467:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1471:31: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1504:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1505:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1506:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1507:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1508:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1509:33: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:1975:39: error: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 2 has type ‘unsigned int’ arch/powerpc/kernel/prom_init.c:1986:27: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2567:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2567:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’ arch/powerpc/kernel/prom_init.c:2569:38: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 2 has type ‘long unsigned int’ 
arch/powerpc/kernel/prom_init.c:2569:46: error: format ‘%x’ expects argument of type ‘unsigned int’, but argument 3 has type ‘long unsigned int’ The patch also includes the arg mismatch fixes for the case with #define DEBUG_PROM (warnings not listed here). This patch also fixes the following warnings revealed by checkpatch: WARNING: Prefer using '"%s...", __func__' to using 'alloc_up', this function's name, in a string #101: FILE: arch/powerpc/kernel/prom_init.c:1235: + prom_debug("alloc_up(%lx, %lx)\n", size, align); and WARNING: Prefer using '"%s...", __func__' to using 'alloc_down', this function's name, in a string #138: FILE: arch/powerpc/kernel/prom_init.c:1278: + prom_debug("alloc_down(%lx, %lx, %s)\n", size, align, Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 114 ++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f9d6befb55a6..67f9c157bcc0 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -334,6 +334,7 @@ static void __init prom_print_dec(unsigned long val) call_prom("write", 3, 1, prom.stdout, buf+i, size); } +__printf(1, 2) static void __init prom_printf(const char *format, ...) 
{ const char *p, *q, *s; @@ -1160,7 +1161,7 @@ static void __init prom_send_capabilities(void) */ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n", cores, NR_CPUS); ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); @@ -1242,7 +1243,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) if (align) base = _ALIGN_UP(base, align); - prom_debug("alloc_up(%x, %x)\n", size, align); + prom_debug("%s(%lx, %lx)\n", __func__, size, align); if (ram_top == 0) prom_panic("alloc_up() called with mem not initialized\n"); @@ -1253,7 +1254,7 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) for(; (base + size) <= alloc_top; base = _ALIGN_UP(base + 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1265,12 +1266,12 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align) return 0; alloc_bottom = addr + size; - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1285,7 +1286,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, { unsigned long base, addr = 0; - prom_debug("alloc_down(%x, %x, %s)\n", size, align, + prom_debug("%s(%lx, %lx, %s)\n", __func__, size, 
align, highmem ? "(high)" : "(low)"); if (ram_top == 0) prom_panic("alloc_down() called with mem not initialized\n"); @@ -1313,7 +1314,7 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, base = _ALIGN_DOWN(alloc_top - size, align); for (; base > alloc_bottom; base = _ALIGN_DOWN(base - 0x100000, align)) { - prom_debug(" trying: 0x%x\n\r", base); + prom_debug(" trying: 0x%lx\n\r", base); addr = (unsigned long)prom_claim(base, size, 0); if (addr != PROM_ERROR && addr != 0) break; @@ -1324,12 +1325,12 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align, alloc_top = addr; bail: - prom_debug(" -> %x\n", addr); - prom_debug(" alloc_bottom : %x\n", alloc_bottom); - prom_debug(" alloc_top : %x\n", alloc_top); - prom_debug(" alloc_top_hi : %x\n", alloc_top_high); - prom_debug(" rmo_top : %x\n", rmo_top); - prom_debug(" ram_top : %x\n", ram_top); + prom_debug(" -> %lx\n", addr); + prom_debug(" alloc_bottom : %lx\n", alloc_bottom); + prom_debug(" alloc_top : %lx\n", alloc_top); + prom_debug(" alloc_top_hi : %lx\n", alloc_top_high); + prom_debug(" rmo_top : %lx\n", rmo_top); + prom_debug(" ram_top : %lx\n", ram_top); return addr; } @@ -1455,7 +1456,7 @@ static void __init prom_init_mem(void) if (size == 0) continue; - prom_debug(" %x %x\n", base, size); + prom_debug(" %lx %lx\n", base, size); if (base == 0 && (of_platform & PLATFORM_LPAR)) rmo_top = size; if ((base + size) > ram_top) @@ -1475,12 +1476,12 @@ static void __init prom_init_mem(void) if (prom_memory_limit) { if (prom_memory_limit <= alloc_bottom) { - prom_printf("Ignoring mem=%x <= alloc_bottom.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx <= alloc_bottom.\n", + prom_memory_limit); prom_memory_limit = 0; } else if (prom_memory_limit >= ram_top) { - prom_printf("Ignoring mem=%x >= ram_top.\n", - prom_memory_limit); + prom_printf("Ignoring mem=%lx >= ram_top.\n", + prom_memory_limit); prom_memory_limit = 0; } else { ram_top = 
prom_memory_limit; @@ -1512,12 +1513,13 @@ static void __init prom_init_mem(void) alloc_bottom = PAGE_ALIGN(prom_initrd_end); prom_printf("memory layout at init:\n"); - prom_printf(" memory_limit : %x (16 MB aligned)\n", prom_memory_limit); - prom_printf(" alloc_bottom : %x\n", alloc_bottom); - prom_printf(" alloc_top : %x\n", alloc_top); - prom_printf(" alloc_top_hi : %x\n", alloc_top_high); - prom_printf(" rmo_top : %x\n", rmo_top); - prom_printf(" ram_top : %x\n", ram_top); + prom_printf(" memory_limit : %lx (16 MB aligned)\n", + prom_memory_limit); + prom_printf(" alloc_bottom : %lx\n", alloc_bottom); + prom_printf(" alloc_top : %lx\n", alloc_top); + prom_printf(" alloc_top_hi : %lx\n", alloc_top_high); + prom_printf(" rmo_top : %lx\n", rmo_top); + prom_printf(" ram_top : %lx\n", ram_top); } static void __init prom_close_stdin(void) @@ -1578,7 +1580,7 @@ static void __init prom_instantiate_opal(void) return; } - prom_printf("instantiating opal at 0x%x...", base); + prom_printf("instantiating opal at 0x%llx...", base); if (call_prom_ret("call-method", 4, 3, rets, ADDR("load-opal-runtime"), @@ -1594,10 +1596,10 @@ static void __init prom_instantiate_opal(void) reserve_mem(base, size); - prom_debug("opal base = 0x%x\n", base); - prom_debug("opal align = 0x%x\n", align); - prom_debug("opal entry = 0x%x\n", entry); - prom_debug("opal size = 0x%x\n", (long)size); + prom_debug("opal base = 0x%llx\n", base); + prom_debug("opal align = 0x%llx\n", align); + prom_debug("opal entry = 0x%llx\n", entry); + prom_debug("opal size = 0x%llx\n", size); prom_setprop(opal_node, "/ibm,opal", "opal-base-address", &base, sizeof(base)); @@ -1674,7 +1676,7 @@ static void __init prom_instantiate_rtas(void) prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); - prom_debug("rtas size = 0x%x\n", (long)size); + prom_debug("rtas size = 0x%x\n", size); prom_debug("prom_instantiate_rtas: end...\n"); } @@ -1732,7 +1734,7 @@ static void __init 
prom_instantiate_sml(void) if (base == 0) prom_panic("Could not allocate memory for sml\n"); - prom_printf("instantiating sml at 0x%x...", base); + prom_printf("instantiating sml at 0x%llx...", base); memset((void *)base, 0, size); @@ -1751,8 +1753,8 @@ static void __init prom_instantiate_sml(void) prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size", &size, sizeof(size)); - prom_debug("sml base = 0x%x\n", base); - prom_debug("sml size = 0x%x\n", (long)size); + prom_debug("sml base = 0x%llx\n", base); + prom_debug("sml size = 0x%x\n", size); prom_debug("prom_instantiate_sml: end...\n"); } @@ -1845,7 +1847,7 @@ static void __init prom_initialize_tce_table(void) prom_debug("TCE table: %s\n", path); prom_debug("\tnode = 0x%x\n", node); - prom_debug("\tbase = 0x%x\n", base); + prom_debug("\tbase = 0x%llx\n", base); prom_debug("\tsize = 0x%x\n", minsize); /* Initialize the table to have a one-to-one mapping @@ -1932,12 +1934,12 @@ static void __init prom_hold_cpus(void) } prom_debug("prom_hold_cpus: start...\n"); - prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); - prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); - prom_debug(" 1) acknowledge = 0x%x\n", + prom_debug(" 1) spinloop = 0x%lx\n", (unsigned long)spinloop); + prom_debug(" 1) *spinloop = 0x%lx\n", *spinloop); + prom_debug(" 1) acknowledge = 0x%lx\n", (unsigned long)acknowledge); - prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge); - prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold); + prom_debug(" 1) *acknowledge = 0x%lx\n", *acknowledge); + prom_debug(" 1) secondary_hold = 0x%lx\n", secondary_hold); /* Set the common spinloop variable, so all of the secondary cpus * will block when they are awakened from their OF spinloop. 
@@ -1965,7 +1967,7 @@ static void __init prom_hold_cpus(void) prom_getprop(node, "reg", &reg, sizeof(reg)); cpu_no = be32_to_cpu(reg); - prom_debug("cpu hw idx = %lu\n", cpu_no); + prom_debug("cpu hw idx = %u\n", cpu_no); /* Init the acknowledge var which will be reset by * the secondary cpu when it awakens from its OF @@ -1975,7 +1977,7 @@ static void __init prom_hold_cpus(void) if (cpu_no != prom.cpu) { /* Primary Thread of non-boot cpu or any thread */ - prom_printf("starting cpu hw idx %lu... ", cpu_no); + prom_printf("starting cpu hw idx %u... ", cpu_no); call_prom("start-cpu", 3, 0, node, secondary_hold, cpu_no); @@ -1986,11 +1988,11 @@ static void __init prom_hold_cpus(void) if (*acknowledge == cpu_no) prom_printf("done\n"); else - prom_printf("failed: %x\n", *acknowledge); + prom_printf("failed: %lx\n", *acknowledge); } #ifdef CONFIG_SMP else - prom_printf("boot cpu hw idx %lu\n", cpu_no); + prom_printf("boot cpu hw idx %u\n", cpu_no); #endif /* CONFIG_SMP */ } @@ -2268,7 +2270,7 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end, while ((*mem_start + needed) > *mem_end) { unsigned long room, chunk; - prom_debug("Chunk exhausted, claiming more at %x...\n", + prom_debug("Chunk exhausted, claiming more at %lx...\n", alloc_bottom); room = alloc_top - alloc_bottom; if (room > DEVTREE_CHUNK_SIZE) @@ -2494,7 +2496,7 @@ static void __init flatten_device_tree(void) room = alloc_top - alloc_bottom - 0x4000; if (room > DEVTREE_CHUNK_SIZE) room = DEVTREE_CHUNK_SIZE; - prom_debug("starting device tree allocs at %x\n", alloc_bottom); + prom_debug("starting device tree allocs at %lx\n", alloc_bottom); /* Now try to claim that */ mem_start = (unsigned long)alloc_up(room, PAGE_SIZE); @@ -2557,7 +2559,7 @@ static void __init flatten_device_tree(void) int i; prom_printf("reserved memory map:\n"); for (i = 0; i < mem_reserve_cnt; i++) - prom_printf(" %x - %x\n", + prom_printf(" %llx - %llx\n", be64_to_cpu(mem_reserve_map[i].base),
be64_to_cpu(mem_reserve_map[i].size)); } @@ -2567,9 +2569,9 @@ static void __init flatten_device_tree(void) */ mem_reserve_cnt = MEM_RESERVE_MAP_SIZE; - prom_printf("Device tree strings 0x%x -> 0x%x\n", + prom_printf("Device tree strings 0x%lx -> 0x%lx\n", dt_string_start, dt_string_end); - prom_printf("Device tree struct 0x%x -> 0x%x\n", + prom_printf("Device tree struct 0x%lx -> 0x%lx\n", dt_struct_start, dt_struct_end); } @@ -3001,7 +3003,7 @@ static void __init prom_find_boot_cpu(void) prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); - prom_debug("Booting CPU hw index = %lu\n", prom.cpu); + prom_debug("Booting CPU hw index = %d\n", prom.cpu); } static void __init prom_check_initrd(unsigned long r3, unsigned long r4) @@ -3023,8 +3025,8 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) reserve_mem(prom_initrd_start, prom_initrd_end - prom_initrd_start); - prom_debug("initrd_start=0x%x\n", prom_initrd_start); - prom_debug("initrd_end=0x%x\n", prom_initrd_end); + prom_debug("initrd_start=0x%lx\n", prom_initrd_start); + prom_debug("initrd_end=0x%lx\n", prom_initrd_end); } #endif /* CONFIG_BLK_DEV_INITRD */ } @@ -3277,7 +3279,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); - prom_debug("->dt_header_start=0x%x\n", hdr); + prom_debug("->dt_header_start=0x%lx\n", hdr); } #ifdef CONFIG_PPC32 -- cgit v1.2.3 From d8731527acee53b4d46d59ff0b5fc36931ad0451 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Fri, 13 Apr 2018 20:41:43 +0200 Subject: powerpc/sparse: Fix plain integer as NULL pointer warning Trivial fix to remove the following sparse warnings: arch/powerpc/kernel/module_32.c:112:74: warning: Using plain integer as NULL pointer arch/powerpc/kernel/module_32.c:117:74: warning: Using plain integer as NULL pointer 
drivers/macintosh/via-pmu.c:1155:28: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:1230:20: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:1385:36: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:1752:23: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:2084:19: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:2110:32: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:2167:19: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:2183:19: warning: Using plain integer as NULL pointer drivers/macintosh/via-pmu.c:277:20: warning: Using plain integer as NULL pointer arch/powerpc/platforms/powermac/setup.c:155:67: warning: Using plain integer as NULL pointer arch/powerpc/platforms/powermac/setup.c:247:27: warning: Using plain integer as NULL pointer arch/powerpc/platforms/powermac/setup.c:249:27: warning: Using plain integer as NULL pointer arch/powerpc/platforms/powermac/setup.c:252:37: warning: Using plain integer as NULL pointer arch/powerpc/mm/tlb_hash32.c:127:21: warning: Using plain integer as NULL pointer arch/powerpc/mm/tlb_hash32.c:148:21: warning: Using plain integer as NULL pointer arch/powerpc/mm/tlb_hash32.c:44:21: warning: Using plain integer as NULL pointer arch/powerpc/mm/tlb_hash32.c:57:21: warning: Using plain integer as NULL pointer arch/powerpc/mm/tlb_hash32.c:87:21: warning: Using plain integer as NULL pointer arch/powerpc/kernel/btext.c:160:31: warning: Using plain integer as NULL pointer arch/powerpc/kernel/btext.c:167:22: warning: Using plain integer as NULL pointer arch/powerpc/kernel/btext.c:274:21: warning: Using plain integer as NULL pointer arch/powerpc/kernel/btext.c:285:31: warning: Using plain integer as NULL pointer arch/powerpc/include/asm/hugetlb.h:204:16: warning: Using plain integer as NULL pointer arch/powerpc/mm/ppc_mmu_32.c:170:21: warning: Using plain integer as 
NULL pointer arch/powerpc/platforms/powermac/pci.c:1227:23: warning: Using plain integer as NULL pointer arch/powerpc/platforms/powermac/pci.c:65:24: warning: Using plain integer as NULL pointer Also use `--fix` command line option from `script/checkpatch --strict` to remove the following: CHECK: Comparison to NULL could be written "!dispDeviceBase" #72: FILE: arch/powerpc/kernel/btext.c:160: + if (dispDeviceBase == NULL) CHECK: Comparison to NULL could be written "!vbase" #80: FILE: arch/powerpc/kernel/btext.c:167: + if (vbase == NULL) CHECK: Comparison to NULL could be written "!base" #89: FILE: arch/powerpc/kernel/btext.c:274: + if (base == NULL) CHECK: Comparison to NULL could be written "!dispDeviceBase" #98: FILE: arch/powerpc/kernel/btext.c:285: + if (dispDeviceBase == NULL) CHECK: Comparison to NULL could be written "strstr" #117: FILE: arch/powerpc/kernel/module_32.c:117: + if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL) CHECK: Comparison to NULL could be written "!Hash" #130: FILE: arch/powerpc/mm/ppc_mmu_32.c:170: + if (Hash == NULL) CHECK: Comparison to NULL could be written "Hash" #143: FILE: arch/powerpc/mm/tlb_hash32.c:44: + if (Hash != NULL) { CHECK: Comparison to NULL could be written "!Hash" #152: FILE: arch/powerpc/mm/tlb_hash32.c:57: + if (Hash == NULL) { CHECK: Comparison to NULL could be written "!Hash" #161: FILE: arch/powerpc/mm/tlb_hash32.c:87: + if (Hash == NULL) { CHECK: Comparison to NULL could be written "!Hash" #170: FILE: arch/powerpc/mm/tlb_hash32.c:127: + if (Hash == NULL) { CHECK: Comparison to NULL could be written "!Hash" #179: FILE: arch/powerpc/mm/tlb_hash32.c:148: + if (Hash == NULL) { ERROR: space required after that ';' (ctx:VxV) #192: FILE: arch/powerpc/platforms/powermac/pci.c:65: + for (; node != NULL;node = node->sibling) { CHECK: Comparison to NULL could be written "node" #192: FILE: arch/powerpc/platforms/powermac/pci.c:65: + for (; node != NULL;node = node->sibling) { CHECK: Comparison to NULL could be 
written "!region" #201: FILE: arch/powerpc/platforms/powermac/pci.c:1227: + if (region == NULL) CHECK: Comparison to NULL could be written "of_get_property" #214: FILE: arch/powerpc/platforms/powermac/setup.c:155: + if (of_get_property(np, "cache-unified", NULL) != NULL && dc) { CHECK: Comparison to NULL could be written "!np" #223: FILE: arch/powerpc/platforms/powermac/setup.c:247: + if (np == NULL) CHECK: Comparison to NULL could be written "np" #226: FILE: arch/powerpc/platforms/powermac/setup.c:249: + if (np != NULL) { CHECK: Comparison to NULL could be written "l2cr" #230: FILE: arch/powerpc/platforms/powermac/setup.c:252: + if (l2cr != NULL) { CHECK: Comparison to NULL could be written "via" #243: FILE: drivers/macintosh/via-pmu.c:277: + if (via != NULL) CHECK: Comparison to NULL could be written "current_req" #252: FILE: drivers/macintosh/via-pmu.c:1155: + if (current_req != NULL) { CHECK: Comparison to NULL could be written "!req" #261: FILE: drivers/macintosh/via-pmu.c:1230: + if (req == NULL || pmu_state != idle CHECK: Comparison to NULL could be written "!req" #270: FILE: drivers/macintosh/via-pmu.c:1385: + if (req == NULL) { CHECK: Comparison to NULL could be written "!pp" #288: FILE: drivers/macintosh/via-pmu.c:2084: + if (pp == NULL) CHECK: Comparison to NULL could be written "!pp" #297: FILE: drivers/macintosh/via-pmu.c:2110: + if (count < 1 || pp == NULL) CHECK: Comparison to NULL could be written "!pp" #306: FILE: drivers/macintosh/via-pmu.c:2167: + if (pp == NULL) CHECK: Comparison to NULL could be written "pp" #315: FILE: drivers/macintosh/via-pmu.c:2183: + if (pp != NULL) { Link: https://github.com/linuxppc/linux/issues/37 Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hugetlb.h | 2 +- arch/powerpc/kernel/btext.c | 8 ++++---- arch/powerpc/kernel/module_32.c | 4 ++-- arch/powerpc/mm/ppc_mmu_32.c | 2 +- arch/powerpc/mm/tlb_hash32.c | 10 +++++----- arch/powerpc/platforms/powermac/pci.c | 4 ++-- 
arch/powerpc/platforms/powermac/setup.c | 8 ++++---- drivers/macintosh/via-pmu.c | 18 +++++++++--------- 8 files changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 78540c074d70..96444bc08034 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -202,7 +202,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, unsigned pdshift) { - return 0; + return NULL; } #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6537cba1a758..54403144623f 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -157,14 +157,14 @@ void btext_map(void) /* By default, we are no longer mapped */ boot_text_mapped = 0; - if (dispDeviceBase == 0) + if (!dispDeviceBase) return; base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL; offset = ((unsigned long) dispDeviceBase) - base; size = dispDeviceRowBytes * dispDeviceRect[3] + offset + dispDeviceRect[0]; vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0)))); - if (vbase == 0) + if (!vbase) return; logicalDisplayBase = vbase + offset; boot_text_mapped = 1; @@ -270,7 +270,7 @@ static unsigned char * calc_base(int x, int y) unsigned char *base; base = logicalDisplayBase; - if (base == 0) + if (!base) base = dispDeviceBase; base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3); base += (y + dispDeviceRect[1]) * dispDeviceRowBytes; @@ -281,7 +281,7 @@ static unsigned char * calc_base(int x, int y) void btext_update_display(unsigned long phys, int width, int height, int depth, int pitch) { - if (dispDeviceBase == 0) + if (!dispDeviceBase) return; /* check it's the same frame buffer (within 256MB) */ diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 
5a7a78f12562..88d83771f462 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -109,12 +109,12 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr, for (i = 1; i < hdr->e_shnum; i++) { /* If it's called *.init*, and we're not init, we're not interested */ - if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL) != is_init) continue; /* We don't want to look at debug sections. */ - if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0) + if (strstr(secstrings + sechdrs[i].sh_name, ".debug")) continue; if (sechdrs[i].sh_type == SHT_RELA) { diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 2a049fb8523d..bea6c544e38f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -167,7 +167,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, { pmd_t *pmd; - if (Hash == 0) + if (!Hash) return; pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); if (!pmd_none(*pmd)) diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c index 702d7689d714..cf8472cf3d59 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/tlb_hash32.c @@ -41,7 +41,7 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) { unsigned long ptephys; - if (Hash != 0) { + if (Hash) { ptephys = __pa(ptep) & PAGE_MASK; flush_hash_pages(mm->context.id, addr, ptephys, 1); } @@ -54,7 +54,7 @@ EXPORT_SYMBOL(flush_hash_entry); */ void tlb_flush(struct mmu_gather *tlb) { - if (Hash == 0) { + if (!Hash) { /* * 603 needs to flush the whole TLB here since * it doesn't use a hash table. 
@@ -84,7 +84,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, int count; unsigned int ctx = mm->context.id; - if (Hash == 0) { + if (!Hash) { _tlbia(); return; } @@ -124,7 +124,7 @@ void flush_tlb_mm(struct mm_struct *mm) { struct vm_area_struct *mp; - if (Hash == 0) { + if (!Hash) { _tlbia(); return; } @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) struct mm_struct *mm; pmd_t *pmd; - if (Hash == 0) { + if (!Hash) { _tlbie(vmaddr); return; } diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 0b8174a79993..df762bb3c735 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -62,7 +62,7 @@ struct device_node *k2_skiplist[2]; static int __init fixup_one_level_bus_range(struct device_node *node, int higher) { - for (; node != 0;node = node->sibling) { + for (; node; node = node->sibling) { const int * bus_range; const unsigned int *class_code; int len; @@ -1219,7 +1219,7 @@ static void fixup_u4_pcie(struct pci_dev* dev) region = r; } /* Nothing found, bail */ - if (region == 0) + if (!region) return; /* Print things out */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index ab668cb72263..f40e87ca180b 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -152,7 +152,7 @@ static void pmac_show_cpuinfo(struct seq_file *m) of_get_property(np, "d-cache-size", NULL); seq_printf(m, "L2 cache\t:"); has_l2cache = 1; - if (of_get_property(np, "cache-unified", NULL) != 0 && dc) { + if (of_get_property(np, "cache-unified", NULL) && dc) { seq_printf(m, " %dK unified", *dc / 1024); } else { if (ic) @@ -244,12 +244,12 @@ static void __init l2cr_init(void) /* Checks "l2cr-value" property in the registry */ if (cpu_has_feature(CPU_FTR_L2CR)) { struct device_node *np = of_find_node_by_name(NULL, "cpus"); - if (np == 0) + if (!np) np = 
of_find_node_by_type(NULL, "cpu"); - if (np != 0) { + if (np) { const unsigned int *l2cr = of_get_property(np, "l2cr-value", NULL); - if (l2cr != 0) { + if (l2cr) { ppc_override_l2cr = 1; ppc_override_l2cr_value = *l2cr; _set_L2CR(0); diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 433dbeddfcf9..a3b6a4a703ab 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -274,7 +274,7 @@ int __init find_via_pmu(void) u64 taddr; const u32 *reg; - if (via != 0) + if (via) return 1; vias = of_find_node_by_name(NULL, "via-pmu"); if (vias == NULL) @@ -1152,7 +1152,7 @@ pmu_queue_request(struct adb_request *req) req->complete = 0; spin_lock_irqsave(&pmu_lock, flags); - if (current_req != 0) { + if (current_req) { last_req->next = req; last_req = req; } else { @@ -1227,7 +1227,7 @@ pmu_start(void) /* assert pmu_state == idle */ /* get the packet to send */ req = current_req; - if (req == 0 || pmu_state != idle + if (!req || pmu_state != idle || (/*req->reply_expected && */req_awaiting_reply)) return; @@ -1382,7 +1382,7 @@ next: if ((1 << pirq) & PMU_INT_ADB) { if ((data[0] & PMU_INT_ADB_AUTO) == 0) { struct adb_request *req = req_awaiting_reply; - if (req == 0) { + if (!req) { printk(KERN_ERR "PMU: extra ADB reply\n"); return; } @@ -1749,7 +1749,7 @@ pmu_shutdown(void) int pmu_present(void) { - return via != 0; + return via != NULL; } #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32) @@ -2081,7 +2081,7 @@ pmu_open(struct inode *inode, struct file *file) unsigned long flags; pp = kmalloc(sizeof(struct pmu_private), GFP_KERNEL); - if (pp == 0) + if (!pp) return -ENOMEM; pp->rb_get = pp->rb_put = 0; spin_lock_init(&pp->lock); @@ -2107,7 +2107,7 @@ pmu_read(struct file *file, char __user *buf, unsigned long flags; int ret = 0; - if (count < 1 || pp == 0) + if (count < 1 || !pp) return -EINVAL; if (!access_ok(VERIFY_WRITE, buf, count)) return -EFAULT; @@ -2164,7 +2164,7 @@ pmu_fpoll(struct file *filp, poll_table *wait) __poll_t 
mask = 0; unsigned long flags; - if (pp == 0) + if (!pp) return 0; poll_wait(filp, &pp->wait, wait); spin_lock_irqsave(&pp->lock, flags); @@ -2180,7 +2180,7 @@ pmu_release(struct inode *inode, struct file *file) struct pmu_private *pp = file->private_data; unsigned long flags; - if (pp != 0) { + if (pp) { file->private_data = NULL; spin_lock_irqsave(&all_pvt_lock, flags); list_del(&pp->list); -- cgit v1.2.3 From 85aa4b98419d74dd5cc914e089349800ac4997ee Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Mon, 23 Apr 2018 21:36:38 +0200 Subject: powerpc/mm/radix: Use do/while(0) trick for single statement block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 7a22d6321c3d ("powerpc/mm/radix: Update command line parsing for disable_radix") an `if` statement was added for a possible empty body (prom_debug). Fix the following warning, treated as error with W=1: arch/powerpc/kernel/prom_init.c:656:46: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] Suggested-by: Randy Dunlap Signed-off-by: Mathieu Malaterre Acked-by: Randy Dunlap Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 67f9c157bcc0..425992e393bc 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -103,7 +103,7 @@ int of_workarounds; #ifdef DEBUG_PROM #define prom_debug(x...) prom_printf(x) #else -#define prom_debug(x...) +#define prom_debug(x...) 
do { } while (0) #endif -- cgit v1.2.3 From bd13ac95f954570e01fba5a6caf771da754ac0e3 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 4 Apr 2018 22:10:28 +0200 Subject: powerpc/tau: Synchronize function prototypes and body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some function prototypes and body for Thermal Assist Units were not in sync. Update the function definition to match the existing function declaration found in `setup-common.c`, changing an `int` return type to a `u32` return type. Move the prototypes to a header file. Fix the following warnings, treated as error with W=1: arch/powerpc/kernel/tau_6xx.c:257:5: error: no previous prototype for ‘cpu_temp_both’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:262:5: error: no previous prototype for ‘cpu_temp’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:267:5: error: no previous prototype for ‘tau_interrupts’ [-Werror=missing-prototypes] Compile tested with CONFIG_TAU_INT. 
Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/setup-common.c | 6 ------ arch/powerpc/kernel/setup.h | 6 ++++++ arch/powerpc/kernel/tau_6xx.c | 7 +++++-- 4 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 061aa0f47bb1..bbf7ec582d60 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,7 +89,7 @@ atomic_t ppc_n_lost_interrupts; #ifdef CONFIG_TAU_INT extern int tau_initialized; -extern int tau_interrupts(int); +u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0af5c11b9e78..62b1a40d8957 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -192,12 +192,6 @@ void machine_halt(void) machine_hang(); } - -#ifdef CONFIG_TAU -extern u32 cpu_temp(unsigned long cpu); -extern u32 cpu_temp_both(unsigned long cpu); -#endif /* CONFIG_TAU */ - #ifdef CONFIG_SMP DEFINE_PER_CPU(unsigned int, cpu_pvr); #endif diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index d144df54ad40..c6a592b67386 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -62,4 +62,10 @@ void kvm_cma_reserve(void); static inline void kvm_cma_reserve(void) { }; #endif +#ifdef CONFIG_TAU +u32 cpu_temp(unsigned long cpu); +u32 cpu_temp_both(unsigned long cpu); +u32 tau_interrupts(unsigned long cpu); +#endif /* CONFIG_TAU */ + #endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8cdd852aedd1..8f02353c049e 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -27,6 +27,9 @@ #include #include #include +#include + +#include "setup.h" static struct tau_temp { @@ -259,12 +262,12 @@ u32 
cpu_temp_both(unsigned long cpu) return ((tau[cpu].high << 16) | tau[cpu].low); } -int cpu_temp(unsigned long cpu) +u32 cpu_temp(unsigned long cpu) { return ((tau[cpu].high + tau[cpu].low) / 2); } -int tau_interrupts(unsigned long cpu) +u32 tau_interrupts(unsigned long cpu) { return (tau[cpu].interrupts); } -- cgit v1.2.3 From 86e11b6e9c56e605475462bf9ba7c12dbe1e3e29 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:19:52 +0100 Subject: powerpc: Make function btext_initialize static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function can be static, make it so, this fix a warning treated as error with W=1: arch/powerpc/kernel/btext.c:173:5: error: no previous prototype for ‘btext_initialize’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/btext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 54403144623f..b2072d5bbf2b 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -170,7 +170,7 @@ void btext_map(void) boot_text_mapped = 1; } -int btext_initialize(struct device_node *np) +static int btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; -- cgit v1.2.3 From 9e0d86cd2d749998c3792059221cefa210b0177b Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:19:54 +0100 Subject: powerpc/tau: Make some function static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions can all be static, make it so. 
Fix warnings treated as errors with W=1: arch/powerpc/kernel/tau_6xx.c:53:6: error: no previous prototype for ‘set_thresholds’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:73:6: error: no previous prototype for ‘TAUupdate’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:208:13: error: no previous prototype for ‘TAU_init_smp’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:220:12: error: no previous prototype for ‘TAU_init’ [-Werror=missing-prototypes] arch/powerpc/kernel/tau_6xx.c:126:6: error: no previous prototype for ‘TAUException’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/tau_6xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 8f02353c049e..e2ab8a111b69 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -53,7 +53,7 @@ struct timer_list tau_timer; #define shrink_timer 2*HZ /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ -void set_thresholds(unsigned long cpu) +static void set_thresholds(unsigned long cpu) { #ifdef CONFIG_TAU_INT /* @@ -73,7 +73,7 @@ void set_thresholds(unsigned long cpu) #endif } -void TAUupdate(int cpu) +static void TAUupdate(int cpu) { unsigned thrm; @@ -208,7 +208,7 @@ static void tau_timeout_smp(struct timer_list *unused) int tau_initialized = 0; -void __init TAU_init_smp(void * info) +static void __init TAU_init_smp(void *info) { unsigned long cpu = smp_processor_id(); @@ -220,7 +220,7 @@ void __init TAU_init_smp(void * info) set_thresholds(cpu); } -int __init TAU_init(void) +static int __init TAU_init(void) { /* We assume in SMP that if one CPU has TAU support, they * all have it --BenH -- cgit v1.2.3 From c89ca593220931c150cffda24b4d4ccf82f13fc8 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 
21:20:03 +0100 Subject: powerpc/32: Add a missing include header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header file was missing from the includes. Fix the following warning, treated as error with W=1: arch/powerpc/kernel/pci_32.c:286:6: error: no previous prototype for ‘sys_pciconfig_iobase’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index af36e46c3ed6..d63b488d34d7 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From d647b210ac738b401c7f824bbebdcbcedbe7cb6b Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 22 Mar 2018 21:20:04 +0100 Subject: powerpc: Add a missing include header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header file was missing from the includes. 
Fix the following warning, treated as error with W=1: arch/powerpc/kernel/vecemu.c:260:5: error: no previous prototype for ‘emulate_altivec’ [-Werror=missing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vecemu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index 8812085883fd..4acd3fb2b38e 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include /* Functions in vector.S */ -- cgit v1.2.3 From 8ce621e1d946b1d1d7717337ab8dc3cbc4fd996f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 24 May 2018 22:48:34 -0500 Subject: powerpc/modules: remove unused mod_arch_specific.toc field The toc field in the mod_arch_specific struct isn't actually used anywhere, so remove it. Also the ftrace-specific fields are now common between 32-bit and 64-bit, so simplify the struct definition a bit by moving them out of the __powerpc64__ #ifdef. Signed-off-by: Josh Poimboeuf Reviewed-by: Kamalesh Babulal Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/module.h | 13 +++++-------- arch/powerpc/kernel/module_64.c | 1 - 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 18f7214d68b7..d8374f984f39 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -50,13 +50,6 @@ struct mod_arch_specific { unsigned int stubs_section; /* Index of stubs section in module */ unsigned int toc_section; /* What section is the TOC? */ bool toc_fixed; /* Have we fixed up .TOC.? 
*/ -#ifdef CONFIG_DYNAMIC_FTRACE - unsigned long toc; - unsigned long tramp; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - unsigned long tramp_regs; -#endif -#endif /* For module function descriptor dereference */ unsigned long start_opd; @@ -65,10 +58,14 @@ struct mod_arch_specific { /* Indices of PLT sections within module. */ unsigned int core_plt_section; unsigned int init_plt_section; +#endif /* powerpc64 */ + #ifdef CONFIG_DYNAMIC_FTRACE unsigned long tramp; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + unsigned long tramp_regs; +#endif #endif -#endif /* powerpc64 */ /* List of BUG addresses, source line numbers and filenames */ struct list_head bug_list; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f7667e2ebfcb..1b7419579820 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -823,7 +823,6 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { - mod->arch.toc = my_r2(sechdrs, mod); mod->arch.tramp = create_ftrace_stub(sechdrs, mod, (unsigned long)ftrace_caller); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -- cgit v1.2.3 From 5e3f0d15ae5f95bdde8d092a0884d2defe27d448 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 22 May 2018 14:38:20 +0530 Subject: powerpc/livepatch: Fix build error with kprobes disabled. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/powerpc/kernel/stacktrace.c: In function ‘save_stack_trace_tsk_reliable’: arch/powerpc/kernel/stacktrace.c:176:28: error: ‘kretprobe_trampoline’ undeclared if (ip == (unsigned long)kretprobe_trampoline) ^~~~~~~~~~~~~~~~~~~~ Fixes: df78d3f61480 ("powerpc/livepatch: Implement reliable stack tracing for the consistency model") Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/stacktrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 26a50603177c..8dd6ba0c7d35 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -168,13 +168,14 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk, * arch-dependent code, they are generic. */ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL); - +#ifdef CONFIG_KPROBES /* * Mark stacktraces with kretprobed functions on them * as unreliable. */ if (ip == (unsigned long)kretprobe_trampoline) return 1; +#endif if (!trace->skip) trace->entries[trace->nr_entries++] = ip; -- cgit v1.2.3 From af3901cbbd3de182aafb8ee553c825c0074df6a2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 30 May 2018 22:19:20 +1000 Subject: powerpc/kbuild: Remove CROSS32 defines from top level powerpc Makefile Switch VDSO32 build over to use CROSS32_COMPILE directly, and have it pass in -m32 after the standard c_flags. This allows endianness overrides to be removed and the endian and bitness flags moved into standard flags variables. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 7 ------- arch/powerpc/boot/Makefile | 16 +++++++++++----- arch/powerpc/kernel/vdso32/Makefile | 15 +++++++++++---- 3 files changed, 22 insertions(+), 16 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index d628724087c6..167b26a0780c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -17,13 +17,8 @@ HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= -CROSS32CC := $(CROSS32_COMPILE)gcc -CROSS32AR := $(CROSS32_COMPILE)ar - ifeq ($(HAS_BIARCH),y) ifeq ($(CROSS32_COMPILE),) -CROSS32CC := $(CC) -m32 -KBUILD_ARFLAGS += --target=elf32-powerpc ifdef CONFIG_PPC32 # These options will be overridden by any -mcpu option that the CPU # or platform code sets later on the command line, but they are needed @@ -35,8 +30,6 @@ endif endif endif -export CROSS32CC CROSS32AR - ifeq ($(CROSS_COMPILE),) KBUILD_DEFCONFIG := $(shell uname -m)_defconfig else diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 26d5d2a5b8e9..49767e06202c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -23,19 +23,23 @@ all: $(obj)/zImage compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ +ifdef CROSS32_COMPILE + BOOTCC := $(CROSS32_COMPILE)gcc + BOOTAR := $(CROSS32_COMPILE)ar +else + BOOTCC := $(CC) + BOOTAR := $(AR) +endif + BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ -D$(compress-y) -BOOTCC := $(CC) ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 else BOOTCFLAGS += -m32 -ifdef CROSS32_COMPILE - BOOTCC := $(CROSS32_COMPILE)gcc -endif endif BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) @@ -49,6 +53,8 @@ endif BOOTAFLAGS := 
-D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc +BOOTARFLAGS := -cr$(KBUILD_ARFLAGS) + ifdef CONFIG_DEBUG_INFO BOOTCFLAGS += -g endif @@ -202,7 +208,7 @@ quiet_cmd_bootas = BOOTAS $@ cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< quiet_cmd_bootar = BOOTAR $@ - cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ + cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@ $(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE $(call if_changed_dep,bootcc) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index b8c434d1d459..50112d4473bb 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -8,8 +8,15 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \ # Build rules -ifeq ($(CONFIG_PPC32),y) -CROSS32CC := $(CC) +ifdef CROSS32_COMPILE + VDSOCC := $(CROSS32_COMPILE)gcc +else + VDSOCC := $(CC) +endif + +CC32FLAGS := +ifdef CONFIG_PPC64 +CC32FLAGS += -m32 endif targets := $(obj-vdso32) vdso32.so vdso32.so.dbg @@ -45,9 +52,9 @@ $(obj-vdso32): %.o: %.S FORCE # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) + cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< + cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ -- cgit v1.2.3 From 8af1da40669609707303eecdb857f48a5ba5792d Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 29 May 2018 21:20:01 +0200 Subject: powerpc/prom: Fix %u/%llx usage since prom_printf() change In commit eae5f709a4d7 ("powerpc: Add __printf verification to prom_printf") __printf attribute was added to prom_printf(), which means GCC 
started warning about type/format mismatches. As part of that commit we changed some "%lx" formats to "%llx" where the type is actually unsigned long long. Unfortunately prom_printf() doesn't know how to print "%llx", it just prints a literal "lx", eg: reserved memory map: lx - lx lx - lx prom_printf() also doesn't know how to print "%u" (only "%lu"), it just prints a literal "u", eg: Max number of cores passed to firmware: u (NR_CPUS = 2048) Instead of: Max number of cores passed to firmware: 2048 (NR_CPUS = 2048) This commit adds support for the missing formatters. Fixes: eae5f709a4d7 ("powerpc: Add __printf verification to prom_printf") Reported-by: Michael Ellerman Reported-by: Stephen Rothwell Signed-off-by: Mathieu Malaterre Tested-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 73 +++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 425992e393bc..5425dd3d6a9f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -301,6 +301,10 @@ static void __init prom_print(const char *msg) } +/* + * Both prom_print_hex & prom_print_dec take an unsigned long as input so that + * we do not need __udivdi3 or __umoddi3 on 32bits. + */ static void __init prom_print_hex(unsigned long val) { int i, nibbles = sizeof(val)*2; @@ -341,6 +345,7 @@ static void __init prom_printf(const char *format, ...) va_list args; unsigned long v; long vs; + int n = 0; va_start(args, format); for (p = format; *p != 0; p = q) { @@ -359,6 +364,10 @@ static void __init prom_printf(const char *format, ...) ++q; if (*q == 0) break; + while (*q == 'l') { + ++q; + ++n; + } switch (*q) { case 's': ++q; @@ -367,39 +376,55 @@ static void __init prom_printf(const char *format, ...) 
break; case 'x': ++q; - v = va_arg(args, unsigned long); + switch (n) { + case 0: + v = va_arg(args, unsigned int); + break; + case 1: + v = va_arg(args, unsigned long); + break; + case 2: + default: + v = va_arg(args, unsigned long long); + break; + } prom_print_hex(v); break; - case 'd': + case 'u': ++q; - vs = va_arg(args, int); - if (vs < 0) { - prom_print("-"); - vs = -vs; + switch (n) { + case 0: + v = va_arg(args, unsigned int); + break; + case 1: + v = va_arg(args, unsigned long); + break; + case 2: + default: + v = va_arg(args, unsigned long long); + break; } - prom_print_dec(vs); + prom_print_dec(v); break; - case 'l': + case 'd': ++q; - if (*q == 0) + switch (n) { + case 0: + vs = va_arg(args, int); break; - else if (*q == 'x') { - ++q; - v = va_arg(args, unsigned long); - prom_print_hex(v); - } else if (*q == 'u') { /* '%lu' */ - ++q; - v = va_arg(args, unsigned long); - prom_print_dec(v); - } else if (*q == 'd') { /* %ld */ - ++q; + case 1: vs = va_arg(args, long); - if (vs < 0) { - prom_print("-"); - vs = -vs; - } - prom_print_dec(vs); + break; + case 2: + default: + vs = va_arg(args, long long); + break; } + if (vs < 0) { + prom_print("-"); + vs = -vs; + } + prom_print_dec(vs); break; } } -- cgit v1.2.3 From c95998811807d897ca112ea62d66716ed733d058 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 May 2018 06:03:53 +0000 Subject: powerpc/64: Fix strncpy() related build failures with GCC 8.1 GCC 8.1 warns about possible string truncation: arch/powerpc/kernel/nvram_64.c:1042:2: error: 'strncpy' specified bound 12 equals destination size [-Werror=stringop-truncation] strncpy(new_part->header.name, name, 12); arch/powerpc/platforms/ps3/repository.c:106:2: error: 'strncpy' output truncated before terminating nul copying 8 bytes from a string of the same length [-Werror=stringop-truncation] strncpy((char *)&n, text, 8); Fix it by using memcpy(). To make that safe we need to ensure the destination is pre-zeroed. 
Use kzalloc() in the nvram code and initialise the u64 to zero in the ps3 code. Signed-off-by: Christophe Leroy [mpe: Use kzalloc() in the nvram code, flesh out change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/nvram_64.c | 4 ++-- arch/powerpc/platforms/ps3/repository.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index ba681dac7b46..22e9d281324d 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -1030,7 +1030,7 @@ loff_t __init nvram_create_partition(const char *name, int sig, return -ENOSPC; /* Create our OS partition */ - new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); + new_part = kzalloc(sizeof(*new_part), GFP_KERNEL); if (!new_part) { pr_err("%s: kmalloc failed\n", __func__); return -ENOMEM; @@ -1039,7 +1039,7 @@ loff_t __init nvram_create_partition(const char *name, int sig, new_part->index = free_part->index; new_part->header.signature = sig; new_part->header.length = size; - strncpy(new_part->header.name, name, 12); + memcpy(new_part->header.name, name, strnlen(name, sizeof(new_part->header.name))); new_part->header.checksum = nvram_checksum(&new_part->header); rc = nvram_write_header(new_part); diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 50dbaf24b1ee..e49c887787c4 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -101,9 +101,9 @@ static u64 make_first_field(const char *text, u64 index) static u64 make_field(const char *text, u64 index) { - u64 n; + u64 n = 0; - strncpy((char *)&n, text, 8); + memcpy((char *)&n, text, strnlen(text, sizeof(n))); return n + index; } -- cgit v1.2.3 From 2479bfc9bc600dcce7f932d52dcfa8d677c41f93 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 May 2018 16:06:41 +0000 Subject: powerpc: Fix build by disabling attribute-alias warning 
for SYSCALL_DEFINEx GCC 8.1 emits warnings such as the following. As arch/powerpc code is built with -Werror, this breaks the build with GCC 8.1. In file included from arch/powerpc/kernel/pci_64.c:23: ./include/linux/syscalls.h:233:18: error: 'sys_pciconfig_iobase' alias between functions of incompatible types 'long int(long int, long unsigned int, long unsigned int)' and 'long int(long int, long int, long int)' [-Werror=attribute-alias] asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ ^~~ ./include/linux/syscalls.h:222:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) This patch inhibits those warnings. Signed-off-by: Christophe Leroy [mpe: Trim change log] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_64.c | 4 ++++ arch/powerpc/kernel/rtas.c | 4 ++++ arch/powerpc/kernel/signal_32.c | 8 ++++++++ arch/powerpc/kernel/signal_64.c | 4 ++++ arch/powerpc/kernel/syscalls.c | 4 ++++ arch/powerpc/mm/subpage-prot.c | 4 ++++ 6 files changed, 28 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index dff28f903512..812171c09f42 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -203,6 +203,9 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) #define IOBASE_ISA_IO 3 #define IOBASE_ISA_MEM 4 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, unsigned long, in_devfn) { @@ -256,6 +259,7 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, return -EOPNOTSUPP; } +#pragma GCC diagnostic pop #ifdef CONFIG_NUMA int pcibus_to_node(struct pci_bus *bus) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8afd146bc9c7..7fb9f83dcde8 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1051,6 +1051,9 @@ 
struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, } /* We assume to be passed big endian arguments */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { struct rtas_args args; @@ -1137,6 +1140,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } +#pragma GCC diagnostic pop /* * Call early during boot, before mem init, to retrieve the RTAS diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 9cf8a03d3bc7..342ac78f620f 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1037,6 +1037,9 @@ static int do_setcontext_tm(struct ucontext __user *ucp, } #endif +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" #ifdef CONFIG_PPC64 COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, int, ctx_size) @@ -1132,6 +1135,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } +#pragma GCC diagnostic pop #ifdef CONFIG_PPC64 COMPAT_SYSCALL_DEFINE0(rt_sigreturn) @@ -1228,6 +1232,9 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" #ifdef CONFIG_PPC32 SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, int, ndbg, struct sig_dbg_op __user *, dbg) @@ -1333,6 +1340,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, return 0; } #endif +#pragma GCC diagnostic pop /* * OK, we're invoking a handler diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 83d51bf586c7..d42b60020389 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -625,6 +625,9 @@ static long 
setup_trampoline(unsigned int syscall, unsigned int __user *tramp) /* * Handle {get,set,swap}_context operations */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { @@ -690,6 +693,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } +#pragma GCC diagnostic pop /* diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 466216506eb2..083fa06962fd 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -62,6 +62,9 @@ out: return ret; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -75,6 +78,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, { return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } +#pragma GCC diagnostic pop #ifdef CONFIG_PPC32 /* diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 9d16ee251fc0..75cb646a79c3 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -186,6 +186,9 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, * in a 2-bit field won't allow writes to a page that is otherwise * write-protected. 
*/ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { @@ -269,3 +272,4 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, up_write(&mm->mmap_sem); return err; } +#pragma GCC diagnostic pop -- cgit v1.2.3 From ebb37cf3ffd39fdb6ec5b07111f8bb2f11d92c5f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:25 +1000 Subject: powerpc/64: irq_work avoid interrupt when called with hardware irqs enabled irq_work_raise should not cause a decrementer exception unless it is called from NMI context. Doing so often just results in an immediate masked decrementer interrupt: <...>-550 90d... 4us : update_curr_rt <-dequeue_task_rt <...>-550 90d... 5us : dbs_update_util_handler <-update_curr_rt <...>-550 90d... 6us : arch_irq_work_raise <-irq_work_queue <...>-550 90d... 7us : soft_nmi_interrupt <-soft_nmi_common <...>-550 90d... 7us : printk_nmi_enter <-soft_nmi_interrupt <...>-550 90d.Z. 8us : rcu_nmi_enter <-soft_nmi_interrupt <...>-550 90d.Z. 9us : rcu_nmi_exit <-soft_nmi_interrupt <...>-550 90d... 9us : printk_nmi_exit <-soft_nmi_interrupt <...>-550 90d... 10us : cpuacct_charge <-update_curr_rt The soft_nmi_interrupt here is the call into the watchdog, due to the decrementer interrupt firing with irqs soft-disabled. This is harmless, but sub-optimal. When it's not called from NMI context or with interrupts enabled, mark the decrementer pending in the irq_happened mask directly, rather than having the masked decrementer interrupt handler do it. This will be replayed at the next local_irq_enable. See the comment for details. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/time.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 360e71d455cc..e7e8611e8863 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -513,6 +513,35 @@ static inline void clear_irq_work_pending(void) "i" (offsetof(struct paca_struct, irq_work_pending))); } +void arch_irq_work_raise(void) +{ + preempt_disable(); + set_irq_work_pending_flag(); + /* + * Non-nmi code running with interrupts disabled will replay + * irq_happened before it re-enables interrupts, so set the + * decrementer there instead of causing a hardware exception + * which would immediately hit the masked interrupt handler + * and have the net effect of setting the decrementer in + * irq_happened. + * + * NMI interrupts can not check this when they return, so the + * decrementer hardware exception is raised, which will fire + * when interrupts are next enabled. + * + * BookE does not support this yet, it must audit all NMI + * interrupt handlers to ensure they call nmi_enter() so this + * check would be correct. 
+ */ + if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) { + set_dec(1); + } else { + hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_DEC; + } + preempt_enable(); +} + #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); @@ -521,8 +550,6 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __this_cpu_read(irq_work_pending) #define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) -#endif /* 32 vs 64 bit */ - void arch_irq_work_raise(void) { preempt_disable(); @@ -531,6 +558,8 @@ void arch_irq_work_raise(void) preempt_enable(); } +#endif /* 32 vs 64 bit */ + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 -- cgit v1.2.3 From 36d632ea831fd2fa3cb62599a465825f59076f64 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:29 +1000 Subject: powerpc/64: remove start_tb and accum_tb from thread_struct These fields are only written to. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 4 ---- arch/powerpc/kernel/process.c | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c4b36a494a63..eff269adfa71 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -264,10 +264,6 @@ struct thread_struct { struct thread_fp_state *fp_save_area; int fpexc_mode; /* floating-point exception mode */ unsigned int align_ctl; /* alignment handling control */ -#ifdef CONFIG_PPC64 - unsigned long start_tb; /* Start purr when proc switched in */ - unsigned long accum_tb; /* Total accumulated purr for process */ -#endif #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM]; /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 25db000fa5b3..f4e5291584c5 100644 --- a/arch/powerpc/kernel/process.c +++ 
b/arch/powerpc/kernel/process.c @@ -1188,11 +1188,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); - long unsigned start_tb, current_tb; - start_tb = old_thread->start_tb; - cu->current_tb = current_tb = mfspr(SPRN_PURR); - old_thread->accum_tb += (current_tb - start_tb); - new_thread->start_tb = current_tb; + cu->current_tb = mfspr(SPRN_PURR); } #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 3d3a6021ddcbe9c31520e4e7b65e5ce5dc58274d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:30 +1000 Subject: powerpc/pseries: lparcfg calculate PURR on demand For SPLPAR, lparcfg provides a sum of PURR registers for all CPUs. Currently this is done by reading PURR in context switch and timer interrupt, and storing that into a per-CPU variable. These are summed to provide the value. This does not work with all timer schemes (e.g., NO_HZ_FULL), and it is sub-optimal for performance because it reads the PURR register on every context switch, although that's been difficult to distinguish from noise in the contxt_switch microbenchmark. This patch implements the sum by calling a function on each CPU, to read and add PURR values of each CPU. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/time.h | 8 -------- arch/powerpc/kernel/process.c | 14 -------------- arch/powerpc/kernel/time.c | 8 -------- arch/powerpc/platforms/pseries/lparcfg.c | 18 ++++++++++-------- 4 files changed, 10 insertions(+), 38 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index db546c034905..c965c79765c4 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -196,14 +196,6 @@ extern u64 mulhdu(u64, u64); extern void div128_by_32(u64 dividend_high, u64 dividend_low, unsigned divisor, struct div_result *dr); -/* Used to store Processor Utilization register (purr) values */ - -struct cpu_usage { - u64 current_tb; /* Holds the current purr register values */ -}; - -DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); - extern void secondary_cpu_time_init(void); extern void __init time_init(void); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f4e5291584c5..2a7fa5000cce 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -846,10 +846,6 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); -#ifdef CONFIG_PPC64 -DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); -#endif - static inline bool hw_brk_match(struct arch_hw_breakpoint *a, struct arch_hw_breakpoint *b) { @@ -1182,16 +1178,6 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC64 - /* - * Collect processor utilization data per process - */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); - cu->current_tb = mfspr(SPRN_PURR); - } -#endif /* CONFIG_PPC64 */ - #ifdef CONFIG_PPC_BOOK3S_64 batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 
e7e8611e8863..1fe6a24357e7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -597,14 +597,6 @@ static void __timer_interrupt(void) __this_cpu_inc(irq_stat.timer_irqs_others); } -#ifdef CONFIG_PPC64 - /* collect purr register values often, for accurate calculations */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - struct cpu_usage *cu = this_cpu_ptr(&cpu_usage_array); - cu->current_tb = mfspr(SPRN_PURR); - } -#endif - trace_timer_interrupt_exit(regs); } diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index c508c938dc71..7c872dc01bdb 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -52,18 +52,20 @@ * Track sum of all purrs across all processors. This is used to further * calculate usage values by different applications */ +static void cpu_get_purr(void *arg) +{ + atomic64_t *sum = arg; + + atomic64_add(mfspr(SPRN_PURR), sum); +} + static unsigned long get_purr(void) { - unsigned long sum_purr = 0; - int cpu; + atomic64_t purr = ATOMIC64_INIT(0); - for_each_possible_cpu(cpu) { - struct cpu_usage *cu; + on_each_cpu(cpu_get_purr, &purr, 1); - cu = &per_cpu(cpu_usage_array, cpu); - sum_purr += cu->current_tb; - } - return sum_purr; + return atomic64_read(&purr); } /* -- cgit v1.2.3 From 3f984620f9a4fe089c0a3c951b75a460211394bb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:31 +1000 Subject: powerpc: generic clockevents broadcast receiver call tick_receive_broadcast The broadcast tick recipient can call tick_receive_broadcast rather than re-running the full timer interrupt. It does not have to check for the next event time, because the sender already determined the timer has expired. It does not have to test irq_work_pending, because that's a direct decrementer interrupt and does not go through the clock events subsystem. And it does not have to read PURR because that was removed with the previous patch. 
This results in no code size change, but both the decrementer and broadcast path lengths are reduced. Cc: Srivatsa S. Bhat Cc: Preeti U Murthy Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hw_irq.h | 1 + arch/powerpc/include/asm/time.h | 1 - arch/powerpc/kernel/smp.c | 4 +- arch/powerpc/kernel/time.c | 84 ++++++++++++++++++--------------------- 4 files changed, 42 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3be8766427ef..9aec7237f8c2 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -55,6 +55,7 @@ extern void replay_system_reset(void); extern void __replay_interrupt(unsigned int vector); extern void timer_interrupt(struct pt_regs *); +extern void timer_broadcast_interrupt(void); extern void performance_monitor_exception(struct pt_regs *regs); extern void WatchdogException(struct pt_regs *regs); extern void unknown_exception(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index c965c79765c4..69b89f941252 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -28,7 +28,6 @@ extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); -extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); extern void hdec_interrupt(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c96f8fbc1942..f66eec89c14c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -158,7 +158,7 @@ static irqreturn_t reschedule_action(int irq, void *data) static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) { - tick_broadcast_ipi_handler(); + timer_broadcast_interrupt(); return IRQ_HANDLED; } @@ -279,7 +279,7 @@ irqreturn_t smp_ipi_demux_relaxed(void) if 
(all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) - tick_broadcast_ipi_handler(); + timer_broadcast_interrupt(); #ifdef CONFIG_NMI_IPI if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) nmi_ipi_action(0, NULL); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1fe6a24357e7..ad876906f847 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -567,47 +567,16 @@ void arch_irq_work_raise(void) #endif /* CONFIG_IRQ_WORK */ -static void __timer_interrupt(void) -{ - struct pt_regs *regs = get_irq_regs(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - struct clock_event_device *evt = this_cpu_ptr(&decrementers); - u64 now; - - trace_timer_interrupt_entry(regs); - - if (test_irq_work_pending()) { - clear_irq_work_pending(); - irq_work_run(); - } - - now = get_tb_or_rtc(); - if (now >= *next_tb) { - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - __this_cpu_inc(irq_stat.timer_irqs_event); - } else { - now = *next_tb - now; - if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); - __this_cpu_inc(irq_stat.timer_irqs_others); - } - - trace_timer_interrupt_exit(regs); -} - /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs * regs) +void timer_interrupt(struct pt_regs *regs) { - struct pt_regs *old_regs; + struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct pt_regs *old_regs; + u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. 
@@ -638,13 +607,47 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); + trace_timer_interrupt_entry(regs); + + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); + } + + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + __this_cpu_inc(irq_stat.timer_irqs_event); + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec(now); + /* We may have raced with new irq work */ + if (test_irq_work_pending()) + set_dec(1); + __this_cpu_inc(irq_stat.timer_irqs_others); + } - __timer_interrupt(); + trace_timer_interrupt_exit(regs); irq_exit(); set_irq_regs(old_regs); } EXPORT_SYMBOL(timer_interrupt); +void timer_broadcast_interrupt(void) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + struct pt_regs *regs = get_irq_regs(); + + trace_timer_interrupt_entry(regs); + *next_tb = ~(u64)0; + tick_receive_broadcast(); + __this_cpu_inc(irq_stat.timer_irqs_event); + trace_timer_interrupt_exit(regs); +} + /* * Hypervisor decrementer interrupts shouldn't occur but are sometimes * left pending on exit from a KVM guest. 
We don't need to do anything @@ -992,15 +995,6 @@ static int decrementer_shutdown(struct clock_event_device *dev) return 0; } -/* Interrupt handler for the timer broadcast IPI */ -void tick_broadcast_ipi_handler(void) -{ - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - *next_tb = get_tb_or_rtc(); - __timer_interrupt(); -} - static void register_decrementer_clockevent(int cpu) { struct clock_event_device *dec = &per_cpu(decrementers, cpu); -- cgit v1.2.3 From a7cba02deceda96df8018a827e6715d6f37be7b5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:32 +1000 Subject: powerpc: allow soft-NMI watchdog to cover timer interrupts with large decrementers Large decrementers (e.g., POWER9) can take a very long time to wrap, so when the timer interrupt handler sets the decrementer to max so as to avoid taking another decrementer interrupt when hard enabling interrupts before running timers, it effectively disables the soft NMI coverage for timer interrupts. Fix this by using the traditional 31-bit value instead, which wraps after a few seconds. masked interrupt code does the same thing, and in normal operation neither of these paths would ever wrap even the 31 bit value. Note: the SMP watchdog should catch timer interrupt lockups, but it is preferable for the local soft-NMI to catch them, mainly to avoid the IPI. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/time.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ad876906f847..5862a3611795 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -578,22 +578,29 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions.
- */ - set_dec(decrementer_max); - /* Some implementations of hotplug will get timer interrupts while * offline, just ignore these and we also need to set * decrementers_next_tb as MAX to make sure __check_irq_replay * don't replay timer interrupt when return, otherwise we'll trap * here infinitely :( */ - if (!cpu_online(smp_processor_id())) { + if (unlikely(!cpu_online(smp_processor_id()))) { *next_tb = ~(u64)0; + set_dec(decrementer_max); return; } + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. When the + * PPC_WATCHDOG (decrementer based) is configured, keep this at most + * 31 bits, which is about 4 seconds on most systems, which gives + * the watchdog a chance of catching timer interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + /* Conditionally hard-enable interrupts now that the DEC has been * bumped to its maximum value */ -- cgit v1.2.3 From bc9071133144acdbdb28cfc6ee5ce983d8fd5f81 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:33 +1000 Subject: powerpc: move timer broadcast code under GENERIC_CLOCKEVENTS_BROADCAST ifdef Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 8 ++++++++ arch/powerpc/kernel/time.c | 2 ++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f66eec89c14c..6f5e3a6e259c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -156,11 +156,13 @@ static irqreturn_t reschedule_action(int irq, void *data) return IRQ_HANDLED; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST static irqreturn_t tick_broadcast_ipi_action(int irq, void *data) { timer_broadcast_interrupt(); return IRQ_HANDLED; } +#endif #ifdef CONFIG_NMI_IPI static irqreturn_t nmi_ipi_action(int irq, void *data) @@ -173,7 +175,9 @@ static 
irqreturn_t nmi_ipi_action(int irq, void *data) static irq_handler_t smp_ipi_action[] = { [PPC_MSG_CALL_FUNCTION] = call_function_action, [PPC_MSG_RESCHEDULE] = reschedule_action, +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST [PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action, +#endif #ifdef CONFIG_NMI_IPI [PPC_MSG_NMI_IPI] = nmi_ipi_action, #endif @@ -187,7 +191,9 @@ static irq_handler_t smp_ipi_action[] = { const char *smp_ipi_name[] = { [PPC_MSG_CALL_FUNCTION] = "ipi call function", [PPC_MSG_RESCHEDULE] = "ipi reschedule", +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", +#endif [PPC_MSG_NMI_IPI] = "nmi ipi", }; @@ -278,8 +284,10 @@ irqreturn_t smp_ipi_demux_relaxed(void) generic_smp_call_function_interrupt(); if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE)) scheduler_ipi(); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST)) timer_broadcast_interrupt(); +#endif #ifdef CONFIG_NMI_IPI if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) nmi_ipi_action(0, NULL); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5862a3611795..23921f7b6e67 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -643,6 +643,7 @@ void timer_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL(timer_interrupt); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST void timer_broadcast_interrupt(void) { u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); @@ -654,6 +655,7 @@ void timer_broadcast_interrupt(void) __this_cpu_inc(irq_stat.timer_irqs_event); trace_timer_interrupt_exit(regs); } +#endif /* * Hypervisor decrementer interrupts shouldn't occur but are sometimes -- cgit v1.2.3 From 21bfd6a8e9999f40f9eae09ca6ba33e7f75f0be4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:34 +1000 Subject: powerpc: move a stray NMI IPI case under NMI_IPI ifdef Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 2 ++ 1 file changed, 2 
insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6f5e3a6e259c..b009a562c76b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -194,7 +194,9 @@ const char *smp_ipi_name[] = { #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST [PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast", #endif +#ifdef CONFIG_NMI_IPI [PPC_MSG_NMI_IPI] = "nmi ipi", +#endif }; /* optional function to request ipi, for controllers with >= 4 ipis */ -- cgit v1.2.3 From e360cd37f0e9bac7b5f623132549e2d4b6417399 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 5 May 2018 03:19:35 +1000 Subject: powerpc/time: account broadcast timer event interrupts separately These are not local timer interrupts but IPIs. It's good to be able to see how timer offloading is behaving, so split these out into their own category. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hardirq.h | 1 + arch/powerpc/kernel/irq.c | 6 ++++++ arch/powerpc/kernel/time.c | 5 +---- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 5986d473722b..20b01897ea5d 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -8,6 +8,7 @@ typedef struct { unsigned int __softirq_pending; unsigned int timer_irqs_event; + unsigned int broadcast_irqs_event; unsigned int timer_irqs_others; unsigned int pmu_irqs; unsigned int mce_exceptions; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bbf7ec582d60..0682fef1f385 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -508,6 +508,11 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); seq_printf(p, " Local timer interrupts for timer event device\n"); + seq_printf(p, "%*s: ", prec, "BCT"); + 
for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_printf(p, " Broadcast timer interrupts for timer event device\n"); + seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); @@ -567,6 +572,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event; + sum += per_cpu(irq_stat, cpu).broadcast_irqs_event; sum += per_cpu(irq_stat, cpu).pmu_irqs; sum += per_cpu(irq_stat, cpu).mce_exceptions; sum += per_cpu(irq_stat, cpu).spurious_irqs; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 23921f7b6e67..ed6b2abdde15 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -647,13 +647,10 @@ EXPORT_SYMBOL(timer_interrupt); void timer_broadcast_interrupt(void) { u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - struct pt_regs *regs = get_irq_regs(); - trace_timer_interrupt_entry(regs); *next_tb = ~(u64)0; tick_receive_broadcast(); - __this_cpu_inc(irq_stat.timer_irqs_event); - trace_timer_interrupt_exit(regs); + __this_cpu_inc(irq_stat.broadcast_irqs_event); } #endif -- cgit v1.2.3 From 3130a7bb6eb595f2d963976a4d3e57db77bcf06f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 10 May 2018 11:04:24 +1000 Subject: powerpc/64: change softe to irqmask in show_regs and xmon When the soft enabled flag was changed to a soft disable mask, xmon and register dump code was not updated to reflect that, which is confusing ('SOFTE: 1' previously meant interrupts were soft enabled, currently it means the opposite, the general interrupt type has been disabled). Fix this by using the name irqmask, and printing it in hex. 
Signed-off-by: Nicholas Piggin Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2a7fa5000cce..8f35b30956f4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1420,7 +1420,7 @@ void show_regs(struct pt_regs * regs) pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - pr_cont("SOFTE: %ld ", regs->softe); + pr_cont("IRQMASK: %lx ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d94a41254b11..0561c14b276b 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1637,7 +1637,7 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%px\n", current); #ifdef CONFIG_PPC64 - printf(" paca = 0x%px\t softe: %d\t irq_happened: 0x%02x\n", + printf(" paca = 0x%px\t irqmask: 0x%02x\t irq_happened: 0x%02x\n", local_paca, local_paca->irq_soft_mask, local_paca->irq_happened); #endif if (current) { -- cgit v1.2.3 From 819844285ef2b5d15466f5b5062514135ffba06c Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 11 May 2018 16:12:57 +1000 Subject: powerpc: Add TIDR CPU feature for POWER9 This patch adds a CPU feature bit to show whether the CPU has the TIDR register available, enabling as_notify/wait in userspace. 
Signed-off-by: Alastair D'Silva Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 3 ++- arch/powerpc/kernel/dt_cpu_ftrs.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 66fcab13c8b4..9c0a3083571b 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -215,6 +215,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000) #define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000) +#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000) #ifndef __ASSEMBLY__ @@ -462,7 +463,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \ - CPU_FTR_P9_TLBIE_BUG) + CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index c904477abaf3..4be1c0de9406 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -717,6 +717,7 @@ static __init void cpufeatures_cpu_quirks(void) if ((version & 0xffff0000) == 0x004e0000) { cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR); cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG; + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR; } /* -- cgit v1.2.3 From 3449f191ca9be1a6ac9757b8ab55f239092362e5 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 11 May 2018 16:12:58 +1000 Subject: powerpc: Use TIDR CPU feature to control TIDR allocation Switch the use of TIDR on 
its CPU feature, rather than assuming it is available based on architecture. Signed-off-by: Alastair D'Silva Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8f35b30956f4..e8b1d3c30669 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1151,7 +1151,7 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TAR, new_thread->tar); } - if (cpu_has_feature(CPU_FTR_ARCH_300) && + if (cpu_has_feature(CPU_FTR_P9_TIDR) && old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); #endif @@ -1553,7 +1553,7 @@ void clear_thread_tidr(struct task_struct *t) if (!t->thread.tidr) return; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + if (!cpu_has_feature(CPU_FTR_P9_TIDR)) { WARN_ON_ONCE(1); return; } @@ -1576,7 +1576,7 @@ int set_thread_tidr(struct task_struct *t) { int rc; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) + if (!cpu_has_feature(CPU_FTR_P9_TIDR)) return -EINVAL; if (t != current) -- cgit v1.2.3 From 71cc64a85d8d99936f6851709a07f18c87a0adab Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 11 May 2018 16:12:59 +1000 Subject: powerpc: use task_pid_nr() for TID allocation The current implementation of TID allocation, using a global IDR, may result in an errant process starving the system of available TIDs. Instead, use task_pid_nr(), as mentioned by the original author. The scenario described which prevented its use is not applicable, as set_thread_tidr can only be called after the task struct has been populated. In the unlikely event that 2 threads share the TID and are waiting, all potential outcomes have been determined safe.
Signed-off-by: Alastair D'Silva Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 1 - arch/powerpc/kernel/process.c | 122 ++++++++--------------------------- 2 files changed, 28 insertions(+), 95 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index be8c9fa23983..5b03d8a82409 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -94,6 +94,5 @@ static inline void clear_task_ebb(struct task_struct *t) extern int set_thread_uses_vas(void); extern int set_thread_tidr(struct task_struct *t); -extern void clear_thread_tidr(struct task_struct *t); #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e8b1d3c30669..ebcd3956f2be 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1479,103 +1479,41 @@ int set_thread_uses_vas(void) } #ifdef CONFIG_PPC64 -static DEFINE_SPINLOCK(vas_thread_id_lock); -static DEFINE_IDA(vas_thread_ida); - -/* - * We need to assign a unique thread id to each thread in a process. +/** + * Assign a TIDR (thread ID) for task @t and set it in the thread + * structure. For now, we only support setting TIDR for 'current' task. * - * This thread id, referred to as TIDR, and separate from the Linux's tgid, - * is intended to be used to direct an ASB_Notify from the hardware to the - * thread, when a suitable event occurs in the system. + * Since the TID value is a truncated form of it PID, it is possible + * (but unlikely) for 2 threads to have the same TID. In the unlikely event + * that 2 threads share the same TID and are waiting, one of the following + * cases will happen: * - * One such event is a "paste" instruction in the context of Fast Thread - * Wakeup (aka Core-to-core wake up in the Virtual Accelerator Switchboard - * (VAS) in POWER9. 
+ * 1. The correct thread is running, the wrong thread is not + * In this situation, the correct thread is woken and proceeds to pass it's + * condition check. * - * To get a unique TIDR per process we could simply reuse task_pid_nr() but - * the problem is that task_pid_nr() is not yet available copy_thread() is - * called. Fixing that would require changing more intrusive arch-neutral - * code in code path in copy_process()?. + * 2. Neither threads are running + * In this situation, neither thread will be woken. When scheduled, the waiting + * threads will execute either a wait, which will return immediately, followed + * by a condition check, which will pass for the correct thread and fail + * for the wrong thread, or they will execute the condition check immediately. * - * Further, to assign unique TIDRs within each process, we need an atomic - * field (or an IDR) in task_struct, which again intrudes into the arch- - * neutral code. So try to assign globally unique TIDRs for now. + * 3. The wrong thread is running, the correct thread is not + * The wrong thread will be woken, but will fail it's condition check and + * re-execute wait. The correct thread, when scheduled, will execute either + * it's condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by it's + * condition check (which will pass). * - * NOTE: TIDR 0 indicates that the thread does not need a TIDR value. - * For now, only threads that expect to be notified by the VAS - * hardware need a TIDR value and we assign values > 0 for those. 
- */ -#define MAX_THREAD_CONTEXT ((1 << 16) - 1) -static int assign_thread_tidr(void) -{ - int index; - int err; - unsigned long flags; - -again: - if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock_irqsave(&vas_thread_id_lock, flags); - err = ida_get_new_above(&vas_thread_ida, 1, &index); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > MAX_THREAD_CONTEXT) { - spin_lock_irqsave(&vas_thread_id_lock, flags); - ida_remove(&vas_thread_ida, index); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); - return -ENOMEM; - } - - return index; -} - -static void free_thread_tidr(int id) -{ - unsigned long flags; - - spin_lock_irqsave(&vas_thread_id_lock, flags); - ida_remove(&vas_thread_ida, id); - spin_unlock_irqrestore(&vas_thread_id_lock, flags); -} - -/* - * Clear any TIDR value assigned to this thread. - */ -void clear_thread_tidr(struct task_struct *t) -{ - if (!t->thread.tidr) - return; - - if (!cpu_has_feature(CPU_FTR_P9_TIDR)) { - WARN_ON_ONCE(1); - return; - } - - mtspr(SPRN_TIDR, 0); - free_thread_tidr(t->thread.tidr); - t->thread.tidr = 0; -} - -void arch_release_task_struct(struct task_struct *t) -{ - clear_thread_tidr(t); -} - -/* - * Assign a unique TIDR (thread id) for task @t and set it in the thread - * structure. For now, we only support setting TIDR for 'current' task. + * 4. Both threads are running + * Both threads will be woken. The wrong thread will fail it's condition check + * and execute another wait, while the correct thread will pass it's condition + * check. 
+ * + * @t: the task to set the thread ID for */ int set_thread_tidr(struct task_struct *t) { - int rc; - if (!cpu_has_feature(CPU_FTR_P9_TIDR)) return -EINVAL; @@ -1585,11 +1523,7 @@ int set_thread_tidr(struct task_struct *t) if (t->thread.tidr) return 0; - rc = assign_thread_tidr(); - if (rc < 0) - return rc; - - t->thread.tidr = rc; + t->thread.tidr = (u16)task_pid_nr(t); mtspr(SPRN_TIDR, t->thread.tidr); return 0; -- cgit v1.2.3 From 91d06971881f71d945910de128658038513d1b24 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 30 May 2018 18:48:04 +0530 Subject: powerpc/mm/hash: Add missing isync prior to kernel stack SLB switch Currently we do not have an isync, or any other context synchronizing instruction prior to the slbie/slbmte in _switch() that updates the SLB entry for the kernel stack. However that is not correct as outlined in the ISA. From Power ISA Version 3.0B, Book III, Chapter 11, page 1133: "Changing the contents of ... the contents of SLB entries ... can have the side effect of altering the context in which data addresses and instruction addresses are interpreted, and in which instructions are executed and data accesses are performed. ... These side effects need not occur in program order, and therefore may require explicit synchronization by software. ... The synchronizing instruction before the context-altering instruction ensures that all instructions up to and including that synchronizing instruction are fetched and executed in the context that existed before the alteration." And page 1136: "For data accesses, the context synchronizing instruction before the slbie, slbieg, slbia, slbmte, tlbie, or tlbiel instruction ensures that all preceding instructions that access data storage have completed to a point at which they have reported all exceptions they will cause." We're not aware of any bugs caused by this, but it should be fixed regardless. Add the missing isync when updating kernel stack SLB entry. 
Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V [mpe: Flesh out change log with more ISA text & explanation] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 51695608c68b..3d1af55e09dc 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -596,6 +596,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) * actually hit this code path. */ + isync slbie r6 slbie r6 /* Workaround POWER5 < DD2.1 issue */ slbmte r7,r0 -- cgit v1.2.3 From 2dc20f454dcf82c52ed41362ce0b3140ce8ad4be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Apr 2018 10:36:39 +0200 Subject: powerpc: rtas: clean up time handling The to_tm() helper function operates on a signed integer for the time, so it will suffer from overflow in 2038, even on 64-bit kernels. Rather than fix that function, this replaces its use in the rtas procfs implementation with the standard rtc_time64_to_tm() helper that is very similar but is not affected by the overflow. In order to actually support long times, the parser function gets changed to 64-bit user input and output as well. Note that the tm_mon and tm_year representation is slightly different, so we have to manually add an offset here. 
Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas-proc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 11ef978e95db..5f13c8358aeb 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -280,7 +280,7 @@ static int __init proc_rtas_init(void) __initcall(proc_rtas_init); -static int parse_number(const char __user *p, size_t count, unsigned long *val) +static int parse_number(const char __user *p, size_t count, u64 *val) { char buf[40]; char *end; @@ -293,7 +293,7 @@ static int parse_number(const char __user *p, size_t count, unsigned long *val) buf[count] = 0; - *val = simple_strtoul(buf, &end, 10); + *val = simple_strtoull(buf, &end, 10); if (*end && *end != '\n') return -EINVAL; @@ -307,17 +307,17 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct rtc_time tm; - unsigned long nowtime; + time64_t nowtime; int error = parse_number(buf, count, &nowtime); if (error) return error; power_on_time = nowtime; /* save the time */ - to_tm(nowtime, &tm); + rtc_time64_to_tm(nowtime, &tm); error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", @@ -373,14 +373,14 @@ static ssize_t ppc_rtas_clock_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct rtc_time tm; - unsigned long nowtime; + time64_t nowtime; int error = parse_number(buf, count, &nowtime); if (error) return error; - to_tm(nowtime, &tm); + rtc_time64_to_tm(nowtime, &tm); error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_year 
+ 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", @@ -401,8 +401,8 @@ static int ppc_rtas_clock_show(struct seq_file *m, void *v) unsigned int year, mon, day, hour, min, sec; year = ret[0]; mon = ret[1]; day = ret[2]; hour = ret[3]; min = ret[4]; sec = ret[5]; - seq_printf(m, "%lu\n", - mktime(year, mon, day, hour, min, sec)); + seq_printf(m, "%lld\n", + mktime64(year, mon, day, hour, min, sec)); } return 0; } @@ -731,7 +731,7 @@ static void get_location_code(struct seq_file *m, struct individual_sensor *s, static ssize_t ppc_rtas_tone_freq_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned long freq; + u64 freq; int error = parse_number(buf, count, &freq); if (error) return error; @@ -756,7 +756,7 @@ static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v) static ssize_t ppc_rtas_tone_volume_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned long volume; + u64 volume; int error = parse_number(buf, count, &volume); if (error) return error; -- cgit v1.2.3 From 5bfd643583b2e2a203163fd6b617cd9027054200 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Apr 2018 10:36:40 +0200 Subject: powerpc: use time64_t in read_persistent_clock Looking through the remaining users of the deprecated mktime() function, I found the powerpc rtc handlers, which use it in place of rtc_tm_to_time64(). To clean this up, I'm changing over the read_persistent_clock() function to the read_persistent_clock64() variant, and change all the platform specific handlers along with it. 
Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/opal.h | 2 +- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/kernel/rtas-rtc.c | 4 ++-- arch/powerpc/kernel/time.c | 7 +++---- arch/powerpc/platforms/8xx/m8xx_setup.c | 4 +--- arch/powerpc/platforms/maple/maple.h | 2 +- arch/powerpc/platforms/maple/time.c | 5 ++--- arch/powerpc/platforms/pasemi/pasemi.h | 2 +- arch/powerpc/platforms/pasemi/time.c | 4 ++-- arch/powerpc/platforms/powermac/pmac.h | 2 +- arch/powerpc/platforms/powermac/time.c | 31 +++++++++++-------------------- arch/powerpc/platforms/powernv/opal-rtc.c | 5 ++--- arch/powerpc/platforms/ps3/platform.h | 2 +- arch/powerpc/platforms/ps3/time.c | 2 +- 15 files changed, 31 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index ffe7c71e1132..a47de82fb8e2 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -83,7 +83,7 @@ struct machdep_calls { int (*set_rtc_time)(struct rtc_time *); void (*get_rtc_time)(struct rtc_time *); - unsigned long (*get_boot_time)(void); + time64_t (*get_boot_time)(void); unsigned char (*rtc_read_val)(int addr); void (*rtc_write_val)(int addr, unsigned char val); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 1dbeb6cd68fa..e1b2910c6e81 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -328,7 +328,7 @@ extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data); struct rtc_time; -extern unsigned long opal_get_boot_time(void); +extern time64_t opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_update_init(void); extern void opal_flash_update_print_message(void); diff --git a/arch/powerpc/include/asm/rtas.h 
b/arch/powerpc/include/asm/rtas.h index ec9dd79398ee..71e393c46a49 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -361,7 +361,7 @@ extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; -extern unsigned long rtas_get_boot_time(void); +extern time64_t rtas_get_boot_time(void); extern void rtas_get_rtc_time(struct rtc_time *rtc_time); extern int rtas_set_rtc_time(struct rtc_time *rtc_time); diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 49600985c7ef..a28239b8b0c0 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -13,7 +13,7 @@ #define MAX_RTC_WAIT 5000 /* 5 sec */ #define RTAS_CLOCK_BUSY (-2) -unsigned long __init rtas_get_boot_time(void) +time64_t __init rtas_get_boot_time(void) { int ret[8]; int error; @@ -38,7 +38,7 @@ unsigned long __init rtas_get_boot_time(void) return 0; } - return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); + return mktime64(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]); } /* NOTE: get_rtc_time will get an error if executed in interrupt context diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ed6b2abdde15..da06a4d84ae2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -825,7 +825,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -static void __read_persistent_clock(struct timespec *ts) +static void __read_persistent_clock(struct timespec64 *ts) { struct rtc_time tm; static int first = 1; @@ -849,11 +849,10 @@ static void __read_persistent_clock(struct timespec *ts) } ppc_md.get_rtc_time(&tm); - ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = rtc_tm_to_time64(&tm); } -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); diff --git 
a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 2188d691a40f..d76daa90647b 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -192,9 +192,7 @@ void mpc8xx_get_rtc_time(struct rtc_time *tm) /* Get time from the RTC. */ data = in_be32(&sys_tmr->sit_rtc); - to_tm(data, tm); - tm->tm_year -= 1900; - tm->tm_mon -= 1; + rtc_time64_to_tm(data, tm); immr_unmap(sys_tmr); return; } diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h index d10f4af3a42e..4f358b55c341 100644 --- a/arch/powerpc/platforms/maple/maple.h +++ b/arch/powerpc/platforms/maple/maple.h @@ -6,7 +6,7 @@ */ extern int maple_set_rtc_time(struct rtc_time *tm); extern void maple_get_rtc_time(struct rtc_time *tm); -extern unsigned long maple_get_boot_time(void); +extern time64_t maple_get_boot_time(void); extern void maple_calibrate_decr(void); extern void maple_pci_init(void); extern void maple_pci_irq_fixup(struct pci_dev *dev); diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index cfddc87f81bf..becf2ebf7df5 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -137,7 +137,7 @@ static struct resource rtc_iores = { .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; -unsigned long __init maple_get_boot_time(void) +time64_t __init maple_get_boot_time(void) { struct rtc_time tm; struct device_node *rtcs; @@ -170,7 +170,6 @@ unsigned long __init maple_get_boot_time(void) request_resource(&ioport_resource, &rtc_iores); maple_get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + return rtc_tm_to_time64(&tm); } diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 329d2a619254..70b56048ed1b 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -2,7 +2,7 @@ #ifndef 
_PASEMI_PASEMI_H #define _PASEMI_PASEMI_H -extern unsigned long pas_get_boot_time(void); +extern time64_t pas_get_boot_time(void); extern void pas_pci_init(void); extern void pas_pci_irq_fixup(struct pci_dev *dev); extern void pas_pci_dma_dev_setup(struct pci_dev *dev); diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c index fa54351ac268..ea815254ee7b 100644 --- a/arch/powerpc/platforms/pasemi/time.c +++ b/arch/powerpc/platforms/pasemi/time.c @@ -21,8 +21,8 @@ #include -unsigned long __init pas_get_boot_time(void) +time64_t __init pas_get_boot_time(void) { /* Let's just return a fake date right now */ - return mktime(2006, 1, 1, 12, 0, 0); + return mktime64(2006, 1, 1, 12, 0, 0); } diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h index 6f15b8804e9b..16a52afdb76e 100644 --- a/arch/powerpc/platforms/powermac/pmac.h +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -15,7 +15,7 @@ struct rtc_time; extern int pmac_newworld; extern long pmac_time_init(void); -extern unsigned long pmac_get_boot_time(void); +extern time64_t pmac_get_boot_time(void); extern void pmac_get_rtc_time(struct rtc_time *); extern int pmac_set_rtc_time(struct rtc_time *); extern void pmac_read_rtc_time(void); diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 274af6fa388e..d5d1c452038e 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -84,15 +84,6 @@ long __init pmac_time_init(void) return delta; } -#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) -static void to_rtc_time(unsigned long now, struct rtc_time *tm) -{ - to_tm(now, tm); - tm->tm_year -= 1900; - tm->tm_mon -= 1; -} -#endif - #if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) || \ defined(CONFIG_PMAC_SMU) static unsigned long from_rtc_time(struct rtc_time *tm) @@ -103,10 +94,10 @@ static unsigned long from_rtc_time(struct rtc_time *tm) #endif #ifdef 
CONFIG_ADB_CUDA -static unsigned long cuda_get_time(void) +static time64_t cuda_get_time(void) { struct adb_request req; - unsigned int now; + time64_t now; if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) return 0; @@ -117,10 +108,10 @@ static unsigned long cuda_get_time(void) req.reply_len); now = (req.reply[3] << 24) + (req.reply[4] << 16) + (req.reply[5] << 8) + req.reply[6]; - return ((unsigned long)now) - RTC_OFFSET; + return now - RTC_OFFSET; } -#define cuda_get_rtc_time(tm) to_rtc_time(cuda_get_time(), (tm)) +#define cuda_get_rtc_time(tm) rtc_time64_to_tm(cuda_get_time(), (tm)) static int cuda_set_rtc_time(struct rtc_time *tm) { @@ -147,10 +138,10 @@ static int cuda_set_rtc_time(struct rtc_time *tm) #endif #ifdef CONFIG_ADB_PMU -static unsigned long pmu_get_time(void) +static time64_t pmu_get_time(void) { struct adb_request req; - unsigned int now; + time64_t now; if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) return 0; @@ -160,10 +151,10 @@ static unsigned long pmu_get_time(void) req.reply_len); now = (req.reply[0] << 24) + (req.reply[1] << 16) + (req.reply[2] << 8) + req.reply[3]; - return ((unsigned long)now) - RTC_OFFSET; + return now - RTC_OFFSET; } -#define pmu_get_rtc_time(tm) to_rtc_time(pmu_get_time(), (tm)) +#define pmu_get_rtc_time(tm) rtc_time64_to_tm(pmu_get_time(), (tm)) static int pmu_set_rtc_time(struct rtc_time *tm) { @@ -188,13 +179,13 @@ static int pmu_set_rtc_time(struct rtc_time *tm) #endif #ifdef CONFIG_PMAC_SMU -static unsigned long smu_get_time(void) +static time64_t smu_get_time(void) { struct rtc_time tm; if (smu_get_rtc_time(&tm, 1)) return 0; - return from_rtc_time(&tm); + return rtc_tm_to_time64(&tm); } #else @@ -204,7 +195,7 @@ static unsigned long smu_get_time(void) #endif /* Can't be __init, it's called when suspending and resuming */ -unsigned long pmac_get_boot_time(void) +time64_t pmac_get_boot_time(void) { /* Get the time from the RTC, used only at boot time */ switch (sys_ctrler) { diff --git 
a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index aa2a5139462e..42ec642a3eba 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -34,7 +34,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) tm->tm_wday = -1; } -unsigned long __init opal_get_boot_time(void) +time64_t __init opal_get_boot_time(void) { struct rtc_time tm; u32 y_m_d; @@ -61,8 +61,7 @@ unsigned long __init opal_get_boot_time(void) y_m_d = be32_to_cpu(__y_m_d); h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); - return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + return rtc_tm_to_time64(&tm); } static __init int opal_time_init(void) diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h index 1809cfc562ee..9bc68f913466 100644 --- a/arch/powerpc/platforms/ps3/platform.h +++ b/arch/powerpc/platforms/ps3/platform.h @@ -57,7 +57,7 @@ static inline void ps3_smp_cleanup_cpu(int cpu) { } /* time */ void __init ps3_calibrate_decr(void); -unsigned long __init ps3_get_boot_time(void); +time64_t __init ps3_get_boot_time(void); void ps3_get_rtc_time(struct rtc_time *time); int ps3_set_rtc_time(struct rtc_time *time); diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index 11b45b58c81b..9dac125c997e 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -76,7 +76,7 @@ static u64 read_rtc(void) return rtc_val; } -unsigned long __init ps3_get_boot_time(void) +time64_t __init ps3_get_boot_time(void) { return read_rtc() + ps3_os_area_get_rtc_diff(); } -- cgit v1.2.3 From 5235afa89a246c9d5ab35996bc38681c474c3ed7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Apr 2018 10:36:41 +0200 Subject: powerpc: use time64_t in update_persistent_clock update_persistent_clock() is deprecated because it suffers from overflow in 2038 on 
32-bit architectures. This changes powerpc to use the update_persistent_clock64() replacement, and to pass down 64-bit timestamps consistently. This is now simpler, as we no longer have to worry about the offset numbers in tm_year and tm_mon that are different between the Linux conventions and RTAS. Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/time.c | 6 ++---- arch/powerpc/platforms/8xx/m8xx_setup.c | 7 +++---- arch/powerpc/platforms/powermac/time.c | 17 ++++------------- 3 files changed, 9 insertions(+), 21 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index da06a4d84ae2..b025acb32bfe 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -811,16 +811,14 @@ void __init generic_calibrate_decr(void) } } -int update_persistent_clock(struct timespec now) +int update_persistent_clock64(struct timespec64 now) { struct rtc_time tm; if (!ppc_md.set_rtc_time) return -ENODEV; - to_tm(now.tv_sec + 1 + timezone_offset, &tm); - tm.tm_year -= 1900; - tm.tm_mon -= 1; + rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm); return ppc_md.set_rtc_time(&tm); } diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index d76daa90647b..027c42d8966c 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -169,15 +169,14 @@ int mpc8xx_set_rtc_time(struct rtc_time *tm) { sitk8xx_t __iomem *sys_tmr1; sit8xx_t __iomem *sys_tmr2; - int time; + time64_t time; sys_tmr1 = immr_map(im_sitk); sys_tmr2 = immr_map(im_sit); - time = mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); + time = rtc_tm_to_time64(tm); out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY); - out_be32(&sys_tmr2->sit_rtc, time); + out_be32(&sys_tmr2->sit_rtc, (u32)time); out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY); immr_unmap(sys_tmr2); diff --git 
a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index d5d1c452038e..7c968e46736f 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -84,15 +84,6 @@ long __init pmac_time_init(void) return delta; } -#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) || \ - defined(CONFIG_PMAC_SMU) -static unsigned long from_rtc_time(struct rtc_time *tm) -{ - return mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday, - tm->tm_hour, tm->tm_min, tm->tm_sec); -} -#endif - #ifdef CONFIG_ADB_CUDA static time64_t cuda_get_time(void) { @@ -115,10 +106,10 @@ static time64_t cuda_get_time(void) static int cuda_set_rtc_time(struct rtc_time *tm) { - unsigned int nowtime; + time64_t nowtime; struct adb_request req; - nowtime = from_rtc_time(tm) + RTC_OFFSET; + nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET; if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) @@ -158,10 +149,10 @@ static time64_t pmu_get_time(void) static int pmu_set_rtc_time(struct rtc_time *tm) { - unsigned int nowtime; + time64_t nowtime; struct adb_request req; - nowtime = from_rtc_time(tm) + RTC_OFFSET; + nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET; if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) return -ENXIO; -- cgit v1.2.3 From 34efabe418953002d1c8e0ab28634929a9ddf433 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Apr 2018 10:36:42 +0200 Subject: powerpc: remove unused to_tm() helper to_tm() is now completely unused, the only reference being in the _dump_time() helper that is also unused. This removes both, leaving the rest of the powerpc RTC code y2038 safe as far as the hardware supports. 
Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/time.h | 2 -- arch/powerpc/kernel/time.c | 50 --------------------------------------- arch/powerpc/platforms/ps3/time.c | 24 ------------------- 3 files changed, 76 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 69b89f941252..b80d492ceb29 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -26,8 +26,6 @@ extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; extern struct clock_event_device decrementer_clockevent; -struct rtc_time; -extern void to_tm(int tim, struct rtc_time * tm); extern void generic_calibrate_decr(void); extern void hdec_interrupt(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index b025acb32bfe..2530cf60b839 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1159,56 +1159,6 @@ void __init time_init(void) #endif } - -#define FEBRUARY 2 -#define STARTOFTIME 1970 -#define SECDAY 86400L -#define SECYR (SECDAY * 365) -#define leapyear(year) ((year) % 4 == 0 && \ - ((year) % 100 != 0 || (year) % 400 == 0)) -#define days_in_year(a) (leapyear(a) ? 
366 : 365) -#define days_in_month(a) (month_days[(a) - 1]) - -static int month_days[12] = { - 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 -}; - -void to_tm(int tim, struct rtc_time * tm) -{ - register int i; - register long hms, day; - - day = tim / SECDAY; - hms = tim % SECDAY; - - /* Hours, minutes, seconds are easy */ - tm->tm_hour = hms / 3600; - tm->tm_min = (hms % 3600) / 60; - tm->tm_sec = (hms % 3600) % 60; - - /* Number of years in days */ - for (i = STARTOFTIME; day >= days_in_year(i); i++) - day -= days_in_year(i); - tm->tm_year = i; - - /* Number of months in days left */ - if (leapyear(tm->tm_year)) - days_in_month(FEBRUARY) = 29; - for (i = 1; day >= days_in_month(i); i++) - day -= days_in_month(i); - days_in_month(FEBRUARY) = 28; - tm->tm_mon = i; - - /* Days are what is left over (+1) from all that. */ - tm->tm_mday = day + 1; - - /* - * No-one uses the day of the week. - */ - tm->tm_wday = -1; -} -EXPORT_SYMBOL(to_tm); - /* * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit * result. 
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index 9dac125c997e..08ca76e23d09 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -28,30 +28,6 @@ #include "platform.h" -#define dump_tm(_a) _dump_tm(_a, __func__, __LINE__) -static void _dump_tm(const struct rtc_time *tm, const char* func, int line) -{ - pr_debug("%s:%d tm_sec %d\n", func, line, tm->tm_sec); - pr_debug("%s:%d tm_min %d\n", func, line, tm->tm_min); - pr_debug("%s:%d tm_hour %d\n", func, line, tm->tm_hour); - pr_debug("%s:%d tm_mday %d\n", func, line, tm->tm_mday); - pr_debug("%s:%d tm_mon %d\n", func, line, tm->tm_mon); - pr_debug("%s:%d tm_year %d\n", func, line, tm->tm_year); - pr_debug("%s:%d tm_wday %d\n", func, line, tm->tm_wday); -} - -#define dump_time(_a) _dump_time(_a, __func__, __LINE__) -static void __maybe_unused _dump_time(int time, const char *func, - int line) -{ - struct rtc_time tm; - - to_tm(time, &tm); - - pr_debug("%s:%d time %d\n", func, line, time); - _dump_tm(&tm, func, line); -} - void __init ps3_calibrate_decr(void) { int result; -- cgit v1.2.3 From 796b9f5b317a46d1b744f661c38a62b1280a6ab7 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:28 +1000 Subject: powerpc/eeh: Add final message for successful recovery Add a single log line at the end of successful EEH recovery, so that it's clear that event processing has finished. 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b8a329f04814..07e0a42035ce 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -778,14 +778,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { - pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n", + pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); goto hard_fail; } - pr_warn("EEH: This PCI device has failed %d times in the last hour\n", - pe->freeze_count); + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -911,6 +910,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + pr_info("EEH: Recovery successful.\n"); goto final; hard_fail: -- cgit v1.2.3 From 46d4be41b987a6b2d25a2ebdd94cafb44e21d6c5 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:30 +1000 Subject: powerpc/eeh: Fix use-after-release of EEH driver Correct two cases where eeh_pcid_get() is used to reference the driver's module but the reference is dropped before the driver pointer is used. In eeh_rmv_device() also refactor a little so that only two calls to eeh_pcid_put() are needed, rather than three and the reference isn't taken at all if it wasn't needed. 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 07e0a42035ce..54333f6c9d67 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -458,9 +458,11 @@ static void *eeh_add_virt_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { - eeh_pcid_put(dev); - if (driver->err_handler) + if (driver->err_handler) { + eeh_pcid_put(dev); return NULL; + } + eeh_pcid_put(dev); } #ifdef CONFIG_PCI_IOV @@ -497,17 +499,19 @@ static void *eeh_rmv_device(void *data, void *userdata) if (eeh_dev_removed(edev)) return NULL; - driver = eeh_pcid_get(dev); - if (driver) { - eeh_pcid_put(dev); - if (removed && - eeh_pe_passed(edev->pe)) - return NULL; - if (removed && - driver->err_handler && - driver->err_handler->error_detected && - driver->err_handler->slot_reset) + if (removed) { + if (eeh_pe_passed(edev->pe)) return NULL; + driver = eeh_pcid_get(dev); + if (driver) { + if (driver->err_handler && + driver->err_handler->error_detected && + driver->err_handler->slot_reset) { + eeh_pcid_put(dev); + return NULL; + } + eeh_pcid_put(dev); + } } /* Remove it from PCI subsystem */ -- cgit v1.2.3 From a0bd54641be9df3a84f693035fbc2c31f7ca644e Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:31 +1000 Subject: powerpc/eeh: Remove unused eeh_pcid_name() Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 54333f6c9d67..ca9a73fe9cc5 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,20 +39,6 @@ struct eeh_rmv_data { int 
removed; }; -/** - * eeh_pcid_name - Retrieve name of PCI device driver - * @pdev: PCI device - * - * This routine is used to retrieve the name of PCI device driver - * if that's valid. - */ -static inline const char *eeh_pcid_name(struct pci_dev *pdev) -{ - if (pdev && pdev->dev.driver) - return pdev->dev.driver->name; - return ""; -} - /** * eeh_pcid_get - Get the PCI device driver * @pdev: PCI device -- cgit v1.2.3 From d6c4932fbf2421bfd7f8fe75baa4ccef4a845f79 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:32 +1000 Subject: powerpc/eeh: Strengthen types of eeh traversal functions The traversal functions eeh_pe_traverse() and eeh_pe_dev_traverse() both provide their first argument as void * but every single user casts it to the expected type. Change the type of the first parameter from void * to the appropriate type, and clean up all uses. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 7 ++++--- arch/powerpc/kernel/eeh.c | 13 +++++-------- arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++-------------------- arch/powerpc/kernel/eeh_pe.c | 19 +++++++------------ 4 files changed, 26 insertions(+), 43 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index c2266ca61853..f02e0400e6f2 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -262,7 +262,8 @@ static inline bool eeh_state_active(int state) == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); } -typedef void *(*eeh_traverse_func)(void *data, void *flag); +typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag); +typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); @@ -272,9 +273,9 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev); int 
eeh_rmv_from_parent_pe(struct eeh_dev *edev); void eeh_pe_update_time_stamp(struct eeh_pe *pe); void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag); + eeh_pe_traverse_func fn, void *flag); void *eeh_pe_dev_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag); + eeh_edev_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index bc640e4c5ca5..f82dade4fb9a 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -263,9 +263,8 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) return n; } -static void *eeh_dump_pe_log(void *data, void *flag) +static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = data; struct eeh_dev *edev, *tmp; size_t *plen = flag; @@ -686,9 +685,9 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) return rc; } -static void *eeh_disable_and_save_dev_state(void *data, void *userdata) +static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev, + void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -714,9 +713,8 @@ static void *eeh_disable_and_save_dev_state(void *data, void *userdata) return NULL; } -static void *eeh_restore_dev_state(void *data, void *userdata) +static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dn *pdn = eeh_dev_to_pdn(edev); struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = userdata; @@ -856,11 +854,10 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat * the indicated device and its children so that the bunch of the * devices could be reset properly. 
*/ -static void *eeh_set_dev_freset(void *data, void *flag) +static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) { struct pci_dev *dev; unsigned int *freset = (unsigned int *)flag; - struct eeh_dev *edev = (struct eeh_dev *)data; dev = eeh_dev_to_pci_dev(edev); if (dev) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ca9a73fe9cc5..188d15c4fe3a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -149,9 +149,8 @@ static bool eeh_dev_removed(struct eeh_dev *edev) return false; } -static void *eeh_dev_save_state(void *data, void *userdata) +static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev; if (!edev) @@ -184,9 +183,8 @@ static void *eeh_dev_save_state(void *data, void *userdata) * merge the device driver responses. Cumulative response * passed back in "userdata". */ -static void *eeh_report_error(void *data, void *userdata) +static void *eeh_report_error(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; @@ -231,9 +229,8 @@ out_no_dev: * are now enabled. Collects up and merges the device driver responses. * Cumulative response passed back in "userdata". */ -static void *eeh_report_mmio_enabled(void *data, void *userdata) +static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; @@ -273,9 +270,8 @@ out_no_dev: * some actions, usually to save data the driver needs so that the * driver can work again while the device is recovered. 
*/ -static void *eeh_report_reset(void *data, void *userdata) +static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; @@ -310,9 +306,8 @@ out_no_dev: return NULL; } -static void *eeh_dev_restore_state(void *data, void *userdata) +static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = data; struct pci_dev *pdev; if (!edev) @@ -348,9 +343,8 @@ static void *eeh_dev_restore_state(void *data, void *userdata) * could resume so that the device driver can do some initialization * to make the recovered device work again. */ -static void *eeh_report_resume(void *data, void *userdata) +static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); bool was_in_error; struct pci_driver *driver; @@ -397,9 +391,8 @@ out_no_dev: * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. */ -static void *eeh_report_failure(void *data, void *userdata) +static void *eeh_report_failure(struct eeh_dev *edev, void *userdata) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; @@ -457,10 +450,9 @@ static void *eeh_add_virt_device(void *data, void *userdata) return NULL; } -static void *eeh_rmv_device(void *data, void *userdata) +static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata) { struct pci_driver *driver; - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; int *removed = rmv_data ? 
&rmv_data->removed : NULL; @@ -532,9 +524,8 @@ static void *eeh_rmv_device(void *data, void *userdata) return NULL; } -static void *eeh_pe_detach_dev(void *data, void *userdata) +static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata) { - struct eeh_pe *pe = (struct eeh_pe *)data; struct eeh_dev *edev, *tmp; eeh_pe_for_each_dev(pe, edev, tmp) { @@ -555,9 +546,8 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) * PE reset (for 3 times), we try to clear the frozen state * for 3 times as well. */ -static void *__eeh_clear_pe_frozen_state(void *data, void *flag) +static void *__eeh_clear_pe_frozen_state(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; bool clear_sw_state = *(bool *)flag; int i, rc = 1; diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index ee5a67d57aab..38a4bcd8ed13 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -173,7 +173,7 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, * to be traversed. */ void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) + eeh_pe_traverse_func fn, void *flag) { struct eeh_pe *pe; void *ret; @@ -196,7 +196,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, * PE and its child PEs. */ void *eeh_pe_dev_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) + eeh_edev_traverse_func fn, void *flag) { struct eeh_pe *pe; struct eeh_dev *edev, *tmp; @@ -235,9 +235,8 @@ struct eeh_pe_get_flag { int config_addr; }; -static void *__eeh_pe_get(void *data, void *flag) +static void *__eeh_pe_get(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; struct eeh_pe_get_flag *tmp = (struct eeh_pe_get_flag *) flag; /* Unexpected PHB PE */ @@ -551,9 +550,8 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe) * PE. Also, the associated PCI devices will be put into IO frozen * state as well. 
*/ -static void *__eeh_pe_state_mark(void *data, void *flag) +static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); struct eeh_dev *edev, *tmp; struct pci_dev *pdev; @@ -595,9 +593,8 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) } EXPORT_SYMBOL_GPL(eeh_pe_state_mark); -static void *__eeh_pe_dev_mode_mark(void *data, void *flag) +static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag) { - struct eeh_dev *edev = data; int mode = *((int *)flag); edev->mode |= mode; @@ -625,9 +622,8 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode) * given PE. Besides, we also clear the check count of the PE * as well. */ -static void *__eeh_pe_state_clear(void *data, void *flag) +static void *__eeh_pe_state_clear(struct eeh_pe *pe, void *flag) { - struct eeh_pe *pe = (struct eeh_pe *)data; int state = *((int *)flag); struct eeh_dev *edev, *tmp; struct pci_dev *pdev; @@ -858,9 +854,8 @@ static void eeh_restore_device_bars(struct eeh_dev *edev) * the expansion ROM base address, the latency timer, and etc. * from the saved values in the device node. */ -static void *eeh_restore_one_device_bars(void *data, void *flag) +static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag) { - struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dn *pdn = eeh_dev_to_pdn(edev); /* Do special restore for bridges */ -- cgit v1.2.3 From 2eae39f29b10038601ddc36dae346cd79c96faa1 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:33 +1000 Subject: powerpc/eeh: Add message when PE processing at parent To aid debugging, add a message to show when EEH processing for a PE will be done at the device's parent, rather than directly at the device. 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f82dade4fb9a..1139821a9aec 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -541,8 +541,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev) /* Frozen parent PE ? */ ret = eeh_ops->get_state(parent_pe, NULL); - if (ret > 0 && !eeh_state_active(ret)) + if (ret > 0 && !eeh_state_active(ret)) { pe = parent_pe; + pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n", + pe->phb->global_number, pe->addr, + pe->phb->global_number, parent_pe->addr); + } /* Next parent level */ parent_pe = parent_pe->parent; -- cgit v1.2.3 From 30424e386a30d1160a0fdf47beafe8b116d0a8f7 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:34 +1000 Subject: powerpc/eeh: Clean up pci_ers_result handling As EEH event handling progresses, a cumulative result of type pci_ers_result is built up by (some of) the eeh_report_*() functions using either: if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; or: if ((*res == PCI_ERS_RESULT_NONE) || (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; (Where *res is the accumulator.) However, the intent is not immediately clear and the result in some situations is order dependent. Address this by assigning a priority to each result value, and always merging to the highest priority. This renders the intent clear, and provides a stable value for all orderings. 
Signed-off-by: Sam Bobroff [mpe: Minor formatting (clang-format)] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 188d15c4fe3a..ea51c909f8c9 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,6 +39,35 @@ struct eeh_rmv_data { int removed; }; +static int eeh_result_priority(enum pci_ers_result result) +{ + switch (result) { + case PCI_ERS_RESULT_NONE: + return 1; + case PCI_ERS_RESULT_NO_AER_DRIVER: + return 2; + case PCI_ERS_RESULT_RECOVERED: + return 3; + case PCI_ERS_RESULT_CAN_RECOVER: + return 4; + case PCI_ERS_RESULT_DISCONNECT: + return 5; + case PCI_ERS_RESULT_NEED_RESET: + return 6; + default: + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + return 0; + } +}; + +static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old, + enum pci_ers_result new) +{ + if (eeh_result_priority(new) > eeh_result_priority(old)) + return new; + return old; +} + /** * eeh_pcid_get - Get the PCI device driver * @pdev: PCI device @@ -206,9 +235,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata) rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; + *res = pci_ers_merge_result(*res, rc); edev->in_error = true; pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); @@ -249,9 +276,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata) rc = driver->err_handler->mmio_enabled(dev); - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; + *res = pci_ers_merge_result(*res, rc); out: 
eeh_pcid_put(dev); @@ -294,10 +319,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) goto out; rc = driver->err_handler->slot_reset(dev); - if ((*res == PCI_ERS_RESULT_NONE) || - (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; - if (*res == PCI_ERS_RESULT_DISCONNECT && - rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + *res = pci_ers_merge_result(*res, rc); out: eeh_pcid_put(dev); -- cgit v1.2.3 From 309ed3a7157a50edeeddfe49bd527a7347f76237 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:35 +1000 Subject: powerpc/eeh: Introduce eeh_for_each_pe() Add a for_each-style macro for iterating through PEs without the boilerplate required by a traversal function. eeh_pe_next() is now exported, as it is now used directly in place. Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 4 ++++ arch/powerpc/kernel/eeh_pe.c | 7 +++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f02e0400e6f2..677102baf3cd 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -106,6 +106,9 @@ struct eeh_pe { #define eeh_pe_for_each_dev(pe, edev, tmp) \ list_for_each_entry_safe(edev, tmp, &pe->edevs, list) +#define eeh_for_each_pe(root, pe) \ + for (pe = root; pe; pe = eeh_pe_next(pe, root)) + static inline bool eeh_pe_passed(struct eeh_pe *pe) { return pe ? 
!!atomic_read(&pe->pass_dev_cnt) : false; @@ -267,6 +270,7 @@ typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb); +struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root); struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no, int config_addr); int eeh_add_to_parent_pe(struct eeh_dev *edev); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 38a4bcd8ed13..1b238ecc553e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -142,8 +142,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) * The function is used to retrieve the next PE in the * hierarchy PE tree. */ -static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, - struct eeh_pe *root) +struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root) { struct list_head *next = pe->child_list.next; @@ -178,7 +177,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, struct eeh_pe *pe; void *ret; - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_for_each_pe(root, pe) { ret = fn(pe, flag); if (ret) return ret; } @@ -209,7 +208,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, } /* Traverse root PE */ - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_for_each_pe(root, pe) { eeh_pe_for_each_dev(pe, edev, tmp) { ret = fn(edev, flag); if (ret) -- cgit v1.2.3 From e2b810d51b2b36d41e3e5522e12cc752e0d865ec Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:36 +1000 Subject: powerpc/eeh: Introduce eeh_edev_actionable() The same test is done in every EEH report function, so factor it out. Since eeh_dev_removed() needs to be moved higher up in the file, simplify it a little while we're at it. 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ea51c909f8c9..127f2bb41e38 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -68,6 +68,17 @@ static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old, return old; } +static bool eeh_dev_removed(struct eeh_dev *edev) +{ + return !edev || (edev->mode & EEH_DEV_REMOVED); +} + +static bool eeh_edev_actionable(struct eeh_dev *edev) +{ + return (edev->pdev && !eeh_dev_removed(edev) && + !eeh_pe_passed(edev->pe)); +} + /** * eeh_pcid_get - Get the PCI device driver * @pdev: PCI device @@ -169,15 +180,6 @@ static void eeh_enable_irq(struct pci_dev *dev) } } -static bool eeh_dev_removed(struct eeh_dev *edev) -{ - /* EEH device removed ? */ - if (!edev || (edev->mode & EEH_DEV_REMOVED)) - return true; - - return false; -} - static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata) { struct pci_dev *pdev; @@ -218,7 +220,7 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) + if (!eeh_edev_actionable(edev)) return NULL; device_lock(&dev->dev); @@ -262,7 +264,7 @@ static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) + if (!eeh_edev_actionable(edev)) return NULL; device_lock(&dev->dev); @@ -301,7 +303,7 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) + if 
(!eeh_edev_actionable(edev)) return NULL; device_lock(&dev->dev); @@ -371,7 +373,7 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) bool was_in_error; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) + if (!eeh_edev_actionable(edev)) return NULL; device_lock(&dev->dev); @@ -418,7 +420,7 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) + if (!eeh_edev_actionable(edev)) return NULL; device_lock(&dev->dev); -- cgit v1.2.3 From 47cc8c1cc2f2d9889e84d59cbbe8cb1cc6e24ed1 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:37 +1000 Subject: powerpc/eeh: Introduce eeh_set_channel_state() To ease future refactoring, extract setting of the channel state from the report functions out into their own functions. This increases the amount of code that is identical across all of the report functions. 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 127f2bb41e38..52b5acdab0f3 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -205,6 +205,17 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata) return NULL; } +static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; + + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + if (eeh_edev_actionable(edev)) + edev->pdev->error_state = s; +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -224,7 +235,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata) return NULL; device_lock(&dev->dev); - dev->error_state = pci_channel_io_frozen; driver = eeh_pcid_get(dev); if (!driver) goto out_no_dev; @@ -307,7 +317,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) return NULL; device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); if (!driver) goto out_no_dev; @@ -377,7 +386,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) return NULL; device_lock(&dev->dev); - dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); if (!driver) goto out_no_dev; @@ -801,6 +809,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * hotplug for this case. 
*/ pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_set_channel_state(pe, pci_channel_io_frozen); eeh_pe_dev_traverse(pe, eeh_report_error, &result); if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE && @@ -891,6 +900,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device drivers " "the completion of reset\n"); result = PCI_ERS_RESULT_NONE; + eeh_set_channel_state(pe, pci_channel_io_normal); eeh_pe_dev_traverse(pe, eeh_report_reset, &result); } @@ -912,6 +922,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); + eeh_set_channel_state(pe, pci_channel_io_normal); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); pr_info("EEH: Recovery successful.\n"); @@ -930,6 +941,7 @@ hard_fail: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ + eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Mark the PE to be removed permanently */ @@ -1039,6 +1051,7 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); -- cgit v1.2.3 From 010acfa1a76679174a0d8732965d76ae8a8531f7 Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:38 +1000 Subject: powerpc/eeh: Introduce eeh_set_irq_state() To ease future refactoring, extract calls to eeh_enable_irq() and eeh_disable_irq() from the various report functions. This makes the report functions initial sequences more similar, as well as making the IRQ changes visible when reading eeh_handle_normal_event(). 
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 52 ++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 52b5acdab0f3..7b0670b03a97 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -124,22 +124,20 @@ static inline void eeh_pcid_put(struct pci_dev *pdev) * do real work because EEH should freeze DMA transfers for those PCI * devices encountering EEH errors, which includes MSI or MSI-X. */ -static void eeh_disable_irq(struct pci_dev *dev) +static void eeh_disable_irq(struct eeh_dev *edev) { - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - /* Don't disable MSI and MSI-X interrupts. They are * effectively disabled by the DMA Stopped state * when an EEH error occurs. */ - if (dev->msi_enabled || dev->msix_enabled) + if (edev->pdev->msi_enabled || edev->pdev->msix_enabled) return; - if (!irq_has_action(dev->irq)) + if (!irq_has_action(edev->pdev->irq)) return; edev->mode |= EEH_DEV_IRQ_DISABLED; - disable_irq_nosync(dev->irq); + disable_irq_nosync(edev->pdev->irq); } /** @@ -149,10 +147,8 @@ static void eeh_disable_irq(struct pci_dev *dev) * This routine must be called to enable interrupt while failed * device could be resumed. 
*/ -static void eeh_enable_irq(struct pci_dev *dev) +static void eeh_enable_irq(struct eeh_dev *edev) { - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; /* @@ -175,8 +171,8 @@ static void eeh_enable_irq(struct pci_dev *dev) * * tglx */ - if (irqd_irq_disabled(irq_get_irq_data(dev->irq))) - enable_irq(dev->irq); + if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq))) + enable_irq(edev->pdev->irq); } } @@ -216,6 +212,29 @@ static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s) edev->pdev->error_state = s; } +static void eeh_set_irq_state(struct eeh_pe *root, bool enable) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; + + eeh_for_each_pe(root, pe) { + eeh_pe_for_each_dev(pe, edev, tmp) { + if (!eeh_edev_actionable(edev)) + continue; + + if (!eeh_pcid_get(edev->pdev)) + continue; + + if (enable) + eeh_enable_irq(edev); + else + eeh_disable_irq(edev); + + eeh_pcid_put(edev->pdev); + } + } +} + /** * eeh_report_error - Report pci error to each device driver * @data: eeh device @@ -239,8 +258,6 @@ static void *eeh_report_error(struct eeh_dev *edev, void *userdata) driver = eeh_pcid_get(dev); if (!driver) goto out_no_dev; - eeh_disable_irq(dev); - if (!driver->err_handler || !driver->err_handler->error_detected) goto out; @@ -321,8 +338,6 @@ static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) driver = eeh_pcid_get(dev); if (!driver) goto out_no_dev; - eeh_enable_irq(dev); - if (!driver->err_handler || !driver->err_handler->slot_reset || (edev->mode & EEH_DEV_NO_HANDLER) || @@ -392,7 +407,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) was_in_error = edev->in_error; edev->in_error = false; - eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || @@ -437,8 +451,6 @@ static void *eeh_report_failure(struct eeh_dev *edev, void *userdata) driver = eeh_pcid_get(dev); if (!driver) goto 
out_no_dev; - eeh_disable_irq(dev); - if (!driver->err_handler || !driver->err_handler->error_detected) goto out; @@ -810,6 +822,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) */ pr_info("EEH: Notify device drivers to shutdown\n"); eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); eeh_pe_dev_traverse(pe, eeh_report_error, &result); if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE && @@ -901,6 +914,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) "the completion of reset\n"); result = PCI_ERS_RESULT_NONE; eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); eeh_pe_dev_traverse(pe, eeh_report_reset, &result); } @@ -923,6 +937,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); pr_info("EEH: Recovery successful.\n"); @@ -942,6 +957,7 @@ hard_fail: /* Notify all devices that they're about to go down. */ eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Mark the PE to be removed permanently */ -- cgit v1.2.3 From 665012c5734b0f2123dfb4b2bdd44c3344647b9a Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:39 +1000 Subject: powerpc/eeh: Cleaner handling of EEH_DEV_NO_HANDLER If a device without a driver is recovered via EEH, the flag EEH_DEV_NO_HANDLER is incorrectly left set on the device after recovery, because the test in eeh_report_resume() for the existence of a bound driver is done before the flag is cleared. If a driver is later bound and EEH is experienced again, some of the driver's EEH handlers are not called. To correct this, clear the flag unconditionally after EEH processing is complete.
Signed-off-by: Sam Bobroff Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7b0670b03a97..e18802d96654 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -411,7 +411,6 @@ static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) if (!driver->err_handler || !driver->err_handler->resume || (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { - edev->mode &= ~EEH_DEV_NO_HANDLER; goto out; } @@ -786,6 +785,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *bus; struct eeh_dev *edev, *tmp; + struct eeh_pe *tmp_pe; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; @@ -940,6 +940,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_set_irq_state(pe, true); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp) + edev->mode &= ~EEH_DEV_NO_HANDLER; + pr_info("EEH: Recovery successful.\n"); goto final; -- cgit v1.2.3 From 20b344971433da7bcd19265e5dc00a4d0df5e77e Mon Sep 17 00:00:00 2001 From: Sam Bobroff Date: Fri, 25 May 2018 13:11:40 +1000 Subject: powerpc/eeh: Refactor report functions The EEH report functions now share a fair bit of code around the start and end of each function. So factor out as much as possible, and move the traversal into a custom function. This also allows accurate debug to be generated more easily. 
Signed-off-by: Sam Bobroff [mpe: Format with clang-format] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh_driver.c | 307 ++++++++++++++++++++------------------- 1 file changed, 157 insertions(+), 150 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index e18802d96654..67619b4b3f96 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -60,6 +60,44 @@ static int eeh_result_priority(enum pci_ers_result result) } }; +const char *pci_ers_result_name(enum pci_ers_result result) +{ + switch (result) { + case PCI_ERS_RESULT_NONE: + return "none"; + case PCI_ERS_RESULT_CAN_RECOVER: + return "can recover"; + case PCI_ERS_RESULT_NEED_RESET: + return "need reset"; + case PCI_ERS_RESULT_DISCONNECT: + return "disconnect"; + case PCI_ERS_RESULT_RECOVERED: + return "recovered"; + case PCI_ERS_RESULT_NO_AER_DRIVER: + return "no AER driver"; + default: + WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + return "unknown"; + } +}; + +static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr, + edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf); + + va_end(args); +} + static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old, enum pci_ers_result new) { @@ -235,123 +273,117 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable) } } -/** - * eeh_report_error - Report pci error to each device driver - * @data: eeh device - * @userdata: return value - * - * Report an EEH error to each device driver, collect up and - * merge the device driver responses. Cumulative response - * passed back in "userdata". 
- */ -static void *eeh_report_error(struct eeh_dev *edev, void *userdata) +typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *, + struct pci_driver *); +static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, + enum pci_ers_result *result) { - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; + enum pci_ers_result new_result; + + device_lock(&edev->pdev->dev); + if (eeh_edev_actionable(edev)) { + driver = eeh_pcid_get(edev->pdev); + + if (!driver) + eeh_edev_info(edev, "no driver"); + else if (!driver->err_handler) + eeh_edev_info(edev, "driver not EEH aware"); + else if (edev->mode & EEH_DEV_NO_HANDLER) + eeh_edev_info(edev, "driver bound too late"); + else { + new_result = fn(edev, driver); + eeh_edev_info(edev, "%s driver reports: '%s'", + driver->name, + pci_ers_result_name(new_result)); + if (result) + *result = pci_ers_merge_result(*result, + new_result); + } + if (driver) + eeh_pcid_put(edev->pdev); + } else { + eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev, + !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe)); + } + device_unlock(&edev->pdev->dev); +} - if (!eeh_edev_actionable(edev)) - return NULL; +static void eeh_pe_report(const char *name, struct eeh_pe *root, + eeh_report_fn fn, enum pci_ers_result *result) +{ + struct eeh_pe *pe; + struct eeh_dev *edev, *tmp; - device_lock(&dev->dev); + pr_info("EEH: Beginning: '%s'\n", name); + eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); + if (result) + pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", + name, pci_ers_result_name(*result)); + else + pr_info("EEH: Finished:'%s'", name); +} - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; +/** + * eeh_report_error - Report pci error to each device driver + * @edev: eeh device + * @driver: device's PCI driver + * + * Report an EEH error to each device driver. 
+ */ +static enum pci_ers_result eeh_report_error(struct eeh_dev *edev, + struct pci_driver *driver) +{ + enum pci_ers_result rc; + struct pci_dev *dev = edev->pdev; - if (!driver->err_handler || - !driver->err_handler->error_detected) - goto out; + if (!driver->err_handler->error_detected) + return PCI_ERS_RESULT_NONE; + eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)", + driver->name); rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); - *res = pci_ers_merge_result(*res, rc); - edev->in_error = true; pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + return rc; } /** * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * Tells each device driver that IO ports, MMIO and config space I/O - * are now enabled. Collects up and merges the device driver responses. - * Cumulative response passed back in "userdata". + * are now enabled. 
*/ -static void *eeh_report_mmio_enabled(struct eeh_dev *edev, void *userdata) +static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev, + struct pci_driver *driver) { - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - if (!eeh_edev_actionable(edev)) - return NULL; - - device_lock(&dev->dev); - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; - - if (!driver->err_handler || - !driver->err_handler->mmio_enabled || - (edev->mode & EEH_DEV_NO_HANDLER)) - goto out; - - rc = driver->err_handler->mmio_enabled(dev); - - *res = pci_ers_merge_result(*res, rc); - -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + if (!driver->err_handler->mmio_enabled) + return PCI_ERS_RESULT_NONE; + eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name); + return driver->err_handler->mmio_enabled(edev->pdev); } /** * eeh_report_reset - Tell device that slot has been reset - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This routine must be called while EEH tries to reset particular * PCI device so that the associated PCI device driver could take * some actions, usually to save data the driver needs so that the * driver can work again while the device is recovered. 
*/ -static void *eeh_report_reset(struct eeh_dev *edev, void *userdata) +static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev, + struct pci_driver *driver) { - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - if (!eeh_edev_actionable(edev)) - return NULL; - - device_lock(&dev->dev); - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; - - if (!driver->err_handler || - !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER) || - (!edev->in_error)) - goto out; - - rc = driver->err_handler->slot_reset(dev); - *res = pci_ers_merge_result(*res, rc); - -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + if (!driver->err_handler->slot_reset || !edev->in_error) + return PCI_ERS_RESULT_NONE; + eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name); + return driver->err_handler->slot_reset(edev->pdev); } static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) @@ -384,84 +416,53 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) /** * eeh_report_resume - Tell device to resume normal operations - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This routine must be called to notify the device driver that it * could resume so that the device driver can do some initialization * to make the recovered device work again. 
*/ -static void *eeh_report_resume(struct eeh_dev *edev, void *userdata) +static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev, + struct pci_driver *driver) { - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - bool was_in_error; - struct pci_driver *driver; - - if (!eeh_edev_actionable(edev)) - return NULL; - - device_lock(&dev->dev); - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; + if (!driver->err_handler->resume || !edev->in_error) + return PCI_ERS_RESULT_NONE; - was_in_error = edev->in_error; - edev->in_error = false; + eeh_edev_info(edev, "Invoking %s->resume()", driver->name); + driver->err_handler->resume(edev->pdev); - if (!driver->err_handler || - !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { - goto out; - } - - driver->err_handler->resume(dev); - - pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); -out: - eeh_pcid_put(dev); + pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED); #ifdef CONFIG_PCI_IOV if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev)) eeh_ops->notify_resume(eeh_dev_to_pdn(edev)); #endif -out_no_dev: - device_unlock(&dev->dev); - return NULL; + return PCI_ERS_RESULT_NONE; } /** * eeh_report_failure - Tell device driver that device is dead. - * @data: eeh device - * @userdata: return value + * @edev: eeh device + * @driver: device's PCI driver * * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. 
*/ -static void *eeh_report_failure(struct eeh_dev *edev, void *userdata) +static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev, + struct pci_driver *driver) { - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - struct pci_driver *driver; - - if (!eeh_edev_actionable(edev)) - return NULL; - - device_lock(&dev->dev); - dev->error_state = pci_channel_io_perm_failure; - - driver = eeh_pcid_get(dev); - if (!driver) goto out_no_dev; + enum pci_ers_result rc; - if (!driver->err_handler || - !driver->err_handler->error_detected) - goto out; + if (!driver->err_handler->error_detected) + return PCI_ERS_RESULT_NONE; - driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); + eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)", + driver->name); + rc = driver->err_handler->error_detected(edev->pdev, + pci_channel_io_perm_failure); - pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); -out: - eeh_pcid_put(dev); -out_no_dev: - device_unlock(&dev->dev); - return NULL; + pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT); + return rc; } static void *eeh_add_virt_device(void *data, void *userdata) @@ -823,7 +824,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device drivers to shutdown\n"); eeh_set_channel_state(pe, pci_channel_io_frozen); eeh_set_irq_state(pe, false); - eeh_pe_dev_traverse(pe, eeh_report_error, &result); + eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error, + &result); if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE && result != PCI_ERS_RESULT_NEED_RESET) @@ -870,7 +872,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); - eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); + eeh_pe_report("mmio_enabled", pe, + eeh_report_mmio_enabled, &result); } } @@ -915,7 +918,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) result = PCI_ERS_RESULT_NONE; 
eeh_set_channel_state(pe, pci_channel_io_normal); eeh_set_irq_state(pe, true); - eeh_pe_dev_traverse(pe, eeh_report_reset, &result); + eeh_pe_report("slot_reset", pe, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ @@ -938,11 +941,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Notify device driver to resume\n"); eeh_set_channel_state(pe, pci_channel_io_normal); eeh_set_irq_state(pe, true); - eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp) + eeh_pe_report("resume", pe, eeh_report_resume, NULL); + eeh_for_each_pe(pe, tmp_pe) { + eeh_pe_for_each_dev(tmp_pe, edev, tmp) { edev->mode &= ~EEH_DEV_NO_HANDLER; + edev->in_error = false; + } + } pr_info("EEH: Recovery successful.\n"); goto final; @@ -962,7 +967,8 @@ hard_fail: /* Notify all devices that they're about to go down. */ eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_set_irq_state(pe, false); - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1072,7 +1078,8 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_pe_dev_traverse(pe, + eeh_pe_report( + "error_detected(permanent failure)", pe, eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { -- cgit v1.2.3 From 6bcdd2972b9f6ebda9ae5c7075e2d59770dbbf12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 May 2018 22:57:38 +1000 Subject: powerpc/ptrace: Use copy_{from, to}_user() rather than open-coding In PPC_PTRACE_GETHWDBGINFO and PPC_PTRACE_SETHWDEBUG we do an access_ok() check and then __copy_{from,to}_user(). 
Instead we should just use copy_{from,to}_user() which does all that for us and is less error prone. Signed-off-by: Al Viro Signed-off-by: Michael Ellerman Reviewed-by: Samuel Mendoza-Jonas Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 0f63dd5972e9..9667666eb18e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3082,27 +3082,19 @@ long arch_ptrace(struct task_struct *child, long request, #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - if (!access_ok(VERIFY_WRITE, datavp, - sizeof(struct ppc_debug_info))) + if (copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info))) return -EFAULT; - ret = __copy_to_user(datavp, &dbginfo, - sizeof(struct ppc_debug_info)) ? - -EFAULT : 0; - break; + return 0; } case PPC_PTRACE_SETHWDEBUG: { struct ppc_hw_breakpoint bp_info; - if (!access_ok(VERIFY_READ, datavp, - sizeof(struct ppc_hw_breakpoint))) + if (copy_from_user(&bp_info, datavp, + sizeof(struct ppc_hw_breakpoint))) return -EFAULT; - ret = __copy_from_user(&bp_info, datavp, - sizeof(struct ppc_hw_breakpoint)) ? - -EFAULT : 0; - if (!ret) - ret = ppc_set_hwdebug(child, &bp_info); - break; + return ppc_set_hwdebug(child, &bp_info); } case PPC_PTRACE_DELHWDEBUG: { -- cgit v1.2.3 From 3e3786801b701cf03ee028fca786848d4865563e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 May 2018 23:03:16 +1000 Subject: powerpc: Check address limit on user-mode return (TIF_FSCHECK) set_fs() sets the addr_limit, which is used in access_ok() to determine if an address is a user or kernel address. Some code paths use set_fs() to temporarily elevate the addr_limit so that kernel code can read/write kernel memory as if it were user memory. 
That is fine as long as the code can't ever return to userspace with the addr_limit still elevated. If that did happen, then userspace can read/write kernel memory as if it were user memory, eg. just with write(2). In case it's not clear, that is very bad. It has also happened in the past due to bugs. Commit 5ea0727b163c ("x86/syscalls: Check address limit on user-mode return") added a mechanism to check the addr_limit value before returning to userspace. Any call to set_fs() sets a thread flag, TIF_FSCHECK, and if we see that on the return to userspace we go out of line to check that the addr_limit value is not elevated. For further info see the above commit, as well as: https://lwn.net/Articles/722267/ https://bugs.chromium.org/p/project-zero/issues/detail?id=990 Verified to work on 64-bit Book3S using a POC that objdumps the system call handler, and a modified lkdtm_CORRUPT_USER_DS() that doesn't kill the caller. Before: $ sudo ./test-tif-fscheck ... 0000000000000000 <.data>: 0: e1 f7 8a 79 rldicl. r10,r12,30,63 4: 80 03 82 40 bne 0x384 8: 00 40 8a 71 andi. r10,r12,16384 c: 78 0b 2a 7c mr r10,r1 10: 10 fd 21 38 addi r1,r1,-752 14: 08 00 c2 41 beq- 0x1c 18: 58 09 2d e8 ld r1,2392(r13) 1c: 00 00 41 f9 std r10,0(r1) 20: 70 01 61 f9 std r11,368(r1) 24: 78 01 81 f9 std r12,376(r1) 28: 70 00 01 f8 std r0,112(r1) 2c: 78 00 41 f9 std r10,120(r1) 30: 20 00 82 41 beq 0x50 34: a6 42 4c 7d mftb r10 After: $ sudo ./test-tif-fscheck Killed And in dmesg: Invalid address limit on user-mode return WARNING: CPU: 1 PID: 3689 at ../include/linux/syscalls.h:260 do_notify_resume+0x140/0x170 ... NIP [c00000000001ee50] do_notify_resume+0x140/0x170 LR [c00000000001ee4c] do_notify_resume+0x13c/0x170 Call Trace: do_notify_resume+0x13c/0x170 (unreliable) ret_from_except_lite+0x70/0x74 Performance overhead is essentially zero in the usual case, because the bit is checked as part of the existing _TIF_USER_WORK_MASK check. 
Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/thread_info.h | 8 +++++--- arch/powerpc/include/asm/uaccess.h | 8 +++++++- arch/powerpc/kernel/signal.c | 4 ++++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 5964145db03d..f308dfeb2746 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -79,8 +79,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ +#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_PATCH_PENDING 6 /* pending live patching update */ @@ -99,6 +98,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! 
*/ #endif +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<thread.addr_limit) -#define set_fs(val) (current->thread.addr_limit = (val)) + +static inline void set_fs(mm_segment_t fs) +{ + current->thread.addr_limit = fs; + /* On user-mode return check addr_limit (fs) is correct */ + set_thread_flag(TIF_FSCHECK); +} #define segment_eq(a, b) ((a).seg == (b).seg) diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 61db86ecd318..fb932f1202c7 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -150,6 +151,9 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { user_exit(); + /* Check valid addr_limit, TIF check is done there */ + addr_limit_user_check(); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); -- cgit v1.2.3 From 7b08729cb272b4cd5c657cd5ac0dddae15a593ff Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 23:07:26 +1000 Subject: powerpc/64: Save stack pointer when we hard disable interrupts A CPU that gets stuck with interrupts hard disabled can be difficult to debug, as on some platforms we have no way to interrupt the CPU to find out what it's doing. A stop-gap is to have the CPU save its stack pointer (r1) in its paca when it hard disables interrupts. That way if we can't interrupt it, we can at least trace the stack based on where it last disabled interrupts. In some cases that will be total junk, but the stack trace code should handle that. In the simple case of a CPU that disables interrupts and then gets stuck in a loop, the stack trace should be informative. We could clear the saved stack pointer when we enable interrupts, but that loses information which could be useful if we have nothing else to go on.
Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/include/asm/hw_irq.h | 6 +++++- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/kernel/exceptions-64s.S | 1 + arch/powerpc/xmon/xmon.c | 4 ++++ 4 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 9aec7237f8c2..e151774cb577 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -238,8 +238,12 @@ static inline bool arch_irqs_disabled(void) __hard_irq_disable(); \ flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (!arch_irqs_disabled_flags(flags)) \ + if (!arch_irqs_disabled_flags(flags)) { \ + asm ("stdx %%r1, 0, %1 ;" \ + : "=m" (local_paca->saved_r1) \ + : "b" (&local_paca->saved_r1)); \ trace_hardirqs_off(); \ + } \ } while(0) static inline bool lazy_irq_pending(void) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 81471bd08f66..6d34bd71139d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -161,7 +161,7 @@ struct paca_struct { struct task_struct *__current; /* Pointer to current */ u64 kstack; /* Saved Kernel stack addr */ u64 stab_rr; /* stab/slb round-robin counter */ - u64 saved_r1; /* r1 save for RTAS calls or PM */ + u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 irq_soft_mask; /* mask for irq soft masking */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae6a849db60b..bb26fe9e90ce 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1499,6 +1499,7 @@ masked_##_H##interrupt: \ xori r10,r10,MSR_EE; /* clear MSR_EE */ \ mtspr SPRN_##_H##SRR1,r10; \ 2: mtcrf 0x80,r9; \ + std r1,PACAR1(r13); 
\ ld r9,PACA_EXGEN+EX_R9(r13); \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0561c14b276b..47166ad2a669 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1172,6 +1172,10 @@ static int cpu_cmd(void) /* try to switch to cpu specified */ if (!cpumask_test_cpu(cpu, &cpus_in_xmon)) { printf("cpu 0x%lx isn't in xmon\n", cpu); +#ifdef CONFIG_PPC64 + printf("backtrace of paca[0x%lx].saved_r1 (possibly stale):\n", cpu); + xmon_show_stack(paca_ptrs[cpu]->saved_r1, 0, 0); +#endif return 0; } xmon_taken = 0; -- cgit v1.2.3 From 6ba55716a24f5f399ad4d37685e4bb721f8e6dd5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 23:07:27 +1000 Subject: powerpc/nmi: Add an API for sending "safe" NMIs Currently the options we have for sending NMIs are not necessarily safe, that is they can potentially interrupt a CPU in a non-recoverable region of code, meaning the kernel must then panic(). But we'd like to use smp_send_nmi_ipi() to do cross-CPU calls in situations where we don't want to risk a panic(), because it doesn't have the requirement that interrupts must be enabled like smp_call_function(). So add an API for the caller to indicate that it wants to use the NMI infrastructure, but doesn't want to do anything "unsafe". Currently that is implemented by not actually calling cause_nmi_ipi(), instead falling back to an IPI. In future we can pass the safe parameter down to cause_nmi_ipi() and the individual backends can potentially take it into account before deciding what to do. 
Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/include/asm/smp.h | 1 + arch/powerpc/kernel/smp.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index cfecfee1194b..29ffaabdf75b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -58,6 +58,7 @@ struct smp_ops_t { extern void smp_flush_nmi_ipi(u64 delay_us); extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); +extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); extern void smp_generic_give_timebase(void); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b009a562c76b..5eadfffabe35 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -430,9 +430,9 @@ out: return ret; } -static void do_smp_send_nmi_ipi(int cpu) +static void do_smp_send_nmi_ipi(int cpu, bool safe) { - if (smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) + if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu)) return; if (cpu >= 0) { @@ -472,7 +472,7 @@ void smp_flush_nmi_ipi(u64 delay_us) * - delay_us > 0 is the delay before giving up waiting for targets to * enter the handler, == 0 specifies indefinite delay. 
*/ -int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us, bool safe) { unsigned long flags; int me = raw_smp_processor_id(); @@ -505,7 +505,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_busy_count++; nmi_ipi_unlock(); - do_smp_send_nmi_ipi(cpu); + do_smp_send_nmi_ipi(cpu, safe); while (!cpumask_empty(&nmi_ipi_pending_mask)) { udelay(1); @@ -527,6 +527,16 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) return ret; } + +int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + return __smp_send_nmi_ipi(cpu, fn, delay_us, false); +} + +int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +{ + return __smp_send_nmi_ipi(cpu, fn, delay_us, true); +} #endif /* CONFIG_NMI_IPI */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST @@ -570,7 +580,7 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) * entire NMI dance and waiting for * cpus to clear pending mask, etc. */ - do_smp_send_nmi_ipi(cpu); + do_smp_send_nmi_ipi(cpu, false); } } } -- cgit v1.2.3 From 5cc05910f26e6fd6da15f052f86f6150e4b91664 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 23:07:28 +1000 Subject: powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from its paca and do a backtrace based on its saved_r1.
Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/include/asm/nmi.h | 6 +++++ arch/powerpc/kernel/stacktrace.c | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 9c80939b4d14..0f571e0ebca1 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -8,4 +8,10 @@ extern void arch_touch_nmi_watchdog(void); static inline void arch_touch_nmi_watchdog(void) {} #endif +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE) +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace +#endif + #endif /* _ASM_NMI_H */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 8dd6ba0c7d35..b4f134e8bbd9 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include #include +#include 
#include #include #include @@ -22,6 +23,8 @@ #include #include +#include + /* * Save stack-backtrace addresses into a stack_trace buffer. */ @@ -194,3 +197,51 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk, } EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable); #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ + +#ifdef CONFIG_PPC_BOOK3S_64 +static void handle_backtrace_ipi(struct pt_regs *regs) +{ + nmi_cpu_backtrace(regs); +} + +static void raise_backtrace_ipi(cpumask_t *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + if (cpu == smp_processor_id()) + handle_backtrace_ipi(NULL); + else + smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); + } + + for_each_cpu(cpu, mask) { + struct paca_struct *p = paca_ptrs[cpu]; + + cpumask_clear_cpu(cpu, mask); + + pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu); + if (!virt_addr_valid(p)) { + pr_warn("paca pointer appears corrupt? (%px)\n", p); + continue; + } + + pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d", + p->irq_soft_mask, p->in_mce, p->in_nmi); + + if (virt_addr_valid(p->__current)) + pr_cont(" current: %d (%s)\n", p->__current->pid, + p->__current->comm); + else + pr_cont(" current pointer corrupt? (%px)\n", p->__current); + + pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1); + show_stack(p->__current, (unsigned long *)p->saved_r1); + } +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); +} +#endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 7af76c5f23abc7afedf449e7d2960f463cbc4097 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 23:07:29 +1000 Subject: powerpc/stacktrace: Update copyright This now has new code in it written by Nick and I, and switch to a SPDX tag. 
Signed-off-by: Michael Ellerman Reviewed-by: Nicholas Piggin --- arch/powerpc/kernel/stacktrace.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index b4f134e8bbd9..07e97f289c52 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -1,13 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + /* - * Stack trace utility + * Stack trace utility functions etc. * * Copyright 2008 Christoph Hellwig, IBM Corp. * Copyright 2018 SUSE Linux GmbH - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ #include -- cgit v1.2.3 From 2eea7f067f495e33b8b116b35b5988ab2b8aec55 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 24 Apr 2018 14:15:55 +1000 Subject: powerpc/64s: Add support for ori barrier_nospec patching Based on the RFI patching. This is required to be able to disable the speculation barrier. Only one barrier type is supported and it does nothing when the firmware does not enable it. Also re-patching modules is not supported. So the only meaningful thing that can be done is patching out the speculation barrier at boot when the user says it is not wanted.
Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/feature-fixups.h | 9 +++++++++ arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/security.c | 9 +++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 +++++++ arch/powerpc/lib/feature-fixups.c | 27 +++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index e582d2c88092..f67b3f6e36be 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -81,7 +81,7 @@ do { \ * Prevent execution of subsequent instructions until preceding branches have * been fully resolved and are no longer executing speculatively. */ -#define barrier_nospec_asm ori 31,31,0 +#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; nop // This also acts as a compiler barrier due to the memory clobber. #define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory") diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..86ac59e75f36 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -195,11 +195,20 @@ label##3: \ FTR_ENTRY_OFFSET 951b-952b; \ .popsection; +#define NOSPEC_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __barrier_nospec_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + #ifndef __ASSEMBLY__ #include extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; +extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; void apply_feature_fixups(void); void setup_feature_keys(void); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 27fa52ed6d00..afc7280cce3b 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,6 +52,7 @@ 
enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +void do_barrier_nospec_fixups(bool enable); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bab5a27ea805..b963eae0b0a0 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -9,10 +9,19 @@ #include #include +#include unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +static bool barrier_nospec_enabled; + +static void enable_barrier_nospec(bool enable) +{ + barrier_nospec_enabled = enable; + do_barrier_nospec_fixups(enable); +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c8af90ff49f0..ff73f498568c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -139,6 +139,13 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } + + . = ALIGN(8); + __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) { + __start___barrier_nospec_fixup = .; + *(__barrier_nospec_fixup) + __stop___barrier_nospec_fixup = .; + } #endif EXCEPTION_TABLE(0) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index f3e46d4edd72..ae911dad9b16 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -162,6 +162,33 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) (types & L1D_FLUSH_MTTRIG) ? 
"mttrig type" : "unknown"); } + +void do_barrier_nospec_fixups(bool enable) +{ + unsigned int instr, *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___barrier_nospec_fixup), + end = PTRRELOC(&__stop___barrier_nospec_fixup); + + instr = 0x60000000; /* nop */ + + if (enable) { + pr_info("barrier-nospec: using ORI speculation barrier\n"); + instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + patch_instruction(dest, instr); + } + + printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); +} + #endif /* CONFIG_PPC_BOOK3S_64 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) -- cgit v1.2.3 From 815069ca57c142eb71d27439bc27f41a433a67b3 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 24 Apr 2018 14:15:56 +1000 Subject: powerpc/64s: Patch barrier_nospec in modules Note that unlike RFI which is patched only in kernel the nospec state reflects settings at the time the module was loaded. Iterating all modules and re-patching every time the settings change is not implemented. Based on lwsync patching. 
Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/setup.h | 7 +++++++ arch/powerpc/kernel/module.c | 6 ++++++ arch/powerpc/kernel/security.c | 2 +- arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++--- 4 files changed, 27 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index afc7280cce3b..a24c3c9053cc 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -53,6 +53,13 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); +extern bool barrier_nospec_enabled; + +#ifdef CONFIG_PPC_BOOK3S_64 +void do_barrier_nospec_fixups_range(bool enable, void *start, void *end); +#else +static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }; +#endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f7ba0f5bf29..1b3c6835e730 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -72,6 +72,12 @@ int module_finalize(const Elf_Ehdr *hdr, do_feature_fixups(powerpc_firmware_features, (void *)sect->sh_addr, (void *)sect->sh_addr + sect->sh_size); + + sect = find_section(hdr, sechdrs, "__spec_barrier_fixup"); + if (sect != NULL) + do_barrier_nospec_fixups_range(barrier_nospec_enabled, + (void *)sect->sh_addr, + (void *)sect->sh_addr + sect->sh_size); #endif sect = find_section(hdr, sechdrs, "__lwsync_fixup"); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index b963eae0b0a0..39cc9eae8d7f 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; -static bool barrier_nospec_enabled; +bool barrier_nospec_enabled; static void 
enable_barrier_nospec(bool enable) { diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index ae911dad9b16..2b9173d09f24 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -163,14 +163,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "unknown"); } -void do_barrier_nospec_fixups(bool enable) +void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) { unsigned int instr, *dest; long *start, *end; int i; - start = PTRRELOC(&__start___barrier_nospec_fixup), - end = PTRRELOC(&__stop___barrier_nospec_fixup); + start = fixup_start; + end = fixup_end; instr = 0x60000000; /* nop */ @@ -189,6 +189,16 @@ void do_barrier_nospec_fixups(bool enable) printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } +void do_barrier_nospec_fixups(bool enable) +{ + void *start, *end; + + start = PTRRELOC(&__start___barrier_nospec_fixup), + end = PTRRELOC(&__stop___barrier_nospec_fixup); + + do_barrier_nospec_fixups_range(enable, start, end); +} + #endif /* CONFIG_PPC_BOOK3S_64 */ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) -- cgit v1.2.3 From cb3d6759a93c6d0aea1c10deb6d00e111c29c19c Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 24 Apr 2018 14:15:57 +1000 Subject: powerpc/64s: Enable barrier_nospec based on firmware settings Check what firmware told us and enable/disable the barrier_nospec as appropriate. We err on the side of enabling the barrier, as it's a no-op on older systems; see the comment for more detail.
Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/security.c | 60 ++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 1 + arch/powerpc/platforms/pseries/setup.c | 1 + 4 files changed, 63 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index a24c3c9053cc..8721fd004291 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,6 +52,7 @@ enum l1d_flush_type { void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); +void setup_barrier_nospec(void); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 39cc9eae8d7f..06d5195f6729 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -22,6 +23,65 @@ static void enable_barrier_nospec(bool enable) do_barrier_nospec_fixups(enable); } +void setup_barrier_nospec(void) +{ + bool enable; + + /* + * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well. + * But there's a good reason not to. The two flags we check below are + * both are enabled by default in the kernel, so if the hcall is not + * functional they will be enabled. + * On a system where the host firmware has been updated (so the ori + * functions as a barrier), but on which the hypervisor (KVM/Qemu) has + * not been updated, we would like to enable the barrier. Dropping the + * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is + * we potentially enable the barrier on systems where the host firmware + * is not updated, but that's harmless as it's a no-op. 
+ */ + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); + + enable_barrier_nospec(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int barrier_nospec_set(void *data, u64 val) +{ + switch (val) { + case 0: + case 1: + break; + default: + return -EINVAL; + } + + if (!!val == !!barrier_nospec_enabled) + return 0; + + enable_barrier_nospec(!!val); + + return 0; +} + +static int barrier_nospec_get(void *data, u64 *val) +{ + *val = barrier_nospec_enabled ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec, + barrier_nospec_get, barrier_nospec_set, "%llu\n"); + +static __init int barrier_nospec_debugfs_init(void) +{ + debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL, + &fops_barrier_nospec); + return 0; +} +device_initcall(barrier_nospec_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { bool thread_priv; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index fa63d3fff14c..8d0958cc83a8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -124,6 +124,7 @@ static void pnv_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); + setup_barrier_nospec(); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b55ad4286dc7..63b1f0d10ef0 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -534,6 +534,7 @@ void pseries_setup_rfi_flush(void) security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); + setup_barrier_nospec(); } #ifdef CONFIG_PCI_IOV -- cgit v1.2.3 From 51973a815c6b46d7b23b68d6af371ad1c9d503ca Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Apr 2018 14:15:59 +1000 Subject: powerpc/64: Use 
barrier_nospec in syscall entry Our syscall entry is done in assembly so patch in an explicit barrier_nospec. Based on a patch by Michal Suchanek. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3d1af55e09dc..b10e01021214 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -36,6 +36,7 @@ #include #include #include +#include #include #ifdef CONFIG_PPC_BOOK3S #include @@ -178,6 +179,15 @@ system_call: /* label this so stack traces look sane */ clrldi r8,r8,32 15: slwi r0,r0,4 + + barrier_nospec_asm + /* + * Prevent the load of the handler below (based on the user-passed + * system call number) being speculatively executed until the test + * against NR_syscalls and branch to .Lsyscall_enosys above has + * committed. + */ + ldx r12,r11,r0 /* Fetch system call handler [ptr] */ mtctr r12 bctrl /* Call handler */ -- cgit v1.2.3 From a377514519b9a20fa1ea9adddbb4129573129cef Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Mon, 28 May 2018 15:19:14 +0200 Subject: powerpc/64s: Enhance the information in cpu_show_spectre_v1() We now have barrier_nospec as mitigation so print it in cpu_show_spectre_v1() when enabled. 
Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 06d5195f6729..3eb9c45f28d7 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -120,6 +120,9 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) return sprintf(buf, "Not affected\n"); + if (barrier_nospec_enabled) + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return sprintf(buf, "Vulnerable\n"); } -- cgit v1.2.3 From e6684d07e4308430b9b6497265781a6fb9fd87a0 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 21 May 2018 09:51:06 +0530 Subject: powerpc/sstep: Introduce GETTYPE macro Replace 'op->type & INSTR_TYPE_MASK' expression with GETTYPE(op->type) macro. Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/sstep.h | 2 ++ arch/powerpc/kernel/align.c | 2 +- arch/powerpc/lib/sstep.c | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index ab9d849644d0..4547891a684b 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -97,6 +97,8 @@ enum instruction_type { #define SIZE(n) ((n) << 12) #define GETSIZE(w) ((w) >> 12) +#define GETTYPE(t) ((t) & INSTR_TYPE_MASK) + #define MKOP(t, f, s) ((t) | (f) | SIZE(s)) struct instruction_op { diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 3e6c0744c174..11550a3d1ac2 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -339,7 +339,7 @@ int fix_alignment(struct pt_regs *regs) if (r < 0) return -EINVAL; - type = op.type & INSTR_TYPE_MASK; + type = GETTYPE(op.type); if (!OP_IS_LOAD_STORE(type)) 
{ if (op.type != CACHEOP + DCBZ) return -EINVAL; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 49427a3ee104..f18d70449255 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -2642,7 +2642,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) unsigned long next_pc; next_pc = truncate_if_32bit(regs->msr, regs->nip + 4); - switch (op->type & INSTR_TYPE_MASK) { + switch (GETTYPE(op->type)) { case COMPUTE: if (op->type & SETREG) regs->gpr[op->reg] = op->val; @@ -2740,7 +2740,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) err = 0; size = GETSIZE(op->type); - type = op->type & INSTR_TYPE_MASK; + type = GETTYPE(op->type); cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE); ea = truncate_if_32bit(regs->msr, op->ea); @@ -3002,7 +3002,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr) } err = 0; - type = op.type & INSTR_TYPE_MASK; + type = GETTYPE(op.type); if (OP_IS_LOAD_STORE(type)) { err = emulate_loadstore(regs, &op); -- cgit v1.2.3 From 9887334b804892f10262fa7f805998d554e04367 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 28 Feb 2018 19:21:45 +0100 Subject: powerpc/dma: remove unnecessary BUG() Direction is already checked in all calling functions in include/linux/dma-mapping.h and also in the called function __dma_sync(), so there is really no need to check it once more here.
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/dma.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index da20569de9d4..e1cd6e979348 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -235,8 +235,6 @@ static inline dma_addr_t dma_nommu_map_page(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - BUG_ON(dir == DMA_NONE); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) __dma_sync_page(page, offset, size, dir); -- cgit v1.2.3 From 0cc377d16e565b90b43b7550cdf5b3abd7942a75 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 Apr 2018 13:23:10 +0200 Subject: powerpc/misc: merge reloc_offset() and add_reloc_offset() reloc_offset() is the same as add_reloc_offset(0) Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc.S | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2f18fc1368d0..0b196cdcd15d 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -25,23 +25,12 @@ /* * Returns (address we are running at) - (address we were linked at) * for use before the text and data are mapped to KERNELBASE. - */ - -_GLOBAL(reloc_offset) - mflr r0 - bl 1f -1: mflr r3 - PPC_LL r4,(2f-1b)(r3) - subf r3,r4,r3 - mtlr r0 - blr - .align 3 -2: PPC_LONG 1b - -/* * add_reloc_offset(x) returns x + reloc_offset(). 
*/ + +_GLOBAL(reloc_offset) + li r3, 0 _GLOBAL(add_reloc_offset) mflr r0 bl 1f -- cgit v1.2.3 From 56b04d568f880a48d892e840cfaf4efc0f0ce39b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 24 Apr 2018 18:04:25 +0200 Subject: powerpc/signal32: Use fault_in_pages_readable() to prefault user context Use fault_in_pages_readable() to prefault user context instead of open coding Signed-off-by: Christophe Leroy Reviewed-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/signal_32.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 342ac78f620f..5eedbb282d42 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_PPC64 @@ -1049,7 +1050,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, #endif { struct pt_regs *regs = current_pt_regs(); - unsigned char tmp __maybe_unused; int ctx_has_vsx_region = 0; #ifdef CONFIG_PPC64 @@ -1113,9 +1113,8 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, } if (new_ctx == NULL) return 0; - if (!access_ok(VERIFY_READ, new_ctx, ctx_size) - || __get_user(tmp, (u8 __user *) new_ctx) - || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1)) + if (!access_ok(VERIFY_READ, new_ctx, ctx_size) || + fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -1242,7 +1241,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, struct pt_regs *regs = current_pt_regs(); struct sig_dbg_op op; int i; - unsigned char tmp __maybe_unused; unsigned long new_msr = regs->msr; #ifdef CONFIG_PPC_ADV_DEBUG_REGS unsigned long new_dbcr0 = current->thread.debug.dbcr0; @@ -1298,9 +1296,8 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, current->thread.debug.dbcr0 = new_dbcr0; #endif - if 
(!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) - || __get_user(tmp, (u8 __user *) ctx) - || __get_user(tmp, (u8 __user *) (ctx + 1) - 1)) + if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx)) || + fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx))) return -EFAULT; /* -- cgit v1.2.3 From 60f1d2893ee6de65cdea609c84950b133e76a769 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 29 May 2018 16:19:14 +0000 Subject: powerpc/time: inline arch_vtime_task_switch() arch_vtime_task_switch() is a small function which is called only from vtime_common_task_switch(), so it is worth inlining Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputime.h | 16 +++++++++++++++- arch/powerpc/kernel/time.c | 21 --------------------- 2 files changed, 15 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 99b541865d8d..bc4903badb3f 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -47,9 +47,23 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) * has to be populated in the new task */ #ifdef CONFIG_PPC64 +#define get_accounting(tsk) (&get_paca()->accounting) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } #else -void arch_vtime_task_switch(struct task_struct *tsk); +#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. 
+ */ +static inline void arch_vtime_task_switch(struct task_struct *prev) +{ + struct cpu_accounting_data *acct = get_accounting(current); + struct cpu_accounting_data *acct0 = get_accounting(prev); + + acct->starttime = acct0->starttime; + acct->startspurr = acct0->startspurr; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2530cf60b839..70f145e02487 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -163,12 +163,6 @@ EXPORT_SYMBOL(__cputime_usec_factor); void (*dtl_consumer)(struct dtl_entry *, u64); #endif -#ifdef CONFIG_PPC64 -#define get_accounting(tsk) (&get_paca()->accounting) -#else -#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) -#endif - static void calc_cputime_factors(void) { struct div_result res; @@ -421,21 +415,6 @@ void vtime_flush(struct task_struct *tsk) acct->softirq_time = 0; } -#ifdef CONFIG_PPC32 -/* - * Called from the context switch with interrupts disabled, to charge all - * accumulated times to the current process, and to prepare accounting on - * the next process. - */ -void arch_vtime_task_switch(struct task_struct *prev) -{ - struct cpu_accounting_data *acct = get_accounting(current); - - acct->starttime = get_accounting(prev)->starttime; - acct->startspurr = get_accounting(prev)->startspurr; -} -#endif /* CONFIG_PPC32 */ - #else /* ! 
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif -- cgit v1.2.3 From 415520373975d2eba565c256d2cad875ed4e9243 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 5 Jun 2018 06:57:43 +0000 Subject: powerpc: fix build failure by disabling attribute-alias warning in pci_32 Commit 2479bfc9bc600 ("powerpc: Fix build by disabling attribute-alias warning for SYSCALL_DEFINEx") forgot arch/powerpc/kernel/pci_32.c. The latest GCC version emits the following warnings. As arch/powerpc code is built with -Werror, this breaks the build with GCC 8.1. This patch inhibits this warning. In file included from arch/powerpc/kernel/pci_32.c:14: ./include/linux/syscalls.h:233:18: error: 'sys_pciconfig_iobase' alias between functions of incompatible types 'long int(long int, long unsigned int, long unsigned int)' and 'long int(long int, long int, long int)' [-Werror=attribute-alias] asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ ^~~ ./include/linux/syscalls.h:222:2: note: in expansion of macro '__SYSCALL_DEFINEx' __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ^~~~~~~~~~~~~~~~~ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_32.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index d63b488d34d7..4f861055a852 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -285,6 +285,9 @@ pci_bus_to_hose(int bus) * Note that the returned IO or memory base is a physical address */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, unsigned long, devfn) { @@ -310,3 +313,4 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which, return result; } +#pragma GCC diagnostic pop -- cgit v1.2.3