From cdfc8ed6904d7b0c0ca23d619b387b32070703b1 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 19 Nov 2015 14:26:28 +1100 Subject: powerpc: Remove unused function trace_syscall() This function has been unused since commit 14cf11af6cf6 ("powerpc: Merge enough to start building in arch/powerpc."), so remove it. Signed-off-by: Rashmica Gupta Reviewed-by: Andrew Donnellan Reviewed-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 37de90f8a845..b6becc795bb5 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1313,13 +1313,6 @@ void nonrecoverable_exception(struct pt_regs *regs) die("nonrecoverable exception", regs, SIGKILL); } -void trace_syscall(struct pt_regs *regs) -{ - printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", - current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0], - regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); -} - void kernel_fp_unavailable_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); -- cgit v1.2.3 From f43194e45852b0455d2a3e3730f70daa76958423 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 19 Nov 2015 17:04:53 +1100 Subject: powerpc: Standardise on NR_syscalls rather than __NR_syscalls. Most architectures use NR_syscalls as the #define for the number of syscalls. We use __NR_syscalls, and then define NR_syscalls as __NR_syscalls. __NR_syscalls is not used outside arch code, whereas NR_syscalls is. So, as NR_syscalls must be defined and __NR_syscalls need not be, replace __NR_syscalls with NR_syscalls. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/unistd.h | 3 +-- arch/powerpc/include/asm/vdso_datapage.h | 2 +- arch/powerpc/kernel/systbl_chk.c | 2 +- arch/powerpc/kernel/systbl_chk.sh | 2 +- arch/powerpc/kernel/vdso.c | 2 +- arch/powerpc/kernel/vdso32/datapage.S | 2 +- arch/powerpc/kernel/vdso64/datapage.S | 2 +- arch/powerpc/platforms/cell/spufs/run.c | 2 +- 8 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 4b6b8ace18e0..6a5ace5fa0c8 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,10 +12,9 @@ #include -#define __NR_syscalls 379 +#define NR_syscalls 379 #define __NR__exit __NR_exit -#define NR_syscalls __NR_syscalls #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index b73a8199f161..1afe90ade595 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -41,7 +41,7 @@ #include #include -#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32) +#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) /* * So here is the ppc64 backward compatible version diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c index 2384129f5893..55323a620cfe 100644 --- a/arch/powerpc/kernel/systbl_chk.c +++ b/arch/powerpc/kernel/systbl_chk.c @@ -57,4 +57,4 @@ START_TABLE #include -END_TABLE __NR_syscalls +END_TABLE NR_syscalls diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh index 19415e7674a5..31b6e7c358ca 100644 --- a/arch/powerpc/kernel/systbl_chk.sh +++ b/arch/powerpc/kernel/systbl_chk.sh @@ -16,7 +16,7 
@@ awk 'BEGIN { num = -1; } # Ignore the beginning of the file /^START_TABLE/ { num = 0; next; } /^END_TABLE/ { if (num != $2) { - printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n", + printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n", $2, num - 1; exit(1); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b457bfa28436..def1b8b5e6c1 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -671,7 +671,7 @@ static void __init vdso_setup_syscall_map(void) extern unsigned long sys_ni_syscall; - for (i = 0; i < __NR_syscalls; i++) { + for (i = 0; i < NR_syscalls; i++) { #ifdef CONFIG_PPC64 if (sys_call_table[i*2] != sys_ni_syscall) vdso_data->syscall_map_64[i >> 5] |= diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 59cf5f452879..3745113fcc65 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -61,7 +61,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) addi r3,r3,CFG_SYSCALL_MAP32 cmpli cr0,r4,0 beqlr - li r0,__NR_syscalls + li r0,NR_syscalls stw r0,0(r4) crclr cr0*4+so blr diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 2f01c4a0d8a0..184a6ba7f283 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -62,7 +62,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) cmpli cr0,r4,0 crclr cr0*4+so beqlr - li r0,__NR_syscalls + li r0,NR_syscalls stw r0,0(r4) blr .cfi_endproc diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 4ddf769a64e5..9f79004e6d6f 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -326,7 +326,7 @@ static int spu_process_callback(struct spu_context *ctx) spu_ret = -ENOSYS; npc += 4; - if (s.nr_ret < __NR_syscalls) { + if (s.nr_ret < NR_syscalls) { spu_release(ctx); /* do actual system call from here */ spu_ret = spu_sys_callback(&s); -- cgit v1.2.3 From 343c3327c12b13655192983007de94bd44fd7941 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Sat, 21 Nov 2015 17:08:16 +1100 Subject: powerpc: Add rN aliases to the pt_regs_offset table. It is common practice with powerpc to use 'rN' to refer to register 'N'. However when using the pt_regs_offset table we have to use 'gprN'. So add aliases such that both 'rN' and 'gprN' can be used. For example, we can currently do: $ su - $ cd /sys/kernel/debug/tracing $ echo "p:probe/sys_fchownat sys_fchownat %gpr3:s32 +0(%gpr4):string %gpr5:s32 %gpr6:s32 %gpr7:s32" > kprobe_events $ echo 1 > events/probe/sys_fchownat/enable $ touch /tmp/foo $ chown root /tmp/foo $ echo 0 > events/enable $ cat trace chown-2925 [014] d... 
76.160657: sys_fchownat: (SyS_fchownat+0x8/0x1a0) arg1=-100 arg2="/tmp/foo" arg3=0 arg4=-1 arg5=0 Instead we'd like to be able to use: $ echo "p:probe/sys_fchownat sys_fchownat %r3:s32 +0(%r4):string %r5:s32 %r6:s32 %r7:s32" > kprobe_events Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 737c0d0b53ac..30a03c03fe73 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -60,6 +60,7 @@ struct pt_regs_offset { #define STR(s) #s /* convert to string */ #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} #define GPR_OFFSET_NAME(num) \ + {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])} #define REG_OFFSET_END {.name = NULL, .offset = 0} -- cgit v1.2.3 From 31a40e2b052c0f2b80df7b56928f9d5ff9c96933 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Nov 2015 16:44:42 +1100 Subject: powerpc/64: Include KVM guest test in all interrupt vectors Currently, if HV KVM is configured but PR KVM isn't, we don't include a test to see whether we were interrupted in KVM guest context for the set of interrupts which get delivered directly to the guest by hardware if they occur in the guest. This includes things like program interrupts. However, the recent bug where userspace could set the MSR for a VCPU to have an illegal value in the TS field, and thus cause a TM Bad Thing type of program interrupt on the hrfid that enters the guest, showed that we can never be completely sure that these interrupts can never occur in the guest entry/exit code. If one of these interrupts does happen and we have HV KVM configured but not PR KVM, then we end up trying to run the handler in the host with the MMU set to the guest MMU context, which generally ends badly. Thus, for robustness it is better to have the test in every interrupt vector, so that if some way is found to trigger some interrupt in the guest entry/exit path, we can handle it without immediately crashing the host. This means that the distinction between KVMTEST and KVMTEST_PR goes away. Thus we delete KVMTEST_PR and associated macros and use KVMTEST everywhere that we previously used either KVMTEST_PR or KVMTEST. It also means that SOFTEN_TEST_HV_201 becomes the same as SOFTEN_TEST_PR, so we deleted SOFTEN_TEST_HV_201 and use SOFTEN_TEST_PR instead. 
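
As a rough, self-contained C model of the test this patch puts in every vector (illustrative only: the real check is the KVMTEST assembly macro reading guest state from the PACA, and all of the names below are invented for this sketch):

  #include <stdbool.h>
  #include <stdio.h>

  static bool in_guest; /* stand-in for the in-guest flag kept in the PACA */

  static void deliver_to_kvm(unsigned int vec)  { printf("KVM handles vector 0x%x\n", vec); }
  static void handle_in_host(unsigned int vec)  { printf("host handles vector 0x%x\n", vec); }

  /* With this patch the guest test runs in every vector, so a stray
   * interrupt taken in the guest entry/exit path is handed to KVM rather
   * than run as a host handler while the MMU is still in guest context. */
  static void vector_entry(unsigned int vec)
  {
          if (in_guest)
                  deliver_to_kvm(vec);
          else
                  handle_in_host(vec);
  }

  int main(void)
  {
          in_guest = true;
          vector_entry(0x700); /* e.g. a program check taken in the guest */
          return 0;
  }
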
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 21 +++----------------- arch/powerpc/kernel/exceptions-64s.S | 34 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 35 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 77f52b26dad6..9ee10781121f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -263,17 +263,6 @@ do_kvm_##n: \ #define KVM_HANDLER_SKIP(area, h, n) #endif -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE -#define KVMTEST_PR(n) __KVMTEST(n) -#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n) -#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) - -#else -#define KVMTEST_PR(n) -#define KVM_HANDLER_PR(area, h, n) -#define KVM_HANDLER_PR_SKIP(area, h, n) -#endif - #define NOTEST(n) /* @@ -360,13 +349,13 @@ label##_pSeries: \ HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST_PR, vec) + EXC_STD, KVMTEST, vec) /* Version of above for when we have to branch out-of-line */ #define STD_EXCEPTION_PSERIES_OOL(vec, label) \ .globl label##_pSeries; \ label##_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD) #define STD_EXCEPTION_HV(loc, vec, label) \ @@ -436,17 +425,13 @@ label##_relon_hv: \ #define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ - KVMTEST_PR(vec); \ + KVMTEST(vec); \ _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_TEST_HV(vec) \ KVMTEST(vec); \ _SOFTEN_TEST(EXC_HV, vec) -#define SOFTEN_TEST_HV_201(vec) \ - KVMTEST(vec); \ - _SOFTEN_TEST(EXC_STD, vec) - #define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a0399c2af11..1a03142a69fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -242,7 +242,7 @@ instruction_access_slb_pSeries: HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ #ifdef __DISABLED__ @@ -276,18 +276,18 @@ hardware_interrupt_hv: KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_HV_201) + EXC_STD, SOFTEN_TEST_PR) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) . 
= 0x900 .globl decrementer_pSeries @@ -297,10 +297,10 @@ decrementer_pSeries: STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries @@ -332,7 +332,7 @@ system_call_pSeries: KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -408,7 +408,7 @@ hv_facility_unavailable_trampoline: #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) - KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) . = 0x1500 .global denorm_exception_hv @@ -436,7 +436,7 @@ denorm_exception_hv: #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) @@ -536,9 +536,9 @@ machine_check_pSeries_0: KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) #ifdef CONFIG_PPC_DENORMALISATION @@ -621,13 +621,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) /* moved from 0xf00 */ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60) STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) -- cgit v1.2.3 From 07e45c120c9c61b792be62182b0a8f706ee2ab24 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:53 +1100 Subject: powerpc: Don't disable kernel FP/VMX/VSX MSR bits on context switch Writing the MSR is slow, so we want to avoid it whenever possible. A subsequent patch will add a debug option that strictly manages the FP/VMX/VSX unavailable bits. For now just remove it, matching what we do in other areas of the kernel (eg enable_kernel_altivec()). A context switch microbenchmark using yield(): http://ozlabs.org/~anton/junkcode/context_switch2.c ./context_switch2 --test=yield --fp 0 0 shows an improvement of almost 3% on POWER8. 
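
The benchmark lives at the URL above; as a rough idea of what a yield()-based context switch microbenchmark looks like, here is a minimal sketch. This is a simplified stand-in, not the real context_switch2.c, which also pins the two tasks to the CPUs given on the command line and can touch FP state via --fp:

  #define _GNU_SOURCE
  #include <sched.h>
  #include <signal.h>
  #include <stdio.h>
  #include <sys/types.h>
  #include <unistd.h>

  static pid_t child;
  static volatile unsigned long iterations;

  static void report(int sig)
  {
          (void)sig;
          printf("%lu yields in 5 seconds\n", iterations);
          kill(child, SIGKILL);
          _exit(0);
  }

  int main(void)
  {
          child = fork();
          if (child == 0)
                  for (;;)
                          sched_yield(); /* partner task to switch with */

          signal(SIGALRM, report);
          alarm(5);
          for (;;) {
                  sched_yield();
                  iterations++;
          }
  }
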
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index a94f155db78e..93bb284fddf9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -453,26 +453,13 @@ _GLOBAL(_switch) SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) mflr r20 /* Return to switch caller */ - mfmsr r22 - li r0, MSR_FP #ifdef CONFIG_VSX BEGIN_FTR_SECTION - oris r0,r0,MSR_VSX@h /* Disable VSX */ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif /* CONFIG_VSX */ #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ mfspr r24,SPRN_VRSAVE /* save vrsave register value */ std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ - and. r0,r0,r22 - beq+ 1f - andc r22,r22,r0 - MTMSRD(r22) - isync -1: std r20,_NIP(r1) + std r20,_NIP(r1) mfcr r23 std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ -- cgit v1.2.3 From af72ab646a6bee724f190820e8f56497a5b635f0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:54 +1100 Subject: powerpc: Don't disable MSR bits in do_load_up_transact_*() functions Similar to the non TM load_up_*() functions, don't disable the MSR bits on the way out. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fpu.S | 4 ---- arch/powerpc/kernel/vector.S | 4 ---- 2 files changed, 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 9ad236e5d2c9..38eb79b8a034 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -73,10 +73,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) MTFSF_L(fr0) REST_32FPVSRS(0, R4, R7) - /* FP/VSX off again */ - MTMSRD(r6) - SYNC - blr #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f5c80d567d8d..1c5425966204 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -29,10 +29,6 @@ _GLOBAL(do_load_up_transact_altivec) addi r10,r3,THREAD_TRANSACT_VRSTATE REST_32VRS(0,r4,r10) - /* Disable VEC again. */ - MTMSRD(r6) - isync - blr #endif -- cgit v1.2.3 From 152d523e6307c7152f9986a542f873b5c5863937 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:55 +1100 Subject: powerpc: Create context switch helpers save_sprs() and restore_sprs() Move all our context switch SPR save and restore code into two helpers. We do a few optimisations: - Group all mfsprs and all mtsprs. In many cases an mtspr sets a scoreboarding bit that an mfspr waits on, so the current practice of mfspr A; mtspr A; mfspr B; mtspr B is the worst scheduling we can do. - SPR writes are slow, so check that the value is changing before writing it. A context switch microbenchmark using yield(): http://ozlabs.org/~anton/junkcode/context_switch2.c ./context_switch2 --test=yield 0 0 shows an improvement of almost 10% on POWER8. 
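
To make the scheduling point concrete, here is a small self-contained sketch of the two patterns; mfspr()/mtspr() are stubbed with an array so the example compiles outside the kernel, and the register indices are arbitrary:

  static unsigned long spr[2]; /* fake SPR file for the sketch */
  static unsigned long mfspr(int n)                  { return spr[n]; }
  static void          mtspr(int n, unsigned long v) { spr[n] = v; }

  struct thread { unsigned long bescr, ebbhr; };

  /* Worst case: each mtspr sets a scoreboard bit that the very next
   * mfspr has to wait on. */
  static void switch_sprs_interleaved(struct thread *prev, struct thread *next)
  {
          prev->bescr = mfspr(0); mtspr(0, next->bescr);
          prev->ebbhr = mfspr(1); mtspr(1, next->ebbhr);
  }

  /* What save_sprs()/restore_sprs() do instead: group the reads, then
   * issue only the writes whose value actually changes. */
  static void switch_sprs_grouped(struct thread *prev, struct thread *next)
  {
          prev->bescr = mfspr(0);
          prev->ebbhr = mfspr(1);

          if (prev->bescr != next->bescr)
                  mtspr(0, next->bescr);
          if (prev->ebbhr != next->ebbhr)
                  mtspr(1, next->ebbhr);
  }

  int main(void)
  {
          struct thread a = { 1, 2 }, b = { 1, 3 };
          switch_sprs_interleaved(&a, &b);
          switch_sprs_grouped(&b, &a);
          return 0;
  }
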
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/include/asm/switch_to.h | 11 ----- arch/powerpc/kernel/entry_64.S | 60 +---------------------- arch/powerpc/kernel/process.c | 92 +++++++++++++++++++++++++++++++----- 4 files changed, 82 insertions(+), 82 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 5afea361beaa..c273f3e0ba84 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -294,6 +294,7 @@ struct thread_struct { #endif #ifdef CONFIG_PPC64 unsigned long dscr; + unsigned long fscr; /* * This member element dscr_inherit indicates that the process * has explicitly attempted and changed the DSCR register value diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 15cca17cba4b..33a071d24ba8 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -15,17 +15,6 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); -#ifdef CONFIG_PPC_BOOK3S_64 -static inline void save_early_sprs(struct thread_struct *prev) -{ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - prev->tar = mfspr(SPRN_TAR); - if (cpu_has_feature(CPU_FTR_DSCR)) - prev->dscr = mfspr(SPRN_DSCR); -} -#else -static inline void save_early_sprs(struct thread_struct *prev) {} -#endif extern void enable_kernel_fp(void); extern void enable_kernel_altivec(void); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 93bb284fddf9..e84e5bc7fe34 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -453,29 +453,12 @@ _GLOBAL(_switch) SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) mflr r20 /* Return to switch caller */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - mfspr r24,SPRN_VRSAVE /* save vrsave register value */ - std r24,THREAD_VRSAVE(r3) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ + std r20,_NIP(r1) mfcr r23 std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - /* Event based branch registers */ - mfspr r0, SPRN_BESCR - std r0, THREAD_BESCR(r3) - mfspr r0, SPRN_EBBHR - std r0, THREAD_EBBHR(r3) - mfspr r0, SPRN_EBBRR - std r0, THREAD_EBBRR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -#endif - #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -563,47 +546,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) mr r1,r8 /* start using new stack pointer */ std r7,PACAKSAVE(r13) -#ifdef CONFIG_PPC_BOOK3S_64 -BEGIN_FTR_SECTION - /* Event based branch registers */ - ld r0, THREAD_BESCR(r4) - mtspr SPRN_BESCR, r0 - ld r0, THREAD_EBBHR(r4) - mtspr SPRN_EBBHR, r0 - ld r0, THREAD_EBBRR(r4) - mtspr SPRN_EBBRR, r0 - - ld r0,THREAD_TAR(r4) - mtspr SPRN_TAR,r0 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -#endif - -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - ld r0,THREAD_VRSAVE(r4) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - lwz r6,THREAD_DSCR_INHERIT(r4) - ld r0,THREAD_DSCR(r4) - cmpwi r6,0 - bne 1f - ld r0,PACA_DSCR_DEFAULT(r13) -1: -BEGIN_FTR_SECTION_NESTED(70) - mfspr r8, SPRN_FSCR - rldimi r8, r6, FSCR_DSCR_LG, (63 - 
FSCR_DSCR_LG) - mtspr SPRN_FSCR, r8 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70) - cmpd r0,r25 - beq 2f - mtspr SPRN_DSCR,r0 2: -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif - ld r6,_CCR(r1) mtcrf 0xFF,r6 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 75b6676c1a0b..3aabed4a60a9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -742,6 +742,73 @@ void restore_tm_state(struct pt_regs *regs) #define __switch_to_tm(prev) #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +static inline void save_sprs(struct thread_struct *t) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + t->vrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DSCR)) + t->dscr = mfspr(SPRN_DSCR); + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + t->bescr = mfspr(SPRN_BESCR); + t->ebbhr = mfspr(SPRN_EBBHR); + t->ebbrr = mfspr(SPRN_EBBRR); + + t->fscr = mfspr(SPRN_FSCR); + + /* + * Note that the TAR is not available for use in the kernel. + * (To provide this, the TAR should be backed up/restored on + * exception entry/exit instead, and be in pt_regs. FIXME, + * this should be in pt_regs anyway (for debug).) + */ + t->tar = mfspr(SPRN_TAR); + } +#endif +} + +static inline void restore_sprs(struct thread_struct *old_thread, + struct thread_struct *new_thread) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + old_thread->vrsave != new_thread->vrsave) + mtspr(SPRN_VRSAVE, new_thread->vrsave); +#endif +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + u64 dscr = get_paca()->dscr_default; + u64 fscr = old_thread->fscr & ~FSCR_DSCR; + + if (new_thread->dscr_inherit) { + dscr = new_thread->dscr; + fscr |= FSCR_DSCR; + } + + if (old_thread->dscr != dscr) + mtspr(SPRN_DSCR, dscr); + + if (old_thread->fscr != fscr) + mtspr(SPRN_FSCR, fscr); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if (old_thread->bescr != new_thread->bescr) + mtspr(SPRN_BESCR, new_thread->bescr); + if (old_thread->ebbhr != new_thread->ebbhr) + mtspr(SPRN_EBBHR, new_thread->ebbhr); + if (old_thread->ebbrr != new_thread->ebbrr) + mtspr(SPRN_EBBRR, new_thread->ebbrr); + + if (old_thread->tar != new_thread->tar) + mtspr(SPRN_TAR, new_thread->tar); + } +#endif +} + struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -751,17 +818,16 @@ struct task_struct *__switch_to(struct task_struct *prev, struct ppc64_tlb_batch *batch; #endif + new_thread = &new->thread; + old_thread = &current->thread; + WARN_ON(!irqs_disabled()); - /* Back up the TAR and DSCR across context switches. * Note that the TAR is not available for use in the kernel. (To * provide this, the TAR should be backed up/restored on exception * entry/exit instead, and be in pt_regs. FIXME, this should be in * pt_regs anyway (for debug). * Save the TAR and DSCR here before we do treclaim/trecheckpoint as * these will change them. + /* + * We need to save SPRs before treclaim/trecheckpoint as these will + * change a number of them. 
*/ - save_early_sprs(&prev->thread); + save_sprs(&prev->thread); __switch_to_tm(prev); @@ -844,10 +910,6 @@ struct task_struct *__switch_to(struct task_struct *prev, #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif - - new_thread = &new->thread; - old_thread = &current->thread; - #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process @@ -883,6 +945,10 @@ struct task_struct *__switch_to(struct task_struct *prev, last = _switch(old_thread, new_thread); + /* Need to recalculate these after calling _switch() */ + old_thread = &last->thread; + new_thread = &current->thread; + #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; @@ -891,6 +957,8 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ + restore_sprs(old_thread, new_thread); + return last; } -- cgit v1.2.3 From 68bfa962bff5783ad65de9dc7f3b9e16ea466766 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:56 +1100 Subject: powerpc: Remove redundant mflr in _switch No need to execute mflr twice. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e84e5bc7fe34..c8b4225a0095 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -452,9 +452,7 @@ _GLOBAL(_switch) /* r3-r13 are caller saved -- Cort */ SAVE_8GPRS(14, r1) SAVE_10GPRS(22, r1) - mflr r20 /* Return to switch caller */ - - std r20,_NIP(r1) + std r0,_NIP(r1) /* Return to switch caller */ mfcr r23 std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ -- cgit v1.2.3 From af1bbc3dd3d501d27da72e1764afe5f5b0d3882d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:57 +1100 Subject: powerpc: Remove UP only lazy floating point and vector optimisations The UP only lazy floating point and vector optimisations were written back when SMP was not common, and neither glibc nor gcc used vector instructions. Now SMP is very common, glibc aggressively uses vector instructions and gcc autovectorises. We want to add new optimisations that apply to both UP and SMP, but in preparation for that remove these UP only optimisations. 
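
For readers who never saw the lazy scheme, the following self-contained sketch contrasts the two approaches; the types and function names are invented for illustration and are not kernel code:

  #include <stdio.h>

  struct task { int id; int used_fp; };

  static struct task *last_task_used_math; /* the removed UP-only pointer */

  static void save_fp(struct task *t)
  {
          printf("saving FP state of task %d\n", t->id);
  }

  /* Removed lazy scheme: nothing is saved at switch time; the save is
   * deferred until the *next* FP user takes an FP-unavailable trap. */
  static void fp_unavailable_trap(struct task *t)
  {
          if (last_task_used_math && last_task_used_math != t)
                  save_fp(last_task_used_math);
          last_task_used_math = t;
  }

  /* Kept (previously SMP-only) scheme: save eagerly at every context
   * switch if the outgoing task used FP; reloads happen via the trap. */
  static void context_switch(struct task *prev)
  {
          if (prev->used_fp)
                  save_fp(prev);
  }

  int main(void)
  {
          struct task a = { 1, 1 }, b = { 2, 1 };

          context_switch(&a);        /* eager: a's state saved now */

          last_task_used_math = &a;
          fp_unavailable_trap(&b);   /* lazy: a's state saved only here */
          return 0;
  }
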
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 6 -- arch/powerpc/include/asm/switch_to.h | 8 --- arch/powerpc/kernel/fpu.S | 35 ----------- arch/powerpc/kernel/head_fsl_booke.S | 32 ---------- arch/powerpc/kernel/idle_power7.S | 7 --- arch/powerpc/kernel/process.c | 113 +---------------------------------- arch/powerpc/kernel/signal_32.c | 18 ------ arch/powerpc/kernel/signal_64.c | 18 ------ arch/powerpc/kernel/vector.S | 68 --------------------- 9 files changed, 1 insertion(+), 304 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c273f3e0ba84..a2e891840806 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -88,12 +88,6 @@ struct task_struct; void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); void release_thread(struct task_struct *); -/* Lazy FPU handling on uni-processor */ -extern struct task_struct *last_task_used_math; -extern struct task_struct *last_task_used_altivec; -extern struct task_struct *last_task_used_vsx; -extern struct task_struct *last_task_used_spe; - #ifdef CONFIG_PPC32 #if CONFIG_TASK_SIZE > CONFIG_KERNEL_START diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 33a071d24ba8..bd1d93318350 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -27,14 +27,6 @@ extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); -#ifndef CONFIG_SMP -extern void discard_lazy_cpu_state(void); -#else -static inline void discard_lazy_cpu_state(void) -{ -} -#endif - #ifdef CONFIG_PPC_FPU extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 38eb79b8a034..50d2352f2cf4 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -132,31 +132,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) SYNC MTMSRD(r5) /* enable use of fpu now */ isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. 
- */ -#ifndef CONFIG_SMP - LOAD_REG_ADDRBASE(r3, last_task_used_math) - toreal(r3) - PPC_LL r4,ADDROFF(last_task_used_math)(r3) - PPC_LCMPI 0,r4,0 - beq 1f - toreal(r4) - addi r4,r4,THREAD /* want last_task_used_math->thread */ - addi r10,r4,THREAD_FPSTATE - SAVE_32FPVSRS(0, R5, R10) - mffs fr0 - stfd fr0,FPSTATE_FPSCR(r10) - PPC_LL r5,PT_REGS(r4) - toreal(r5) - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r10,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r10 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ /* enable use of FP after return */ #ifdef CONFIG_PPC32 mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ @@ -175,11 +150,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) REST_32FPVSRS(0, R4, R10) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - fromreal(r4) - PPC_STL r4,ADDROFF(last_task_used_math)(r3) -#endif /* CONFIG_SMP */ /* restore registers and return */ /* we haven't used ctr or xer or lr */ blr @@ -226,11 +196,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) andc r4,r4,r3 /* disable FP for previous task */ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - LOAD_REG_ADDRBASE(r4,last_task_used_math) - PPC_STL r5,ADDROFF(last_task_used_math)(r4) -#endif /* CONFIG_SMP */ blr /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index fffd1f96bb1d..ec936abbcadc 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -857,29 +857,6 @@ _GLOBAL(load_up_spe) oris r5,r5,MSR_SPE@h mtmsr r5 /* enable use of SPE now */ isync -/* - * For SMP, we don't do lazy SPE switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_spe in switch_to. - */ -#ifndef CONFIG_SMP - lis r3,last_task_used_spe@ha - lwz r4,last_task_used_spe@l(r3) - cmpi 0,r4,0 - beq 1f - addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ - SAVE_32EVRS(0,r10,r4,THREAD_EVR0) - evxor evr10, evr10, evr10 /* clear out evr10 */ - evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ - li r5,THREAD_ACC - evstddx evr10, r4, r5 /* save off accumulator */ - lwz r5,PT_REGS(r4) - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r10,MSR_SPE@h - andc r4,r4,r10 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* !CONFIG_SMP */ /* enable use of SPE after return */ oris r9,r9,MSR_SPE@h mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ @@ -889,10 +866,6 @@ _GLOBAL(load_up_spe) evlddx evr4,r10,r5 evmra evr4,evr4 REST_32EVRS(0,r10,r5,THREAD_EVR0) -#ifndef CONFIG_SMP - subi r4,r5,THREAD - stw r4,last_task_used_spe@l(r3) -#endif /* !CONFIG_SMP */ blr /* @@ -1035,11 +1008,6 @@ _GLOBAL(giveup_spe) andc r4,r4,r3 /* disable SPE for previous task */ stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - lis r4,last_task_used_spe@ha - stw r5,last_task_used_spe@l(r4) -#endif /* !CONFIG_SMP */ blr #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 112ccf497562..cf4fb5429cf1 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -89,13 +89,6 @@ _GLOBAL(power7_powersave_common) std r0,_LINK(r1) std r0,_NIP(r1) -#ifndef CONFIG_SMP - /* Make sure FPU, VSX etc... 
are flushed as we may lose - * state when going to nap mode - */ - bl discard_lazy_cpu_state -#endif /* CONFIG_SMP */ - /* Hard disable interrupts */ mfmsr r9 rldicl r9,r9,48,1 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 3aabed4a60a9..e098f4315643 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -67,13 +67,6 @@ extern unsigned long _get_SP(void); -#ifndef CONFIG_SMP -struct task_struct *last_task_used_math = NULL; -struct task_struct *last_task_used_altivec = NULL; -struct task_struct *last_task_used_vsx = NULL; -struct task_struct *last_task_used_spe = NULL; -#endif - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void giveup_fpu_maybe_transactional(struct task_struct *tsk) { @@ -134,16 +127,14 @@ void flush_fp_to_thread(struct task_struct *tsk) */ preempt_disable(); if (tsk->thread.regs->msr & MSR_FP) { -#ifdef CONFIG_SMP /* * This should only ever be called for current or * for a stopped child process. Since we save away - * the FP register state on context switch on SMP, + * the FP register state on context switch, * there is something wrong if a stopped child appears * to still have its FP state in the CPU registers. */ BUG_ON(tsk != current); -#endif giveup_fpu_maybe_transactional(tsk); } preempt_enable(); @@ -156,14 +147,10 @@ void enable_kernel_fp(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) giveup_fpu_maybe_transactional(current); else giveup_fpu(NULL); /* just enables FP for kernel */ -#else - giveup_fpu_maybe_transactional(last_task_used_math); -#endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_fp); @@ -172,14 +159,10 @@ void enable_kernel_altivec(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) giveup_altivec_maybe_transactional(current); else giveup_altivec_notask(); -#else - giveup_altivec_maybe_transactional(last_task_used_altivec); -#endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_altivec); @@ -192,9 +175,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_VEC) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif giveup_altivec_maybe_transactional(tsk); } preempt_enable(); @@ -208,14 +189,10 @@ void enable_kernel_vsx(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) giveup_vsx(current); else giveup_vsx(NULL); /* just enable vsx for kernel - force */ -#else - giveup_vsx(last_task_used_vsx); -#endif /* CONFIG_SMP */ } EXPORT_SYMBOL(enable_kernel_vsx); @@ -232,9 +209,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_VSX) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif giveup_vsx(tsk); } preempt_enable(); @@ -249,14 +224,10 @@ void enable_kernel_spe(void) { WARN_ON(preemptible()); -#ifdef CONFIG_SMP if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) giveup_spe(current); else giveup_spe(NULL); /* just enable SPE for kernel - force */ -#else - giveup_spe(last_task_used_spe); -#endif /* __SMP __ */ } EXPORT_SYMBOL(enable_kernel_spe); @@ -265,9 +236,7 @@ void flush_spe_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); if (tsk->thread.regs->msr & MSR_SPE) { -#ifdef CONFIG_SMP BUG_ON(tsk != current); -#endif tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); giveup_spe(tsk); } @@ -276,32 +245,6 @@ void 
flush_spe_to_thread(struct task_struct *tsk) } #endif /* CONFIG_SPE */ -#ifndef CONFIG_SMP -/* - * If we are doing lazy switching of CPU state (FP, altivec or SPE), - * and the current task has some state, discard it. - */ -void discard_lazy_cpu_state(void) -{ - preempt_disable(); - if (last_task_used_math == current) - last_task_used_math = NULL; -#ifdef CONFIG_ALTIVEC - if (last_task_used_altivec == current) - last_task_used_altivec = NULL; -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX - if (last_task_used_vsx == current) - last_task_used_vsx = NULL; -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - if (last_task_used_spe == current) - last_task_used_spe = NULL; -#endif - preempt_enable(); -} -#endif /* CONFIG_SMP */ - #ifdef CONFIG_PPC_ADV_DEBUG_REGS void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int breakpt) @@ -831,30 +774,9 @@ struct task_struct *__switch_to(struct task_struct *prev, __switch_to_tm(prev); -#ifdef CONFIG_SMP - /* avoid complexity of lazy save/restore of fpu - * by just saving it every time we switch out if - * this task used the fpu during the last quantum. - * - * If it tries to use the fpu again, it'll trap and - * reload its fp regs. So we don't have to do a restore - * every switch, just a save. - * -- Cort - */ if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) giveup_fpu(prev); #ifdef CONFIG_ALTIVEC - /* - * If the previous thread used altivec in the last quantum - * (thus changing altivec regs) then save them. - * We used to check the VRSAVE register but not all apps - * set it, so we don't rely on it now (and in fact we need - * to save & restore VSCR even if VRSAVE == 0). -- paulus - * - * On SMP we always save/restore altivec regs just to avoid the - * complexity of changing processors. - * -- Cort - */ if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) giveup_altivec(prev); #endif /* CONFIG_ALTIVEC */ @@ -864,39 +786,10 @@ struct task_struct *__switch_to(struct task_struct *prev, __giveup_vsx(prev); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE - /* - * If the previous thread used spe in the last quantum - * (thus changing spe regs) then save them. - * - * On SMP we always save/restore spe regs just to avoid the - * complexity of changing processors. - */ if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) giveup_spe(prev); #endif /* CONFIG_SPE */ -#else /* CONFIG_SMP */ -#ifdef CONFIG_ALTIVEC - /* Avoid the trap. On smp this this never happens since - * we don't set last_task_used_altivec -- Cort - */ - if (new->thread.regs && last_task_used_altivec == new) - new->thread.regs->msr |= MSR_VEC; -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX - if (new->thread.regs && last_task_used_vsx == new) - new->thread.regs->msr |= MSR_VSX; -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* Avoid the trap. 
On smp this this never happens since - * we don't set last_task_used_spe - */ - if (new->thread.regs && last_task_used_spe == new) - new->thread.regs->msr |= MSR_SPE; -#endif /* CONFIG_SPE */ - -#endif /* CONFIG_SMP */ - #ifdef CONFIG_PPC_ADV_DEBUG_REGS switch_booke_debug_regs(&new->thread.debug); #else @@ -1111,13 +1004,10 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { - discard_lazy_cpu_state(); } void flush_thread(void) { - discard_lazy_cpu_state(); - #ifdef CONFIG_HAVE_HW_BREAKPOINT flush_ptrace_hw_breakpoint(current); #else /* CONFIG_HAVE_HW_BREAKPOINT */ @@ -1355,7 +1245,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) regs->msr = MSR_USER32; } #endif - discard_lazy_cpu_state(); #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 0dbee465af7a..3cd7a32c8ff4 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -687,15 +687,6 @@ static long restore_user_regs(struct pt_regs *regs, if (sig) regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr/evr. That way, if we get preempted - * and another task grabs the FPU/Altivec/SPE, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - #ifdef CONFIG_ALTIVEC /* * Force the process to reload the altivec registers from @@ -798,15 +789,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Restore the previous little-endian mode */ regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr/evr. That way, if we get preempted - * and another task grabs the FPU/Altivec/SPE, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - #ifdef CONFIG_ALTIVEC regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 20756dfb9f34..6f2b555516e6 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -349,15 +349,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, if (set != NULL) err |= __get_user(set->sig[0], &sc->oldmask); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr. That way, if we get preempted - * and another task grabs the FPU/Altivec, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - /* * Force reload of FP/VEC. * This has to be done before copying stuff into current->thread.fpr/vr @@ -464,15 +455,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); - /* - * Do this before updating the thread state in - * current->thread.fpr/vr. That way, if we get preempted - * and another task grabs the FPU/Altivec, it won't be - * tempted to save the current CPU state into the thread_struct - * and corrupt what we are writing there. - */ - discard_lazy_cpu_state(); - /* * Force reload of FP/VEC. 
* This has to be done before copying stuff into current->thread.fpr/vr diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 1c5425966204..1757c0c936c1 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -80,39 +80,6 @@ _GLOBAL(load_up_altivec) MTMSRD(r5) /* enable use of AltiVec now */ isync -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... - */ -#ifndef CONFIG_SMP - LOAD_REG_ADDRBASE(r3, last_task_used_altivec) - toreal(r3) - PPC_LL r4,ADDROFF(last_task_used_altivec)(r3) - PPC_LCMPI 0,r4,0 - beq 1f - - /* Save VMX state to last_task_used_altivec's THREAD struct */ - toreal(r4) - addi r4,r4,THREAD - addi r6,r4,THREAD_VRSTATE - SAVE_32VRS(0,r5,r6) - mfvscr v0 - li r10,VRSTATE_VSCR - stvx v0,r10,r6 - /* Disable VMX for last_task_used_altivec */ - PPC_LL r5,PT_REGS(r4) - toreal(r5) - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r10,MSR_VEC@h - andc r4,r4,r10 - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE * set to all zeros, we assume this is a broken application * that fails to set it properly, and thus we switch it to @@ -141,12 +108,6 @@ _GLOBAL(load_up_altivec) lvx v0,r10,r6 mtvscr v0 REST_32VRS(0,r4,r6) -#ifndef CONFIG_SMP - /* Update last_task_used_altivec to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - fromreal(r4) - PPC_STL r4,ADDROFF(last_task_used_altivec)(r3) -#endif /* CONFIG_SMP */ /* restore registers and return */ blr @@ -199,11 +160,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) andc r4,r4,r3 /* disable FP for previous task */ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - LOAD_REG_ADDRBASE(r4,last_task_used_altivec) - PPC_STL r5,ADDROFF(last_task_used_altivec)(r4) -#endif /* CONFIG_SMP */ blr #ifdef CONFIG_VSX @@ -226,20 +182,6 @@ _GLOBAL(load_up_vsx) andis. 
r5,r12,MSR_VEC@h beql+ load_up_altivec /* skip if already loaded */ -#ifndef CONFIG_SMP - ld r3,last_task_used_vsx@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Disable VSX for last_task_used_vsx */ - addi r4,r4,THREAD - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VSX@h - andc r6,r4,r6 - std r6,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ ld r4,PACACURRENT(r13) addi r4,r4,THREAD /* Get THREAD */ li r6,1 @@ -247,11 +189,6 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) -#ifndef CONFIG_SMP - /* Update last_task_used_vsx to 'current' */ - ld r4,PACACURRENT(r13) - std r4,0(r3) -#endif /* CONFIG_SMP */ b fast_exception_return /* @@ -277,11 +214,6 @@ _GLOBAL(__giveup_vsx) andc r4,r4,r3 /* disable VSX for previous task */ std r4,_MSR-STACK_FRAME_OVERHEAD(r5) 1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_vsx@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ blr #endif /* CONFIG_VSX */ -- cgit v1.2.3 From b86fd2bd03021ce906bfa0c1456ec38329e31b30 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:58 +1100 Subject: powerpc: Simplify TM restore checks Instead of having multiple giveup_*_maybe_transactional() functions, separate out the TM check into a new function called check_if_tm_restore_required(). This will make it easier to optimise the giveup_*() functions in a subsequent patch. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 53 ++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e098f4315643..ef64219548d5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -68,7 +68,7 @@ extern unsigned long _get_SP(void); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void giveup_fpu_maybe_transactional(struct task_struct *tsk) +static void check_if_tm_restore_required(struct task_struct *tsk) { /* * If we are saving the current thread's registers, and the @@ -82,31 +82,9 @@ void giveup_fpu_maybe_transactional(struct task_struct *tsk) tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; set_thread_flag(TIF_RESTORE_TM); } - - giveup_fpu(tsk); -} - -void giveup_altivec_maybe_transactional(struct task_struct *tsk) -{ - /* - * If we are saving the current thread's registers, and the - * thread is in a transactional state, set the TIF_RESTORE_TM - * bit so that we know to restore the registers before - * returning to userspace. - */ - if (tsk == current && tsk->thread.regs && - MSR_TM_ACTIVE(tsk->thread.regs->msr) && - !test_thread_flag(TIF_RESTORE_TM)) { - tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; - set_thread_flag(TIF_RESTORE_TM); - } - - giveup_altivec(tsk); } - #else -#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk) -#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk) +static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #ifdef CONFIG_PPC_FPU @@ -135,7 +113,8 @@ void flush_fp_to_thread(struct task_struct *tsk) * to still have its FP state in the CPU registers. 
*/ BUG_ON(tsk != current); - giveup_fpu_maybe_transactional(tsk); + check_if_tm_restore_required(tsk); + giveup_fpu(tsk); } preempt_enable(); } @@ -147,10 +126,12 @@ void enable_kernel_fp(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) - giveup_fpu_maybe_transactional(current); - else + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { + check_if_tm_restore_required(current); + giveup_fpu(current); + } else { giveup_fpu(NULL); /* just enables FP for kernel */ + } } EXPORT_SYMBOL(enable_kernel_fp); @@ -159,10 +140,12 @@ void enable_kernel_altivec(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) - giveup_altivec_maybe_transactional(current); - else + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { + check_if_tm_restore_required(current); + giveup_altivec(current); + } else { giveup_altivec_notask(); + } } EXPORT_SYMBOL(enable_kernel_altivec); @@ -176,7 +159,8 @@ void flush_altivec_to_thread(struct task_struct *tsk) preempt_disable(); if (tsk->thread.regs->msr & MSR_VEC) { BUG_ON(tsk != current); - giveup_altivec_maybe_transactional(tsk); + check_if_tm_restore_required(tsk); + giveup_altivec(tsk); } preempt_enable(); } @@ -198,8 +182,9 @@ EXPORT_SYMBOL(enable_kernel_vsx); void giveup_vsx(struct task_struct *tsk) { - giveup_fpu_maybe_transactional(tsk); - giveup_altivec_maybe_transactional(tsk); + check_if_tm_restore_required(tsk); + giveup_fpu(tsk); + giveup_altivec(tsk); __giveup_vsx(tsk); } EXPORT_SYMBOL(giveup_vsx); -- cgit v1.2.3 From 611b0e5c19963374175b39f42117b03ee7573228 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:43:59 +1100 Subject: powerpc: Create mtmsrd_isync() mtmsrd_isync() will do an mtmsrd followed by an isync on older processors. On newer processors we avoid the isync via a feature fixup. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 8 ++++++++ arch/powerpc/kernel/process.c | 30 ++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a908ada8e0a5..987dac090244 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1193,12 +1193,20 @@ #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ : : "r" (v) : "memory") #define mtmsr(v) __mtmsrd((v), 0) +#define __MTMSR "mtmsrd" #else #define mtmsr(v) asm volatile("mtmsr %0" : \ : "r" ((unsigned long)(v)) \ : "memory") +#define __MTMSR "mtmsr" #endif +static inline void mtmsr_isync(unsigned long val) +{ + asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : : + "r" (val), "i" (CPU_FTR_ARCH_206) : "memory"); +} + #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ef64219548d5..5bf8ec2597d4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -130,7 +130,10 @@ void enable_kernel_fp(void) check_if_tm_restore_required(current); giveup_fpu(current); } else { - giveup_fpu(NULL); /* just enables FP for kernel */ + u64 oldmsr = mfmsr(); + + if (!(oldmsr & MSR_FP)) + mtmsr_isync(oldmsr | MSR_FP); } } EXPORT_SYMBOL(enable_kernel_fp); @@ -144,7 +147,10 @@ void enable_kernel_altivec(void) check_if_tm_restore_required(current); giveup_altivec(current); } else { - giveup_altivec_notask(); + u64 oldmsr = mfmsr(); + + if (!(oldmsr & MSR_VEC)) + mtmsr_isync(oldmsr | MSR_VEC); } } EXPORT_SYMBOL(enable_kernel_altivec); @@ -173,10 +179,14 @@ void enable_kernel_vsx(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) + if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { giveup_vsx(current); - else - giveup_vsx(NULL); /* just enable vsx for kernel - force */ + } else { + u64 oldmsr = mfmsr(); + + if (!(oldmsr & MSR_VSX)) + mtmsr_isync(oldmsr | MSR_VSX); + } } EXPORT_SYMBOL(enable_kernel_vsx); @@ -209,10 +219,14 @@ void enable_kernel_spe(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) { giveup_spe(current); - else - giveup_spe(NULL); /* just enable SPE for kernel - force */ + } else { + u64 oldmsr = mfmsr(); + + if (!(oldmsr & MSR_SPE)) + mtmsr_isync(oldmsr | MSR_SPE); + } } EXPORT_SYMBOL(enable_kernel_spe); -- cgit v1.2.3 From b51b1153d0e78a70767441273331d2de066bb929 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:00 +1100 Subject: powerpc: Remove NULL task struct pointer checks in FP and vector code We used to allow giveup_*() to be called with a NULL task struct pointer. Now those cases are handled in the caller we can remove the checks. We can also remove giveup_altivec_notask() which is also unused. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 1 - arch/powerpc/kernel/fpu.S | 2 -- arch/powerpc/kernel/head_fsl_booke.S | 2 -- arch/powerpc/kernel/vector.S | 14 -------------- 4 files changed, 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index bd1d93318350..042aaf05a787 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -38,7 +38,6 @@ static inline void giveup_fpu(struct task_struct *t) { } #ifdef CONFIG_ALTIVEC extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); -extern void giveup_altivec_notask(void); #else static inline void flush_altivec_to_thread(struct task_struct *t) { diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 50d2352f2cf4..71bdce284ad9 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -173,8 +173,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) MTMSRD(r5) /* enable use of fpu now */ SYNC_601 isync - PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index ec936abbcadc..d6980bbae954 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -992,8 +992,6 @@ _GLOBAL(giveup_spe) oris r5,r5,MSR_SPE@h mtmsr r5 /* enable use of SPE now */ isync - cmpi 0,r3,0 - beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ lwz r5,PT_REGS(r3) cmpi 0,r5,0 diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 1757c0c936c1..b31528c30253 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -111,16 +111,6 @@ _GLOBAL(load_up_altivec) /* restore registers and return */ blr -_GLOBAL(giveup_altivec_notask) - mfmsr r3 - andis. r4,r3,MSR_VEC@h - bnelr /* Already enabled? */ - oris r3,r3,MSR_VEC@h - SYNC - MTMSRD(r3) /* enable use of VMX now */ - isync - blr - /* * giveup_altivec(tsk) * Disable VMX for the task given as the argument, @@ -133,8 +123,6 @@ _GLOBAL(giveup_altivec) SYNC MTMSRD(r5) /* enable use of VMX now */ isync - PPC_LCMPI 0,r3,0 - beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) @@ -203,8 +191,6 @@ _GLOBAL(__giveup_vsx) mtmsrd r5 /* enable use of VSX now */ isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ ld r5,PT_REGS(r3) cmpdi 0,r5,0 -- cgit v1.2.3 From 98da581e0846f6d932a4bc46a55458140e20478a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:01 +1100 Subject: powerpc: Move part of giveup_fpu,altivec,spe into c Move the MSR modification into new c functions. Removing it from the low level functions will allow us to avoid costly MSR writes by batching them up. Move the check_if_tm_restore_required() check into these new functions. 
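
As an illustration of the batching this makes possible (a hypothetical caller in kernel context, not a hunk from this patch): with the MSR manipulation in C, a path that needs several facilities can accumulate the bits and pay for a single MSR write:

  u64 oldmsr = mfmsr();
  u64 newmsr = oldmsr | MSR_FP | MSR_VEC;

  if (cpu_has_feature(CPU_FTR_VSX))
          newmsr |= MSR_VSX;

  if (oldmsr != newmsr)
          mtmsr_isync(newmsr);
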
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 21 ++++++++------- arch/powerpc/kernel/fpu.S | 16 ++--------- arch/powerpc/kernel/head_fsl_booke.S | 8 ++---- arch/powerpc/kernel/ppc_ksyms.c | 6 ----- arch/powerpc/kernel/process.c | 52 +++++++++++++++++++++++++++++++++--- arch/powerpc/kernel/vector.S | 10 ++----- 6 files changed, 65 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 042aaf05a787..c2678b93bcba 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -23,28 +23,27 @@ extern int emulate_altivec(struct pt_regs *); extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); extern void enable_kernel_spe(void); -extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); #ifdef CONFIG_PPC_FPU extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); +extern void __giveup_fpu(struct task_struct *); #else static inline void flush_fp_to_thread(struct task_struct *t) { } static inline void giveup_fpu(struct task_struct *t) { } +static inline void __giveup_fpu(struct task_struct *t) { } #endif #ifdef CONFIG_ALTIVEC extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); +extern void __giveup_altivec(struct task_struct *); #else -static inline void flush_altivec_to_thread(struct task_struct *t) -{ -} -static inline void giveup_altivec(struct task_struct *t) -{ -} +static inline void flush_altivec_to_thread(struct task_struct *t) { } +static inline void giveup_altivec(struct task_struct *t) { } +static inline void __giveup_altivec(struct task_struct *t) { } #endif #ifdef CONFIG_VSX @@ -57,10 +56,12 @@ static inline void flush_vsx_to_thread(struct task_struct *t) #ifdef CONFIG_SPE extern void flush_spe_to_thread(struct task_struct *); +extern void giveup_spe(struct task_struct *); +extern void __giveup_spe(struct task_struct *); #else -static inline void flush_spe_to_thread(struct task_struct *t) -{ -} +static inline void flush_spe_to_thread(struct task_struct *t) { } +static inline void giveup_spe(struct task_struct *t) { } +static inline void __giveup_spe(struct task_struct *t) { } #endif static inline void clear_task_ebb(struct task_struct *t) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 71bdce284ad9..431ab571ed1b 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -155,24 +155,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr /* - * giveup_fpu(tsk) + * __giveup_fpu(tsk) * Disable FP for the task given as the argument, * and save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. 
*/ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r5,r5,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC_601 - ISYNC_601 - MTMSRD(r5) /* enable use of fpu now */ - SYNC_601 - isync +_GLOBAL(__giveup_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index d6980bbae954..f705171b924b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -984,14 +984,10 @@ _GLOBAL(__setup_ehv_ivors) #ifdef CONFIG_SPE /* - * extern void giveup_spe(struct task_struct *prev) + * extern void __giveup_spe(struct task_struct *prev) * */ -_GLOBAL(giveup_spe) - mfmsr r5 - oris r5,r5,MSR_SPE@h - mtmsr r5 /* enable use of SPE now */ - isync +_GLOBAL(__giveup_spe) addi r3,r3,THREAD /* want THREAD of task */ lwz r5,PT_REGS(r3) cmpi 0,r5,0 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 202963ee013a..41e1607e800c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -19,13 +19,11 @@ EXPORT_SYMBOL(_mcount); #endif #ifdef CONFIG_PPC_FPU -EXPORT_SYMBOL(giveup_fpu); EXPORT_SYMBOL(load_fp_state); EXPORT_SYMBOL(store_fp_state); #endif #ifdef CONFIG_ALTIVEC -EXPORT_SYMBOL(giveup_altivec); EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif @@ -34,10 +32,6 @@ EXPORT_SYMBOL(store_vr_state); EXPORT_SYMBOL_GPL(__giveup_vsx); #endif -#ifdef CONFIG_SPE -EXPORT_SYMBOL(giveup_spe); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5bf8ec2597d4..6bcf82bed610 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -88,6 +88,25 @@ static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #ifdef CONFIG_PPC_FPU +void giveup_fpu(struct task_struct *tsk) +{ + u64 oldmsr = mfmsr(); + u64 newmsr; + + check_if_tm_restore_required(tsk); + + newmsr = oldmsr | MSR_FP; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + newmsr |= MSR_VSX; +#endif + if (oldmsr != newmsr) + mtmsr_isync(newmsr); + + __giveup_fpu(tsk); +} +EXPORT_SYMBOL(giveup_fpu); + /* * Make sure the floating-point register state in the * the thread_struct is up to date for task tsk. @@ -113,7 +132,6 @@ void flush_fp_to_thread(struct task_struct *tsk) * to still have its FP state in the CPU registers. 
*/ BUG_ON(tsk != current); - check_if_tm_restore_required(tsk); giveup_fpu(tsk); } preempt_enable(); @@ -127,7 +145,6 @@ void enable_kernel_fp(void) WARN_ON(preemptible()); if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { - check_if_tm_restore_required(current); giveup_fpu(current); } else { u64 oldmsr = mfmsr(); @@ -139,12 +156,26 @@ void enable_kernel_fp(void) EXPORT_SYMBOL(enable_kernel_fp); #ifdef CONFIG_ALTIVEC +void giveup_altivec(struct task_struct *tsk) +{ + u64 oldmsr = mfmsr(); + u64 newmsr; + + check_if_tm_restore_required(tsk); + + newmsr = oldmsr | MSR_VEC; + if (oldmsr != newmsr) + mtmsr_isync(newmsr); + + __giveup_altivec(tsk); +} +EXPORT_SYMBOL(giveup_altivec); + void enable_kernel_altivec(void) { WARN_ON(preemptible()); if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { - check_if_tm_restore_required(current); giveup_altivec(current); } else { u64 oldmsr = mfmsr(); @@ -165,7 +196,6 @@ void flush_altivec_to_thread(struct task_struct *tsk) preempt_disable(); if (tsk->thread.regs->msr & MSR_VEC) { BUG_ON(tsk != current); - check_if_tm_restore_required(tsk); giveup_altivec(tsk); } preempt_enable(); @@ -214,6 +244,20 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE +void giveup_spe(struct task_struct *tsk) +{ + u64 oldmsr = mfmsr(); + u64 newmsr; + + check_if_tm_restore_required(tsk); + + newmsr = oldmsr | MSR_SPE; + if (oldmsr != newmsr) + mtmsr_isync(newmsr); + + __giveup_spe(tsk); +} +EXPORT_SYMBOL(giveup_spe); void enable_kernel_spe(void) { diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index b31528c30253..6e925b40a484 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -112,17 +112,11 @@ _GLOBAL(load_up_altivec) blr /* - * giveup_altivec(tsk) + * __giveup_altivec(tsk) * Disable VMX for the task given as the argument, * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - SYNC - MTMSRD(r5) /* enable use of VMX now */ - isync +_GLOBAL(__giveup_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) -- cgit v1.2.3 From a7d623d4d053ccb0cdfad210bced2ec25ddf69a2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:02 +1100 Subject: powerpc: Move part of giveup_vsx into c Move the MSR modification into c. Removing it from the assembly function will allow us to avoid costly MSR writes by batching them up. Check the FP and VMX bits before calling the relevant giveup_*() function. This makes giveup_vsx() and flush_vsx_to_thread() perform more like their sister functions, and allows us to use flush_vsx_to_thread() in the signal code. Move the check_if_tm_restore_required() check in. 
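The switch from __giveup_vsx() to flush_vsx_to_thread() in the signal paths works because the flush variant adds the usual guards around the giveup; its shape in process.c is roughly the following (a sketch from context, mirroring flush_fp_to_thread() and flush_altivec_to_thread()):

    void flush_vsx_to_thread(struct task_struct *tsk)
    {
            if (tsk->thread.regs) {
                    preempt_disable();
                    if (tsk->thread.regs->msr & MSR_VSX) {
                            BUG_ON(tsk != current);
                            /*
                             * giveup_vsx() now flushes FP and VMX first,
                             * since VSX state overlaps both register files.
                             */
                            giveup_vsx(tsk);
                    }
                    preempt_enable();
            }
    }

This is also why giveup_vsx() checks the MSR_FP and MSR_VEC bits before calling the corresponding __giveup_*() routines: only state the task actually has live in registers needs saving.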
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 28 +++++++++++++++++++--------- arch/powerpc/kernel/signal_32.c | 4 ++-- arch/powerpc/kernel/signal_64.c | 4 ++-- arch/powerpc/kernel/vector.S | 6 ------ 4 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6bcf82bed610..0cb627662ded 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -205,6 +205,25 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX +void giveup_vsx(struct task_struct *tsk) +{ + u64 oldmsr = mfmsr(); + u64 newmsr; + + check_if_tm_restore_required(tsk); + + newmsr = oldmsr | (MSR_FP|MSR_VEC|MSR_VSX); + if (oldmsr != newmsr) + mtmsr_isync(newmsr); + + if (tsk->thread.regs->msr & MSR_FP) + __giveup_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + __giveup_altivec(tsk); + __giveup_vsx(tsk); +} +EXPORT_SYMBOL(giveup_vsx); + void enable_kernel_vsx(void) { WARN_ON(preemptible()); @@ -220,15 +239,6 @@ void enable_kernel_vsx(void) } EXPORT_SYMBOL(enable_kernel_vsx); -void giveup_vsx(struct task_struct *tsk) -{ - check_if_tm_restore_required(tsk); - giveup_fpu(tsk); - giveup_altivec(tsk); - __giveup_vsx(tsk); -} -EXPORT_SYMBOL(giveup_vsx); - void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3cd7a32c8ff4..4022cbb7e2d6 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -458,7 +458,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - __giveup_vsx(current); + flush_vsx_to_thread(current); if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; msr |= MSR_VSX; @@ -606,7 +606,7 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - __giveup_vsx(current); + flush_vsx_to_thread(current); if (copy_vsx_to_user(&frame->mc_vsregs, current)) return 1; if (msr & MSR_VSX) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 6f2b555516e6..3b2339912911 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -147,7 +147,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * VMX data. */ if (current->thread.used_vsr && ctx_has_vsx_region) { - __giveup_vsx(current); + flush_vsx_to_thread(current); v_regs += ELF_NVRREG; err |= copy_vsx_to_user(v_regs, current); /* set MSR_VSX in the MSR value in the frame to @@ -270,7 +270,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * VMX data. */ if (current->thread.used_vsr) { - __giveup_vsx(current); + flush_vsx_to_thread(current); v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 6e925b40a484..98675b08efe2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -177,14 +177,8 @@ _GLOBAL(load_up_vsx) * __giveup_vsx(tsk) * Disable VSX for the task given as the argument. * Does NOT save vsx registers. - * Enables the VSX for use in the kernel on return. 
*/ _GLOBAL(__giveup_vsx) - mfmsr r5 - oris r5,r5,MSR_VSX@h - mtmsrd r5 /* enable use of VSX now */ - isync - addi r3,r3,THREAD /* want THREAD of task */ ld r5,PT_REGS(r3) cmpdi 0,r5,0 -- cgit v1.2.3 From a0e72cf12b1a1f159b6822ed2e1e41893d996fc7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:04 +1100 Subject: powerpc: Create msr_check_and_{set,clear}() Create helper functions to set and clear MSR bits after first checking if they are already set. Grouping them will make it easy to avoid the MSR writes in a subsequent optimisation. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 107 ++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 55 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0cb627662ded..5cdd35c0b026 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -87,23 +87,46 @@ static void check_if_tm_restore_required(struct task_struct *tsk) static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#ifdef CONFIG_PPC_FPU -void giveup_fpu(struct task_struct *tsk) +static void msr_check_and_set(unsigned long bits) { - u64 oldmsr = mfmsr(); - u64 newmsr; + unsigned long oldmsr = mfmsr(); + unsigned long newmsr; - check_if_tm_restore_required(tsk); + newmsr = oldmsr | bits; - newmsr = oldmsr | MSR_FP; #ifdef CONFIG_VSX - if (cpu_has_feature(CPU_FTR_VSX)) + if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) newmsr |= MSR_VSX; #endif + if (oldmsr != newmsr) mtmsr_isync(newmsr); +} + +static void msr_check_and_clear(unsigned long bits) +{ + unsigned long oldmsr = mfmsr(); + unsigned long newmsr; + + newmsr = oldmsr & ~bits; + +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP)) + newmsr &= ~MSR_VSX; +#endif + if (oldmsr != newmsr) + mtmsr_isync(newmsr); +} + +#ifdef CONFIG_PPC_FPU +void giveup_fpu(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP); __giveup_fpu(tsk); + msr_check_and_clear(MSR_FP); } EXPORT_SYMBOL(giveup_fpu); @@ -144,30 +167,21 @@ void enable_kernel_fp(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { - giveup_fpu(current); - } else { - u64 oldmsr = mfmsr(); + msr_check_and_set(MSR_FP); - if (!(oldmsr & MSR_FP)) - mtmsr_isync(oldmsr | MSR_FP); - } + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + __giveup_fpu(current); } EXPORT_SYMBOL(enable_kernel_fp); #ifdef CONFIG_ALTIVEC void giveup_altivec(struct task_struct *tsk) { - u64 oldmsr = mfmsr(); - u64 newmsr; - check_if_tm_restore_required(tsk); - newmsr = oldmsr | MSR_VEC; - if (oldmsr != newmsr) - mtmsr_isync(newmsr); - + msr_check_and_set(MSR_VEC); __giveup_altivec(tsk); + msr_check_and_clear(MSR_VEC); } EXPORT_SYMBOL(giveup_altivec); @@ -175,14 +189,10 @@ void enable_kernel_altivec(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { - giveup_altivec(current); - } else { - u64 oldmsr = mfmsr(); + msr_check_and_set(MSR_VEC); - if (!(oldmsr & MSR_VEC)) - mtmsr_isync(oldmsr | MSR_VEC); - } + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + __giveup_altivec(current); } EXPORT_SYMBOL(enable_kernel_altivec); @@ -207,20 +217,15 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #ifdef CONFIG_VSX void giveup_vsx(struct task_struct *tsk) { - u64 oldmsr = mfmsr(); 
- u64 newmsr; - check_if_tm_restore_required(tsk); - newmsr = oldmsr | (MSR_FP|MSR_VEC|MSR_VSX); - if (oldmsr != newmsr) - mtmsr_isync(newmsr); - + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (tsk->thread.regs->msr & MSR_FP) __giveup_fpu(tsk); if (tsk->thread.regs->msr & MSR_VEC) __giveup_altivec(tsk); __giveup_vsx(tsk); + msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } EXPORT_SYMBOL(giveup_vsx); @@ -228,13 +233,14 @@ void enable_kernel_vsx(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { - giveup_vsx(current); - } else { - u64 oldmsr = mfmsr(); + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); - if (!(oldmsr & MSR_VSX)) - mtmsr_isync(oldmsr | MSR_VSX); + if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + if (current->thread.regs->msr & MSR_FP) + __giveup_fpu(current); + if (current->thread.regs->msr & MSR_VEC) + __giveup_altivec(current); + __giveup_vsx(current); } } EXPORT_SYMBOL(enable_kernel_vsx); @@ -256,16 +262,11 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread); #ifdef CONFIG_SPE void giveup_spe(struct task_struct *tsk) { - u64 oldmsr = mfmsr(); - u64 newmsr; - check_if_tm_restore_required(tsk); - newmsr = oldmsr | MSR_SPE; - if (oldmsr != newmsr) - mtmsr_isync(newmsr); - + msr_check_and_set(MSR_SPE); __giveup_spe(tsk); + msr_check_and_clear(MSR_SPE); } EXPORT_SYMBOL(giveup_spe); @@ -273,14 +274,10 @@ void enable_kernel_spe(void) { WARN_ON(preemptible()); - if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) { - giveup_spe(current); - } else { - u64 oldmsr = mfmsr(); + msr_check_and_set(MSR_SPE); - if (!(oldmsr & MSR_SPE)) - mtmsr_isync(oldmsr | MSR_SPE); - } + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + __giveup_spe(current); } EXPORT_SYMBOL(enable_kernel_spe); -- cgit v1.2.3 From dc4fbba11e4661a6a77a1f89ba32f9082e6395ff Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:05 +1100 Subject: powerpc: Create disable_kernel_{fp,altivec,vsx,spe}() The enable_kernel_*() functions leave the relevant MSR bits enabled until we exit the kernel sometime later. Create disable versions that wrap the kernel use of FP, Altivec VSX or SPE. While we don't want to disable it normally for performance reasons (MSR writes are slow), it will be used for a debug boot option that does this and catches bad uses in other areas of the kernel. 
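After this patch every kernel user of a facility is expected to bracket the work as below; do_float_work() is a made-up call site, but the crypto, alignment and xor diffs that follow show the same pairing in real code:

    static void do_float_work(void)     /* hypothetical caller */
    {
            preempt_disable();
            enable_kernel_fp();     /* sets MSR_FP (plus MSR_VSX if present) */

            /* ... FP/VSX instructions may be used in here ... */

            disable_kernel_fp();    /* closes the region again */
            preempt_enable();
    }

For now disable_kernel_fp() and friends compile to nothing; the ppc_strict_facility_enable patch that follows turns them into real msr_check_and_clear() calls, so stray facility use outside such a region faults instead of silently corrupting user state.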
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/crypto/aes-spe-glue.c | 1 + arch/powerpc/crypto/sha1-spe-glue.c | 1 + arch/powerpc/crypto/sha256-spe-glue.c | 1 + arch/powerpc/include/asm/switch_to.h | 5 +++++ arch/powerpc/kernel/align.c | 2 ++ arch/powerpc/kvm/book3s_paired_singles.c | 1 + arch/powerpc/kvm/book3s_pr.c | 4 ++++ arch/powerpc/kvm/booke.c | 4 ++++ arch/powerpc/lib/vmx-helper.c | 2 ++ arch/powerpc/lib/xor_vmx.c | 4 ++++ drivers/crypto/vmx/aes.c | 3 +++ drivers/crypto/vmx/aes_cbc.c | 3 +++ drivers/crypto/vmx/aes_ctr.c | 3 +++ drivers/crypto/vmx/ghash.c | 4 ++++ lib/raid6/altivec.uc | 1 + 15 files changed, 39 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c index bd5e63f72ad4..93ee046d12cd 100644 --- a/arch/powerpc/crypto/aes-spe-glue.c +++ b/arch/powerpc/crypto/aes-spe-glue.c @@ -85,6 +85,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index 3e1d22212521..f9ebc38d3fe7 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -46,6 +46,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index f4a616fe1a82..718a079dcdbf 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -47,6 +47,7 @@ static void spe_begin(void) static void spe_end(void) { + disable_kernel_spe(); /* reenable preemption */ preempt_enable(); } diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index c2678b93bcba..438502f59550 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -26,6 +26,11 @@ extern void enable_kernel_spe(void); extern void load_up_spe(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); +static inline void disable_kernel_fp(void) { } +static inline void disable_kernel_altivec(void) { } +static inline void disable_kernel_spe(void) { } +static inline void disable_kernel_vsx(void) { } + #ifdef CONFIG_PPC_FPU extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 86150fbb42c3..8e7cb8e2b21a 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -960,6 +960,7 @@ int fix_alignment(struct pt_regs *regs) preempt_disable(); enable_kernel_fp(); cvt_df(&data.dd, (float *)&data.x32.low32); + disable_kernel_fp(); preempt_enable(); #else return 0; @@ -1000,6 +1001,7 @@ int fix_alignment(struct pt_regs *regs) preempt_disable(); enable_kernel_fp(); cvt_fd((float *)&data.x32.low32, &data.dd); + disable_kernel_fp(); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index a759d9adb0b6..eab96cfe82fa 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -1265,6 +1265,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) if (rcomp) kvmppc_set_cr(vcpu, cr); + disable_kernel_fp(); preempt_enable(); return emulated; diff --git a/arch/powerpc/kvm/book3s_pr.c 
b/arch/powerpc/kvm/book3s_pr.c index 64891b081ad5..49f5dad1bd45 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -751,6 +751,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, preempt_disable(); enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); t->fp_save_area = &vcpu->arch.fp; preempt_enable(); } @@ -760,6 +761,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, preempt_disable(); enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); t->vr_save_area = &vcpu->arch.vr; preempt_enable(); #endif @@ -788,6 +790,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); preempt_enable(); } #ifdef CONFIG_ALTIVEC @@ -795,6 +798,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); preempt_enable(); } #endif diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index fd5875179e5c..778ef86e187e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -98,6 +98,7 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_spe(); kvmppc_save_guest_spe(vcpu); + disable_kernel_spe(); vcpu->arch.shadow_msr &= ~MSR_SPE; preempt_enable(); } @@ -107,6 +108,7 @@ static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu) preempt_disable(); enable_kernel_spe(); kvmppc_load_guest_spe(vcpu); + disable_kernel_spe(); vcpu->arch.shadow_msr |= MSR_SPE; preempt_enable(); } @@ -141,6 +143,7 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) if (!(current->thread.regs->msr & MSR_FP)) { enable_kernel_fp(); load_fp_state(&vcpu->arch.fp); + disable_kernel_fp(); current->thread.fp_save_area = &vcpu->arch.fp; current->thread.regs->msr |= MSR_FP; } @@ -182,6 +185,7 @@ static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) if (!(current->thread.regs->msr & MSR_VEC)) { enable_kernel_altivec(); load_vr_state(&vcpu->arch.vr); + disable_kernel_altivec(); current->thread.vr_save_area = &vcpu->arch.vr; current->thread.regs->msr |= MSR_VEC; } diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index ac93a3bd2730..b27e030fc9f8 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -46,6 +46,7 @@ int enter_vmx_usercopy(void) */ int exit_vmx_usercopy(void) { + disable_kernel_altivec(); pagefault_enable(); preempt_enable(); return 0; @@ -70,6 +71,7 @@ int enter_vmx_copy(void) */ void *exit_vmx_copy(void *dest) { + disable_kernel_altivec(); preempt_enable(); return dest; } diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index e905f7c2ea7b..07f49f1568e5 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -74,6 +74,7 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, v2 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); @@ -102,6 +103,7 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, v3 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); @@ -135,6 +137,7 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, v4 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); @@ -172,6 +175,7 @@ void xor_altivec_5(unsigned long bytes, 
unsigned long *v1_in, v5 += 4; } while (--lines > 0); + disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_5); diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index 20539fb7e975..022c7ab7351a 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -86,6 +86,7 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); @@ -104,6 +105,7 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(src, dst, &ctx->enc_key); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); } @@ -120,6 +122,7 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) pagefault_disable(); enable_kernel_vsx(); aes_p8_decrypt(src, dst, &ctx->dec_key); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); } diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 8847b92e9ff0..1881b3f413fa 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -87,6 +87,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); @@ -127,6 +128,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, ret = blkcipher_walk_done(desc, &walk, nbytes); } + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); } @@ -167,6 +169,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, ret = blkcipher_walk_done(desc, &walk, nbytes); } + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); } diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 80958660c31a..2d58b18acc10 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -83,6 +83,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, pagefault_disable(); enable_kernel_vsx(); ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + disable_kernel_vsx(); pagefault_enable(); ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); @@ -101,6 +102,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, pagefault_disable(); enable_kernel_vsx(); aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); + disable_kernel_vsx(); pagefault_enable(); crypto_xor(keystream, src, nbytes); @@ -139,6 +141,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); + disable_kernel_vsx(); pagefault_enable(); /* We need to update IV mostly for last bytes/round */ diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index 1f4586c2fd25..6c999cb01b80 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -120,6 +120,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, pagefault_disable(); enable_kernel_vsx(); gcm_init_p8(ctx->htable, (const u64 *) key); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); return crypto_shash_setkey(ctx->fallback, key, keylen); @@ -150,6 +151,7 @@ static int p8_ghash_update(struct shash_desc *desc, enable_kernel_vsx(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); + disable_kernel_vsx(); 
pagefault_enable(); preempt_enable(); src += GHASH_DIGEST_SIZE - dctx->bytes; @@ -162,6 +164,7 @@ static int p8_ghash_update(struct shash_desc *desc, pagefault_disable(); enable_kernel_vsx(); gcm_ghash_p8(dctx->shash, ctx->htable, src, len); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); src += len; @@ -192,6 +195,7 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out) enable_kernel_vsx(); gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, GHASH_DIGEST_SIZE); + disable_kernel_vsx(); pagefault_enable(); preempt_enable(); dctx->bytes = 0; diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index bec27fce7501..682aae8a1fef 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -101,6 +101,7 @@ static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + disable_kernel_altivec(); preempt_enable(); } -- cgit v1.2.3 From 3eb5d5888dc68c9b187998ca4249b8b9fa481eeb Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:06 +1100 Subject: powerpc: Add ppc_strict_facility_enable boot option Add a boot option that strictly manages the MSR unavailable bits. This catches kernel uses of FP/Altivec/SPE that would otherwise corrupt user state. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- Documentation/kernel-parameters.txt | 6 ++++++ arch/powerpc/include/asm/reg.h | 9 +++++++++ arch/powerpc/include/asm/switch_to.h | 24 +++++++++++++++++++----- arch/powerpc/kernel/process.c | 17 +++++++++++++++-- 4 files changed, 49 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 742f69d18fc8..8978c26cacdd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2978,6 +2978,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. may be specified. Format: ,.... + ppc_strict_facility_enable + [PPC] This option catches any kernel floating point, + Altivec, VSX and SPE use outside of regions specifically + allowed (e.g. enable_kernel_fp()/disable_kernel_fp()). + There is some performance impact when enabling this.
+ print-fatal-signals= [KNL] debug: print fatal signals diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 987dac090244..eb2986e60c50 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1214,6 +1214,15 @@ static inline void mtmsr_isync(unsigned long val) : "r" ((unsigned long)(v)) \ : "memory") +extern void msr_check_and_set(unsigned long bits); +extern bool strict_msr_control; +extern void __msr_check_and_clear(unsigned long bits); +static inline void msr_check_and_clear(unsigned long bits) +{ + if (strict_msr_control) + __msr_check_and_clear(bits); +} + static inline unsigned long mfvtb (void) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 438502f59550..9414dcb180d6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -4,6 +4,8 @@ #ifndef _ASM_POWERPC_SWITCH_TO_H #define _ASM_POWERPC_SWITCH_TO_H +#include <asm/reg.h> + struct thread_struct; struct task_struct; struct pt_regs; @@ -26,15 +28,15 @@ extern void enable_kernel_spe(void); extern void load_up_spe(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); -static inline void disable_kernel_fp(void) { } -static inline void disable_kernel_altivec(void) { } -static inline void disable_kernel_spe(void) { } -static inline void disable_kernel_vsx(void) { } - #ifdef CONFIG_PPC_FPU extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); extern void __giveup_fpu(struct task_struct *); +static inline void disable_kernel_fp(void) +{ + msr_check_and_clear(MSR_FP); +} + #else static inline void flush_fp_to_thread(struct task_struct *t) { } static inline void giveup_fpu(struct task_struct *t) { } @@ -45,6 +47,10 @@ static inline void __giveup_fpu(struct task_struct *t) { } extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); extern void __giveup_altivec(struct task_struct *); +static inline void disable_kernel_altivec(void) +{ + msr_check_and_clear(MSR_VEC); +} #else static inline void flush_altivec_to_thread(struct task_struct *t) { } static inline void giveup_altivec(struct task_struct *t) { } @@ -53,6 +59,10 @@ static inline void __giveup_altivec(struct task_struct *t) { } #ifdef CONFIG_VSX extern void flush_vsx_to_thread(struct task_struct *); +static inline void disable_kernel_vsx(void) +{ + msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); +} #else static inline void flush_vsx_to_thread(struct task_struct *t) { @@ -63,6 +73,10 @@ static inline void flush_vsx_to_thread(struct task_struct *t) extern void flush_spe_to_thread(struct task_struct *); extern void giveup_spe(struct task_struct *); extern void __giveup_spe(struct task_struct *); +static inline void disable_kernel_spe(void) +{ + msr_check_and_clear(MSR_SPE); +} #else static inline void flush_spe_to_thread(struct task_struct *t) { } static inline void giveup_spe(struct task_struct *t) { } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5cdd35c0b026..1eafceefeac9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -87,7 +87,19 @@ static void check_if_tm_restore_required(struct task_struct *tsk) static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -static void msr_check_and_set(unsigned long bits) +bool strict_msr_control; +EXPORT_SYMBOL(strict_msr_control); +
+static int __init enable_strict_msr_control(char *str) +{ + strict_msr_control = true; + pr_info("Enabling strict facility control\n"); + + return 0; +} +early_param("ppc_strict_facility_enable", enable_strict_msr_control); + +void msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -103,7 +115,7 @@ static void msr_check_and_set(unsigned long bits) mtmsr_isync(newmsr); } -static void msr_check_and_clear(unsigned long bits) +void __msr_check_and_clear(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -118,6 +130,7 @@ static void msr_check_and_clear(unsigned long bits) if (oldmsr != newmsr) mtmsr_isync(newmsr); } +EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU void giveup_fpu(struct task_struct *tsk) -- cgit v1.2.3 From 1f2e25b2d552cade43eacb2edc4e7f01c1cfecb3 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:07 +1100 Subject: powerpc: Remove fp_enable() and vec_enable(), use msr_check_and_{set, clear}() More consolidation of our MSR available bit handling. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 -- arch/powerpc/kernel/fpu.S | 16 ---------------- arch/powerpc/kernel/process.c | 6 ++++-- arch/powerpc/kernel/vector.S | 10 ---------- 4 files changed, 4 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a2e891840806..ac2330820b9a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -380,8 +380,6 @@ extern int set_endian(struct task_struct *tsk, unsigned int val); extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr); extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val); -extern void fp_enable(void); -extern void vec_enable(void); extern void load_fp_state(struct thread_fp_state *fp); extern void store_fp_state(struct thread_fp_state *fp); extern void load_vr_state(struct thread_vr_state *vr); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 431ab571ed1b..2117eaca3d28 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -76,22 +76,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -/* - * Enable use of the FPU, and VSX if possible, for the caller. - */ -_GLOBAL(fp_enable) - mfmsr r3 - ori r3,r3,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC - MTMSRD(r3) - isync /* (not necessary for arch 2.02 and later) */ - blr - /* * Load state from memory into FP registers including FPSCR. * Assumes the caller has enabled FP in the MSR. 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1eafceefeac9..9f8444b84dde 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -732,13 +732,15 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; if (msr_diff & MSR_FP) { - fp_enable(); + msr_check_and_set(MSR_FP); load_fp_state(¤t->thread.fp_state); + msr_check_and_clear(MSR_FP); regs->msr |= current->thread.fpexc_mode; } if (msr_diff & MSR_VEC) { - vec_enable(); + msr_check_and_set(MSR_VEC); load_vr_state(¤t->thread.vr_state); + msr_check_and_clear(MSR_VEC); } regs->msr |= msr_diff; } diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 98675b08efe2..162d0f714941 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -32,16 +32,6 @@ _GLOBAL(do_load_up_transact_altivec) blr #endif -/* - * Enable use of VMX/Altivec for the caller. - */ -_GLOBAL(vec_enable) - mfmsr r3 - oris r3,r3,MSR_VEC@h - MTMSRD(r3) - isync - blr - /* * Load state from memory into VMX registers including VSCR. * Assumes the caller has enabled VMX in the MSR. -- cgit v1.2.3 From c208505900b232ecdc81dee54cb3a032e75d88d6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:08 +1100 Subject: powerpc: create giveup_all() Create a single function that gives everything up (FP, VMX, VSX, SPE). Doing this all at once means we only do one MSR write. A context switch microbenchmark using yield(): http://ozlabs.org/~anton/junkcode/context_switch2.c ./context_switch2 --test=yield --fp --altivec --vector 0 0 shows an improvement of 3% on POWER8. Signed-off-by: Anton Blanchard [mpe: giveup_all() needs to be EXPORT_SYMBOL'ed] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 1 + arch/powerpc/kernel/process.c | 76 +++++++++++++++++++++++++++++------- arch/powerpc/kvm/book3s_pr.c | 17 +------- 3 files changed, 64 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9414dcb180d6..8f856788a7cf 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -26,6 +26,7 @@ extern void __giveup_vsx(struct task_struct *); extern void giveup_vsx(struct task_struct *); extern void enable_kernel_spe(void); extern void load_up_spe(struct task_struct *); +extern void giveup_all(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); #ifdef CONFIG_PPC_FPU diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f8444b84dde..4c087b9ed2d6 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -308,6 +308,65 @@ void flush_spe_to_thread(struct task_struct *tsk) } #endif /* CONFIG_SPE */ +static unsigned long msr_all_available; + +static int __init init_msr_all_available(void) +{ +#ifdef CONFIG_PPC_FPU + msr_all_available |= MSR_FP; +#endif +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr_all_available |= MSR_VEC; +#endif +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + msr_all_available |= MSR_VSX; +#endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + msr_all_available |= MSR_SPE; +#endif + + return 0; +} +early_initcall(init_msr_all_available); + +void giveup_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if 
((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + +#ifdef CONFIG_PPC_FPU + if (usermsr & MSR_FP) + __giveup_fpu(tsk); +#endif +#ifdef CONFIG_ALTIVEC + if (usermsr & MSR_VEC) + __giveup_altivec(tsk); +#endif +#ifdef CONFIG_VSX + if (usermsr & MSR_VSX) + __giveup_vsx(tsk); +#endif +#ifdef CONFIG_SPE + if (usermsr & MSR_SPE) + __giveup_spe(tsk); +#endif + + msr_check_and_clear(msr_all_available); +} +EXPORT_SYMBOL(giveup_all); + #ifdef CONFIG_PPC_ADV_DEBUG_REGS void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int breakpt) @@ -839,21 +898,8 @@ struct task_struct *__switch_to(struct task_struct *prev, __switch_to_tm(prev); - if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) - giveup_fpu(prev); -#ifdef CONFIG_ALTIVEC - if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) - giveup_altivec(prev); -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX - if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX)) - /* VMX and FPU registers are already save here */ - __giveup_vsx(prev); -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) - giveup_spe(prev); -#endif /* CONFIG_SPE */ + /* Save FPU, Altivec, VSX and SPE state */ + giveup_all(prev); #ifdef CONFIG_PPC_ADV_DEBUG_REGS switch_booke_debug_regs(&new->thread.debug); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 49f5dad1bd45..a78e0e6bd932 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1490,21 +1490,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; /* interrupts now hard-disabled */ - /* Save FPU state in thread_struct */ - if (current->thread.regs->msr & MSR_FP) - giveup_fpu(current); - -#ifdef CONFIG_ALTIVEC - /* Save Altivec state in thread_struct */ - if (current->thread.regs->msr & MSR_VEC) - giveup_altivec(current); -#endif - -#ifdef CONFIG_VSX - /* Save VSX state in thread_struct */ - if (current->thread.regs->msr & MSR_VSX) - __giveup_vsx(current); -#endif + /* Save FPU, Altivec and VSX state */ + giveup_all(current); /* Preload FPU if it's enabled */ if (kvmppc_get_msr(vcpu) & MSR_FP) -- cgit v1.2.3 From 579e633e764e6e5f7784b74e7df3e81fe11f40de Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:09 +1100 Subject: powerpc: create flush_all_to_thread() Create a single function that flushes everything (FP, VMX, VSX, SPE). Doing this all at once means we only do one MSR write. 
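The yield() microbenchmark cited for giveup_all() above is, in essence, two tasks pinned to one CPU yielding to each other. A minimal user-space sketch of that idea (a simplified guess at the shape of context_switch2.c, not the actual harness):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    int main(void)
    {
            unsigned long i, iters = 1000000;
            struct timespec a, b;
            cpu_set_t cpus;
            pid_t pid;

            /* pin both tasks to CPU 0 so every yield is a context switch */
            CPU_ZERO(&cpus);
            CPU_SET(0, &cpus);
            sched_setaffinity(0, sizeof(cpus), &cpus);

            pid = fork();
            if (pid == 0)
                    for (;;)
                            sched_yield();

            clock_gettime(CLOCK_MONOTONIC, &a);
            for (i = 0; i < iters; i++)
                    sched_yield();
            clock_gettime(CLOCK_MONOTONIC, &b);

            printf("%.0f switches/sec\n", iters /
                   ((b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9));
            kill(pid, SIGKILL);
            return 0;
    }

The real harness additionally dirties FP, VMX and VSX state each iteration (the --fp --altivec --vector flags), which is what makes the giveup path hot enough for the single MSR write to show up as a 3% win.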
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 1 + arch/powerpc/kernel/process.c | 22 ++++++++++++++++++---- arch/powerpc/kernel/swsusp.c | 4 +--- arch/powerpc/kvm/book3s_hv.c | 5 ++--- 4 files changed, 22 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 8f856788a7cf..81d46a433c03 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -27,6 +27,7 @@ extern void giveup_vsx(struct task_struct *); extern void enable_kernel_spe(void); extern void load_up_spe(struct task_struct *); extern void giveup_all(struct task_struct *); +extern void flush_all_to_thread(struct task_struct *); extern void switch_booke_debug_regs(struct debug_reg *new_debug); #ifdef CONFIG_PPC_FPU diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4c087b9ed2d6..7f437e7b273e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -367,6 +367,23 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +void flush_all_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + BUG_ON(tsk != current); + giveup_all(tsk); + +#ifdef CONFIG_SPE + if (tsk->thread.regs->msr & MSR_SPE) + tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); +#endif + + preempt_enable(); + } +} +EXPORT_SYMBOL(flush_all_to_thread); + #ifdef CONFIG_PPC_ADV_DEBUG_REGS void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int breakpt) @@ -1137,10 +1154,7 @@ release_thread(struct task_struct *t) */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - flush_fp_to_thread(src); - flush_altivec_to_thread(src); - flush_vsx_to_thread(src); - flush_spe_to_thread(src); + flush_all_to_thread(src); /* * Flush TM state out so we can copy it. __switch_to_tm() does this * flush but it removes the checkpointed state from the current CPU and diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index eae33e10b65f..6669b1752512 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -20,9 +20,7 @@ void save_processor_state(void) * flush out all the special registers so we don't need * to save them in the snapshot */ - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_spe_to_thread(current); + flush_all_to_thread(current); #ifdef CONFIG_PPC64 hard_irq_disable(); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 54b45b73195f..8e694707bc56 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2700,9 +2700,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) goto out; } - flush_fp_to_thread(current); - flush_altivec_to_thread(current); - flush_vsx_to_thread(current); + flush_all_to_thread(current); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; -- cgit v1.2.3 From f3d885ccba8539f62e8be3ba29ecf91687120252 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:10 +1100 Subject: powerpc: Rearrange __switch_to() Most of __switch_to() is housekeeping, TLB batching, timekeeping etc. Move these away from the more complex and critical context switching code. 
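After the move, the function reads in two halves; an abridged outline (the diff below has the real code and ordering):

    /*
     * __switch_to(prev, new) -- abridged sketch of the new layout:
     *
     *   housekeeping, hoisted to the top:
     *     - PPC64 processor utilisation accounting
     *     - Book3S batched TLB flush for the outgoing task
     *     - Book E debug register / breakpoint switching
     *
     *   critical switch sequence, now contiguous at the bottom:
     *     save_sprs(&prev->thread);    // before treclaim clobbers SPRs
     *     __switch_to_tm(prev);
     *     giveup_all(prev);            // FP/VMX/VSX/SPE, one MSR write
     *     last = _switch(old_thread, new_thread);
     *     restore_sprs(old_thread, new_thread);
     */

(A later fix in this series moves restore_sprs() before _switch(); see below.)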
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 52 +++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7f437e7b273e..49424dc1168d 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -907,30 +907,6 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); - /* - * We need to save SPRs before treclaim/trecheckpoint as these will - * change a number of them. - */ - save_sprs(&prev->thread); - - __switch_to_tm(prev); - - /* Save FPU, Altivec, VSX and SPE state */ - giveup_all(prev); - -#ifdef CONFIG_PPC_ADV_DEBUG_REGS - switch_booke_debug_regs(&new->thread.debug); -#else -/* - * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would - * schedule DABR - */ -#ifndef CONFIG_HAVE_HW_BREAKPOINT - if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) - __set_breakpoint(&new->thread.hw_brk); -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif - #ifdef CONFIG_PPC64 /* * Collect processor utilization data per process @@ -955,6 +931,30 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + switch_booke_debug_regs(&new->thread.debug); +#else +/* + * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would + * schedule DABR + */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT + if (unlikely(!hw_brk_match(this_cpu_ptr(¤t_brk), &new->thread.hw_brk))) + __set_breakpoint(&new->thread.hw_brk); +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif + + /* + * We need to save SPRs before treclaim/trecheckpoint as these will + * change a number of them. + */ + save_sprs(&prev->thread); + + __switch_to_tm(prev); + + /* Save FPU, Altivec, VSX and SPE state */ + giveup_all(prev); + /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -970,6 +970,8 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread = &last->thread; new_thread = ¤t->thread; + restore_sprs(old_thread, new_thread); + #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; @@ -978,8 +980,6 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC_BOOK3S_64 */ - restore_sprs(old_thread, new_thread); - return last; } -- cgit v1.2.3 From d1e1cf2e38def301fde42c1a33f896f974941d7b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 29 Oct 2015 11:44:11 +1100 Subject: powerpc: clean up asm/switch_to.h Remove a bunch of unnecessary fallback functions and group things in a more logical way. 
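The rule behind the cleanup: an empty fallback stub is only required while common code may call the function with the relevant CONFIG option disabled. Once every caller either sits under the same #ifdef or first tests an MSR bit that can never be set, the stub is dead weight. A sketch of the pattern for one symbol:

    #ifdef CONFIG_SPE
    extern void giveup_spe(struct task_struct *);
    #else
    /* Only needed while unguarded common code could call giveup_spe(): */
    static inline void giveup_spe(struct task_struct *t) { }
    #endif

    /* After the cleanup the #else branch is dropped entirely, because the
     * remaining callers are themselves compiled out without CONFIG_SPE. */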
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 35 ++++++++++------------------------- arch/powerpc/kernel/process.c | 2 +- 2 files changed, 11 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 81d46a433c03..5b268b6be74c 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -14,23 +14,18 @@ extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); #define switch_to(prev, next, last) ((last) = __switch_to((prev), (next))) -struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); -extern void enable_kernel_fp(void); -extern void enable_kernel_altivec(void); -extern void enable_kernel_vsx(void); +extern void switch_booke_debug_regs(struct debug_reg *new_debug); + extern int emulate_altivec(struct pt_regs *); -extern void __giveup_vsx(struct task_struct *); -extern void giveup_vsx(struct task_struct *); -extern void enable_kernel_spe(void); -extern void load_up_spe(struct task_struct *); -extern void giveup_all(struct task_struct *); + extern void flush_all_to_thread(struct task_struct *); -extern void switch_booke_debug_regs(struct debug_reg *new_debug); +extern void giveup_all(struct task_struct *); #ifdef CONFIG_PPC_FPU +extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); extern void __giveup_fpu(struct task_struct *); @@ -38,14 +33,12 @@ static inline void disable_kernel_fp(void) { msr_check_and_clear(MSR_FP); } - #else static inline void flush_fp_to_thread(struct task_struct *t) { } -static inline void giveup_fpu(struct task_struct *t) { } -static inline void __giveup_fpu(struct task_struct *t) { } #endif #ifdef CONFIG_ALTIVEC +extern void enable_kernel_altivec(void); extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); extern void __giveup_altivec(struct task_struct *); @@ -53,25 +46,21 @@ static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } -#else -static inline void flush_altivec_to_thread(struct task_struct *t) { } -static inline void giveup_altivec(struct task_struct *t) { } -static inline void __giveup_altivec(struct task_struct *t) { } #endif #ifdef CONFIG_VSX +extern void enable_kernel_vsx(void); extern void flush_vsx_to_thread(struct task_struct *); +extern void giveup_vsx(struct task_struct *); +extern void __giveup_vsx(struct task_struct *); static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -#else -static inline void flush_vsx_to_thread(struct task_struct *t) -{ -} #endif #ifdef CONFIG_SPE +extern void enable_kernel_spe(void); extern void flush_spe_to_thread(struct task_struct *); extern void giveup_spe(struct task_struct *); extern void __giveup_spe(struct task_struct *); @@ -79,10 +68,6 @@ static inline void disable_kernel_spe(void) { msr_check_and_clear(MSR_SPE); } -#else -static inline void flush_spe_to_thread(struct task_struct *t) { } -static inline void giveup_spe(struct task_struct *t) { } -static inline void __giveup_spe(struct task_struct *t) { } #endif static inline void clear_task_ebb(struct task_struct *t) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 49424dc1168d..58194c3f421e 100644 --- a/arch/powerpc/kernel/process.c +++ 
b/arch/powerpc/kernel/process.c @@ -174,7 +174,6 @@ void flush_fp_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_fp_to_thread); -#endif /* CONFIG_PPC_FPU */ void enable_kernel_fp(void) { @@ -186,6 +185,7 @@ void enable_kernel_fp(void) __giveup_fpu(current); } EXPORT_SYMBOL(enable_kernel_fp); +#endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC void giveup_altivec(struct task_struct *tsk) -- cgit v1.2.3 From d64d02ce4ebaa79bf1c026e81a956f133938af65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 10 Dec 2015 20:04:05 +1100 Subject: powerpc: Call check_if_tm_restore_required() in enable_kernel_*() Commit a0e72cf12b1a ("powerpc: Create msr_check_and_{set,clear}()") removed a call to check_if_tm_restore_required() in the enable_kernel_*() functions. Add them back in. Fixes: a0e72cf12b1a ("powerpc: Create msr_check_and_{set,clear}()") Reported-by: Rashmica Gupta Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 58194c3f421e..1eeda3b80b65 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -181,8 +181,10 @@ void enable_kernel_fp(void) msr_check_and_set(MSR_FP); - if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { + check_if_tm_restore_required(current); __giveup_fpu(current); + } } EXPORT_SYMBOL(enable_kernel_fp); #endif /* CONFIG_PPC_FPU */ @@ -204,8 +206,10 @@ void enable_kernel_altivec(void) msr_check_and_set(MSR_VEC); - if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { + check_if_tm_restore_required(current); __giveup_altivec(current); + } } EXPORT_SYMBOL(enable_kernel_altivec); @@ -249,6 +253,7 @@ void enable_kernel_vsx(void) msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + check_if_tm_restore_required(current); if (current->thread.regs->msr & MSR_FP) __giveup_fpu(current); if (current->thread.regs->msr & MSR_VEC) @@ -289,8 +294,10 @@ void enable_kernel_spe(void) msr_check_and_set(MSR_SPE); - if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) { + check_if_tm_restore_required(current); __giveup_spe(current); + } } EXPORT_SYMBOL(enable_kernel_spe); -- cgit v1.2.3 From 20dbe67062062c2a790832f0d30e73dba45df7c4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 10 Dec 2015 20:44:39 +1100 Subject: powerpc: Call restore_sprs() before _switch() commit 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()") moved the restore of SPRs after the call to _switch(). There is an issue with this approach - new tasks do not return through _switch(), they are set up by copy_thread() to directly return through ret_from_fork() or ret_from_kernel_thread(). This means restore_sprs() is not getting called for new tasks. Fix this by moving restore_sprs() before _switch(). 
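The trap here has a convenient user-space analogue: a freshly created context begins at its entry function, not at the instruction after the switch call, so anything placed after the switch never runs on the new context's first activation. A small illustration with ucontext (purely illustrative, unrelated to the kernel code itself):

    #include <stdio.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, task_ctx;
    static char stack[64 * 1024];

    static void new_task(void)
    {
            puts("new task: started at its entry point, not after swapcontext()");
            swapcontext(&task_ctx, &main_ctx);
    }

    int main(void)
    {
            getcontext(&task_ctx);
            task_ctx.uc_stack.ss_sp = stack;
            task_ctx.uc_stack.ss_size = sizeof(stack);
            task_ctx.uc_link = &main_ctx;
            makecontext(&task_ctx, new_task, 0);

            swapcontext(&main_ctx, &task_ctx);
            /* Only reached once we are switched back in -- the moral
             * equivalent of code placed after _switch(). */
            puts("main: resumed after the new task yielded");
            return 0;
    }

In the kernel, the equivalent of new_task()'s entry point is the stack frame copy_thread() builds for ret_from_fork()/ret_from_kernel_thread(), which is why restore_sprs() has to run before _switch().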
Fixes: 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()") Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1eeda3b80b65..9da7b5f0c3a5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -971,14 +971,17 @@ struct task_struct *__switch_to(struct task_struct *prev, tm_recheckpoint_new_task(new); - last = _switch(old_thread, new_thread); - - /* Need to recalculate these after calling _switch() */ - old_thread = &last->thread; - new_thread = ¤t->thread; - + /* + * Call restore_sprs() before calling _switch(). If we move it after + * _switch() then we miss out on calling it for new tasks. The reason + * for this is we manually create a stack frame for new tasks that + * directly returns through ret_from_fork() or + * ret_from_kernel_thread(). See copy_thread() for details. + */ restore_sprs(old_thread, new_thread); + last = _switch(old_thread, new_thread); + #ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; -- cgit v1.2.3 From db1231dcdb4dc6cdcbdef0babe641a9162c0dc98 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 9 Dec 2015 20:11:47 +1100 Subject: powerpc: Fix DSCR inheritance over fork() Two DSCR tests have a hack in them: /* * XXX: Force a context switch out so that DSCR * current value is copied into the thread struct * which is required for the child to inherit the * changed value. */ sleep(1); We should not be working around this in the testcase, it is a kernel bug. Fix it by copying the current DSCR to the child, instead of what we had in the thread struct at last context switch. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c | 8 -------- tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c | 8 -------- 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9da7b5f0c3a5..6f76f25c3ee8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1287,7 +1287,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_DSCR)) { p->thread.dscr_inherit = current->thread.dscr_inherit; - p->thread.dscr = current->thread.dscr; + p->thread.dscr = mfspr(SPRN_DSCR); } if (cpu_has_feature(CPU_FTR_HAS_PPR)) p->thread.ppr = INIT_PPR; diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 8265504de571..08a8b95e3bc1 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -60,14 +60,6 @@ int dscr_inherit_exec(void) else set_dscr(dscr); - /* - * XXX: Force a context switch out so that DSCR - * current value is copied into the thread struct - * which is required for the child to inherit the - * changed value. 
- */ - sleep(1); - pid = fork(); if (pid == -1) { perror("fork() failed"); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c index 4e414caf7f40..3e5a6d195e9a 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -40,14 +40,6 @@ int dscr_inherit(void) else set_dscr(dscr); - /* - * XXX: Force a context switch out so that DSCR - * current value is copied into the thread struct - * which is required for the child to inherit the - * changed value. - */ - sleep(1); - pid = fork(); if (pid == -1) { perror("fork() failed"); -- cgit v1.2.3 From 106713a14590cd7b223db000f4f47f7d1d898153 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Dec 2015 09:06:44 +0530 Subject: powerpc/mm: Remove the dependency on pte bit position in asm code We should not depend on the pte bit positions in asm code. Fix this by moving that part of the logic into C. Acked-by: Scott Wood Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 18 ++++-------------- arch/powerpc/mm/hash_utils_64.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1a03142a69fd..3419cbf2ad59 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1556,29 +1556,19 @@ do_hash_page: lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ bne 77f /* then don't call hash_page now */ - /* - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are - * accessing a userspace segment (even from the kernel). We assume - * kernel addresses always have the high bit set.
- */ - rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ - rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ - orc r0,r12,r0 /* MSR_PR | ~high_bit */ - rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ - ori r4,r4,1 /* add _PAGE_PRESENT */ - rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ /* * r3 contains the faulting address - * r4 contains the required access permissions + * r4 msr * r5 contains the trap number * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + mr r4,r12 ld r6,_DSISR(r1) - bl hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if hash_page succeeded */ + bl __hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if __hash_page succeeded */ /* Success */ beq fast_exc_return_irq /* Return from exception on success */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 995809911f17..30b7648e687a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1206,6 +1206,35 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); +int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, + unsigned long dsisr) +{ + unsigned long access = _PAGE_PRESENT; + unsigned long flags = 0; + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + if (dsisr & DSISR_ISSTORE) + access |= _PAGE_RW; + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. + */ + if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) + access |= _PAGE_USER; + + if (trap == 0x400) + access |= _PAGE_EXEC; + + return hash_page_mm(mm, ea, access, trap, flags); +} + void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { -- cgit v1.2.3 From 801c0b2c4db3a33d56b3e19240df7b897e5bbfbc Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 20 Nov 2015 15:15:32 +1100 Subject: powerpc: Print MSR TM bits in oops messages Print MSR TM bits in oops messages. This appends them to the end like this: MSR: 8000000502823031 <SF,VEC,VSX,FP,ME,IR,DR,LE,TM[TE]> You get the TM[] only if at least one TM MSR bit is set. Inside the TM[], E means Enabled (bit 32), S means Suspended (bit 33), and T means Transactional (bit 34). If no bits are set, you get no TM[] output. This includes a rework of printbits() to handle this case.
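As an illustration of the output format described above, here is a minimal stand-alone sketch of the TM[] suffix logic. This is not the kernel code: the bit positions come from the text above, the T/S/E ordering mirrors the msr_tm_bits table in the diff below, and the macro names are redefined locally (assuming a 64-bit unsigned long).

	#include <stdio.h>

	/* Bit positions from the description above (bit 0 = LSB). */
	#define MSR_TM		(1UL << 32)	/* E: TM enabled */
	#define MSR_TS_S	(1UL << 33)	/* S: suspended */
	#define MSR_TS_T	(1UL << 34)	/* T: transactional */

	/* Print the ",TM[...]" suffix only if at least one TM bit is set. */
	static void print_tm_suffix(unsigned long msr)
	{
		if (!(msr & (MSR_TM | MSR_TS_S | MSR_TS_T)))
			return;
		printf(",TM[");
		if (msr & MSR_TS_T)
			printf("T");
		if (msr & MSR_TS_S)
			printf("S");
		if (msr & MSR_TM)
			printf("E");
		printf("]");
	}

	int main(void)
	{
		/* The example MSR above has bits 32 and 34 set: prints ",TM[TE]" */
		print_tm_suffix(0x8000000502823031UL);
		printf("\n");
		return 0;
	}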
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 51 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6f76f25c3ee8..ab9373bfabda 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1033,10 +1033,12 @@ static void show_instructions(struct pt_regs *regs) printk("\n"); } -static struct regbit { +struct regbit { unsigned long bit; const char *name; -} msr_bits[] = { +}; + +static struct regbit msr_bits[] = { #if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) {MSR_SF, "SF"}, {MSR_HV, "HV"}, @@ -1066,16 +1068,49 @@ static struct regbit { {0, NULL} }; -static void printbits(unsigned long val, struct regbit *bits) +static void print_bits(unsigned long val, struct regbit *bits, const char *sep) { - const char *sep = ""; + const char *s = ""; - printk("<"); for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", sep, bits->name); - sep = ","; + printk("%s%s", s, bits->name); + s = sep; } +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static struct regbit msr_tm_bits[] = { + {MSR_TS_T, "T"}, + {MSR_TS_S, "S"}, + {MSR_TM, "E"}, + {0, NULL} +}; + +static void print_tm_bits(unsigned long val) +{ +/* + * This only prints something if at least one of the TM bits is set. + * Inside the TM[], the output means: + * E: Enabled (bit 32) + * S: Suspended (bit 33) + * T: Transactional (bit 34) + */ + if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { + printk(",TM["); + print_bits(val, msr_tm_bits, ""); + printk("]"); + } +} +#else +static void print_tm_bits(unsigned long val) {} +#endif + +static void print_msr_bits(unsigned long val) +{ + printk("<"); + print_bits(val, msr_bits, ","); + print_tm_bits(val); printk(">"); } @@ -1100,7 +1135,7 @@ void show_regs(struct pt_regs * regs) printk("REGS: %p TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); - printbits(regs->msr, msr_bits); + print_msr_bits(regs->msr); printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) -- cgit v1.2.3 From 00b912b0c88e690b1662067497182454357b18b0 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 15 Dec 2015 18:09:14 +1100 Subject: powerpc: Remove broken GregorianDay() GregorianDay() is supposed to calculate the day of the week (tm->tm_wday) for a given day/month/year. In that calculation it indexed into an array called MonthOffset using tm->tm_mon-1. However tm_mon is zero-based, not one-based, so this is off-by-one. It also means that every January, GregorianDay() will access element -1 of the MonthOffset array. It doesn't appear to be a correct algorithm either: see in contrast kernel/time/timeconv.c's time_to_tm function. It's been broken forever, which suggests no-one in userland uses this. It looks like no-one in the kernel uses tm->tm_wday either (see e.g. drivers/rtc/rtc-ds1305.c:319). tm->tm_wday is conventionally set to -1 when not available in hardware so we can simply set it to -1 and drop the function. (There are over a dozen other drivers in drivers/rtc that do this.) Found using UBSAN. Cc: Andrey Ryabinin Cc: Andrew Morton # as an example of what UBSan finds.
Cc: Alessandro Zummo Cc: Alexandre Belloni Cc: rtc-linux@googlegroups.com Signed-off-by: Daniel Axtens Acked-by: Alexandre Belloni Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/time.h | 1 - arch/powerpc/kernel/time.c | 36 ++----------------------------- arch/powerpc/platforms/maple/time.c | 2 +- arch/powerpc/platforms/powernv/opal-rtc.c | 3 +-- drivers/rtc/rtc-opal.c | 2 +- 5 files changed, 5 insertions(+), 39 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 10fc784a2ad4..2d7109a8d296 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -27,7 +27,6 @@ extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); -extern void GregorianDay(struct rtc_time *tm); extern void tick_broadcast_ipi_handler(void); extern void generic_calibrate_decr(void); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1be1092c7204..81b0900a39ee 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1002,38 +1002,6 @@ static int month_days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; -/* - * This only works for the Gregorian calendar - i.e. after 1752 (in the UK) - */ -void GregorianDay(struct rtc_time * tm) -{ - int leapsToDate; - int lastYear; - int day; - int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 }; - - lastYear = tm->tm_year - 1; - - /* - * Number of leap corrections to apply up to end of last year - */ - leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400; - - /* - * This year is a leap year if it is divisible by 4 except when it is - * divisible by 100 unless it is divisible by 400 - * - * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was - */ - day = tm->tm_mon > 2 && leapyear(tm->tm_year); - - day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] + - tm->tm_mday; - - tm->tm_wday = day % 7; -} -EXPORT_SYMBOL_GPL(GregorianDay); - void to_tm(int tim, struct rtc_time * tm) { register int i; @@ -1064,9 +1032,9 @@ void to_tm(int tim, struct rtc_time * tm) tm->tm_mday = day + 1; /* - * Determine the day of week + * No-one uses the day of the week. 
*/ - GregorianDay(tm); + tm->tm_wday = -1; } EXPORT_SYMBOL(to_tm); diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index b4a369dac3a8..81799d70a1ee 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -77,7 +77,7 @@ void maple_get_rtc_time(struct rtc_time *tm) if ((tm->tm_year + 1900) < 1970) tm->tm_year += 100; - GregorianDay(tm); + tm->tm_wday = -1; } int maple_set_rtc_time(struct rtc_time *tm) diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 37dbee15769f..1b149c92fca1 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -31,8 +31,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); - - GregorianDay(tm); + tm->tm_wday = -1; } unsigned long __init opal_get_boot_time(void) diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index df39ce02a99d..9c18d6fd8107 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ -40,7 +40,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); - GregorianDay(tm); + tm->tm_wday = -1; } static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) -- cgit v1.2.3 From 209eb4e5cbaba53ab555f3e7b43aa27176f3a925 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Dec 2015 21:01:42 +1100 Subject: powerpc/rtas: Add rtas_call_unlocked() Most users of RTAS (Run-Time Abstraction Services) use rtas_call(), which deals with locking as well as endian handling. However we have two users outside of rtas.c that can't use rtas_call() because they have different locking requirements. The hotplug CPU code can't take the RTAS lock because the CPU would go offline with the lock held and no other CPUs would be able to call RTAS until the CPU came back online. The xmon code doesn't want to take the lock because it would risk deadlocking when we are trying to recover from a crash. Both sites required multiple patches when we added little endian support, proving that programmers can't do endian right. Although that ship has sailed, we can still clean the code up by providing an unlocked version of rtas_call() which avoids the need to open code the logic elsewhere.
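To make the intended split concrete, here is a hypothetical caller sketched against the new API (the "set-indicator" call and its 3-input/1-output signature are used purely as an example; the caller is assumed to provide both the rtas_args buffer and whatever serialisation it needs):

	#include <asm/rtas.h>

	/*
	 * Hypothetical example: make a 3-input/1-output RTAS call without
	 * taking the global RTAS lock. Safe only because the caller
	 * guarantees serialisation itself, e.g. it runs single-threaded
	 * from xmon or during CPU offline.
	 */
	static int example_set_indicator_unlocked(int token, int indicator,
						  int index, int state)
	{
		struct rtas_args args;

		rtas_call_unlocked(&args, token, 3, 1, indicator, index, state);

		/* enter_rtas() leaves the return values big-endian */
		return be32_to_cpu(args.rets[0]);
	}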
Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/rtas.h | 2 ++ arch/powerpc/kernel/rtas.c | 44 ++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b77ef369c0f0..6db1d6977a0d 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -338,6 +338,8 @@ extern void enter_rtas(unsigned long); extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); +void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, + int nret, ...); extern void rtas_restart(char *cmd); extern void rtas_power_off(void); extern void rtas_halt(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 5a753fae8265..fcf2d653a6fe 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -418,6 +418,36 @@ static char *__fetch_rtas_last_error(char *altbuf) #define get_errorlog_buffer() NULL #endif + +static void +va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, + va_list list) +{ + int i; + + args->token = cpu_to_be32(token); + args->nargs = cpu_to_be32(nargs); + args->nret = cpu_to_be32(nret); + args->rets = &(args->args[nargs]); + + for (i = 0; i < nargs; ++i) + args->args[i] = cpu_to_be32(va_arg(list, __u32)); + + for (i = 0; i < nret; ++i) + args->rets[i] = 0; + + enter_rtas(__pa(args)); +} + +void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) +{ + va_list list; + + va_start(list, nret); + va_rtas_call_unlocked(args, token, nargs, nret, list); + va_end(list); +} + int rtas_call(int token, int nargs, int nret, int *outputs, ...) { va_list list; @@ -431,22 +461,14 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) return -1; s = lock_rtas(); + + /* We use the global rtas args buffer */ rtas_args = &rtas.args; - rtas_args->token = cpu_to_be32(token); - rtas_args->nargs = cpu_to_be32(nargs); - rtas_args->nret = cpu_to_be32(nret); - rtas_args->rets = &(rtas_args->args[nargs]); va_start(list, outputs); - for (i = 0; i < nargs; ++i) - rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32)); + va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); va_end(list); - for (i = 0; i < nret; ++i) - rtas_args->rets[i] = 0; - - enter_rtas(__pa(rtas_args)); - /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ if (be32_to_cpu(rtas_args->rets[0]) == -1) -- cgit v1.2.3 From 4456f4524604be2558e5f6a8e0f7cc9ed17c783e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Nov 2015 22:26:11 +1100 Subject: powerpc/rtas: Use rtas_call_unlocked() in call_rtas_display_status() Although call_rtas_display_status() does actually want to use the regular RTAS locking, it doesn't want the extra logic that is in rtas_call(), so currently it open codes the logic. Instead we can use rtas_call_unlocked(), after taking the RTAS lock. 
Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index fcf2d653a6fe..f4fa137292c4 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -93,21 +93,13 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - struct rtas_args *args = &rtas.args; unsigned long s; if (!rtas.base) return; - s = lock_rtas(); - - args->token = cpu_to_be32(10); - args->nargs = cpu_to_be32(1); - args->nret = cpu_to_be32(1); - args->rets = &(args->args[1]); - args->args[0] = cpu_to_be32(c); - - enter_rtas(__pa(args)); + s = lock_rtas(); + rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); unlock_rtas(s); } -- cgit v1.2.3 From cd5cdeb6c8a42fb87644b0eb5d240f6ce6172402 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 24 Nov 2015 22:26:12 +1100 Subject: powerpc/rtas: Make enter_rtas() private There are no longer any users of enter_rtas() outside of rtas.c, so make it "private", by moving the declaration inside rtas.c. Hopefully this will encourage people to use one of the wrappers which takes the sharp edges off the RTAS calling sequence. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/rtas.h | 1 - arch/powerpc/kernel/rtas.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 6db1d6977a0d..51400baa8d48 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -334,7 +334,6 @@ extern void (*rtas_flash_term_hook)(int); extern struct rtas_t rtas; -extern void enter_rtas(unsigned long); extern int rtas_token(const char *service); extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index f4fa137292c4..28736ff27fea 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -44,6 +44,9 @@ #include #include +/* This is here deliberately so it's only used in this file */ +void enter_rtas(unsigned long); + struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; -- cgit v1.2.3 From d6265aeaf815801ad53a95f11cea8ea752862176 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Nov 2015 14:25:16 +1100 Subject: powerpc/kernel: Drop HMT_MEDIUM_PPR_DISCARD HMT_MEDIUM_PPR_DISCARD is a macro which is present at the start of most of our first level exception handlers. It conditionally executes a HMT_MEDIUM instruction, which sets the processor priority to medium. On modern systems, i.e. Power7 and later, it is nop'ed out at boot. All it does is make the exception vectors more cramped, and consume 4 bytes of icache. On old systems it has the effect of boosting the processor priority at the start of exception processing. If we were previously in the idle loop for example, we may be at low or very low priority. This is desirable as we want to process the exception as fast as possible. However looking closely at the generated code, we see that in all cases we execute another HMT_MEDIUM just four instructions later.
With code patching applied, the final code on an old (Power6) system will look like, eg: c000000000000300 : c000000000000300: 7c 42 13 78 mr r2,r2 <- c000000000000304: 7d b2 43 a6 mtsprg 2,r13 c000000000000308: 7d b1 42 a6 mfsprg r13,1 c00000000000030c: f9 2d 00 80 std r9,128(r13) c000000000000310: 60 00 00 00 nop c000000000000314: 7c 42 13 78 mr r2,r2 <- So I suggest that the added code complexity of HMT_MEDIUM_PPR_DISCARD is not justified by the benefit of boosting the processor priority for the duration of four instructions, and therefore we drop it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 15 --------------- arch/powerpc/kernel/exceptions-64s.S | 9 --------- 2 files changed, 24 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9ee10781121f..60b2bbda212d 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -129,15 +129,6 @@ BEGIN_FTR_SECTION_NESTED(941) \ mtspr SPRN_PPR,ra; \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) -/* - * Increase the priority on systems where PPR save/restore is not - * implemented/ supported. - */ -#define HMT_MEDIUM_PPR_DISCARD \ -BEGIN_FTR_SECTION_NESTED(942) \ - HMT_MEDIUM; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942) /*non P7*/ - /* * Get an SPR into a register if the CPU has the given feature */ @@ -346,7 +337,6 @@ do_kvm_##n: \ . = loc; \ .globl label##_pSeries; \ label##_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_STD, KVMTEST, vec) @@ -362,7 +352,6 @@ label##_pSeries: \ . = loc; \ .globl label##_hv; \ label##_hv: \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ EXC_HV, KVMTEST, vec) @@ -378,7 +367,6 @@ label##_hv: \ . = loc; \ .globl label##_relon_pSeries; \ label##_relon_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ @@ -394,7 +382,6 @@ label##_relon_pSeries: \ . = loc; \ .globl label##_relon_hv; \ label##_relon_hv: \ - HMT_MEDIUM_PPR_DISCARD; \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ @@ -448,7 +435,6 @@ label##_relon_hv: \ . 
= loc; \ .globl label##_pSeries; \ label##_pSeries: \ - HMT_MEDIUM_PPR_DISCARD; \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_STD, SOFTEN_TEST_PR) @@ -466,7 +452,6 @@ label##_hv: \ EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ - HMT_MEDIUM_PPR_DISCARD; \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3419cbf2ad59..0757f23d35aa 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -96,7 +96,6 @@ __start_interrupts: .globl system_reset_pSeries; system_reset_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -164,7 +163,6 @@ machine_check_pSeries_1: * some code path might still want to branch into the original * vector */ - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -199,7 +197,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) . = 0x300 .globl data_access_pSeries data_access_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, KVMTEST, 0x300) @@ -207,7 +204,6 @@ data_access_pSeries: . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) @@ -239,7 +235,6 @@ data_access_slb_pSeries: . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) @@ -269,7 +264,6 @@ instruction_access_slb_pSeries: .globl hardware_interrupt_hv; hardware_interrupt_pSeries: hardware_interrupt_hv: - HMT_MEDIUM_PPR_DISCARD BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) @@ -413,7 +407,6 @@ hv_facility_unavailable_trampoline: . = 0x1500 .global denorm_exception_hv denorm_exception_hv: - HMT_MEDIUM_PPR_DISCARD mtspr SPRN_SPRG_HSCRATCH0,r13 EXCEPTION_PROLOG_0(PACA_EXGEN) EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) @@ -527,7 +520,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) machine_check_pSeries: .globl machine_check_fwnmi machine_check_fwnmi: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) machine_check_pSeries_0: @@ -711,7 +703,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) .globl system_reset_fwnmi .align 7 system_reset_fwnmi: - HMT_MEDIUM_PPR_DISCARD SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) -- cgit v1.2.3 From d030a4b5eb7e9514c15b84a765bcc395cc26ab40 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Nov 2015 14:25:17 +1100 Subject: powerpc/kernel: Open code HMT_MEDIUM_LOW_HAS_PPR HMT_MEDIUM_LOW_HAS_PPR is only used in one place, open code it.
Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 5 ----- arch/powerpc/kernel/entry_64.S | 6 +++++- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index dd0fc18d8103..67f05d4935a0 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -418,11 +418,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) * PPR restore macros used in entry_64.S * Used for P7 or later processors */ -#define HMT_MEDIUM_LOW_HAS_PPR \ -BEGIN_FTR_SECTION_NESTED(944) \ - HMT_MEDIUM_LOW; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944) - #define SET_DEFAULT_THREAD_PPR(ra, rb) \ BEGIN_FTR_SECTION_NESTED(945) \ lis ra,INIT_PPR@highest; /* default ppr=3 */ \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index c8b4225a0095..651a65552ac8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,7 +223,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) beq- 1f ACCOUNT_CPU_USER_EXIT(r11, r12) - HMT_MEDIUM_LOW_HAS_PPR + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) -- cgit v1.2.3 From d8725ce86c37fa750fc01f739ee4d4ced39167da Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Nov 2015 14:25:18 +1100 Subject: powerpc/kernel: Open code SET_DEFAULT_THREAD_PPR This is only used in one location, open code it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 13 ------------- arch/powerpc/kernel/entry_64.S | 8 +++++++- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 67f05d4935a0..499d9f89435a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -413,19 +413,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) FTR_SECTION_ELSE_NESTED(848); \ mtocrf (FXM), RS; \ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848) - -/* - * PPR restore macros used in entry_64.S - * Used for P7 or later processors - */ -#define SET_DEFAULT_THREAD_PPR(ra, rb) \ -BEGIN_FTR_SECTION_NESTED(945) \ - lis ra,INIT_PPR@highest; /* default ppr=3 */ \ - ld rb,PACACURRENT(r13); \ - sldi ra,ra,32; /* 11- 13 bits are used for ppr */ \ - std ra,TASKTHREADPPR(rb); \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945) - #endif /* diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 651a65552ac8..0d525ce3717f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -316,7 +316,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything else left to do? */ - SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */ +BEGIN_FTR_SECTION + lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */ + ld r10,PACACURRENT(r13) + sldi r3,r3,32 /* bits 11-13 are used for ppr */ + std r3,TASKTHREADPPR(r10) +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP) beq ret_from_except_lite -- cgit v1.2.3 From 2613265cb5b07a46bc01eb67202874136efd7049 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 16 Dec 2015 21:10:22 +1100 Subject: powerpc/kernel: Combine vec/loc for STD_EXCEPTION_PSERIES The STD_EXCEPTION_PSERIES macro takes both a vector number, and a location (memory address). 
However both are always identical, so combine them to save repeating ourselves. This does mean an exception handler must always exist at the location in memory that matches its vector number. But that's OK because this is the "STD" macro (standard), which does exactly that. We have other macros for the other cases, eg. STD_EXCEPTION_PSERIES_OOL (out of line). Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 4 ++-- arch/powerpc/kernel/exceptions-64s.S | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 60b2bbda212d..93ae809fe5ea 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -333,8 +333,8 @@ do_kvm_##n: \ /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(loc, vec, label) \ - . = loc; \ +#define STD_EXCEPTION_PSERIES(vec, label) \ + . = vec; \ .globl label##_pSeries; \ label##_pSeries: \ SET_SCRATCH0(r13); /* save r13 */ \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0757f23d35aa..7716cebf4b8e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -230,7 +230,7 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries @@ -274,13 +274,13 @@ hardware_interrupt_hv: KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + STD_EXCEPTION_PSERIES(0x600, alignment) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) - STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + STD_EXCEPTION_PSERIES(0x700, program_check) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) - STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + STD_EXCEPTION_PSERIES(0x800, fp_unavailable) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) . = 0x900 @@ -293,7 +293,7 @@ decrementer_pSeries: MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) - STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) + STD_EXCEPTION_PSERIES(0xb00, trap_0b) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 @@ -325,7 +325,7 @@ system_call_pSeries: SYSCALL_PSERIES_3 KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) - STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + STD_EXCEPTION_PSERIES(0xd00, single_step) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch @@ -401,7 +401,7 @@ hv_facility_unavailable_trampoline: KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) . = 0x1500 @@ -428,7 +428,7 @@ denorm_exception_hv: KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) + STD_EXCEPTION_PSERIES(0x1700, altivec_assist) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) #ifdef CONFIG_CBE_RAS -- cgit v1.2.3 From 4450022b4952ce67d2f3006b4c38e12a0f38cd77 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 14 Dec 2015 14:31:24 +1100 Subject: powerpc/476fpe: Add support for kexec PPC476FPE has a different PVR from previous PPC476 processors. 
The kexec code checks the PVR in order to correctly setup the MMU. When the initial support for 476FPE processors was added the corresponding change in the kexec code was missed. This patch simply adds the check and solves the following bug on kexec: kexec: Starting new kernel Bye! Unable to handle kernel paging request for instruction fetch Faulting instruction address: 0xee9a50f8 cpu 0x0: Vector: 400 (Instruction Access) at [ee9d7d20] pc: ee9a50f8 lr: ee9a50e4 sp: ee9d7dd0 msr: 21020 current = 0xee40f000 pid = 960, comm = kexec enter ? for help [link register ] ee9a50e4 [ee9d7dd0] c0013748 default_machine_kexec+0x58/0x70 (unreliable) [ee9d7df0] c0012f04 machine_kexec+0x34/0x40 [ee9d7e00] c00aa1ec kernel_kexec+0x9c/0xb0 [ee9d7e20] c005d704 SyS_reboot+0x1f4/0x220 [ee9d7f40] c000db68 ret_from_syscall+0x0/0x3c Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_32.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index ed3ab509faca..be8edd67f05b 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -743,6 +743,8 @@ relocate_new_kernel: /* Check for 47x cores */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq setup_map_47x cmplwi cr0,r3,PVR_476@h beq setup_map_47x cmplwi cr0,r3,PVR_476_ISS@h -- cgit v1.2.3 From 1b855e167b90fcb353977c08932d0a52eb8ae5b9 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 17 Dec 2015 19:41:00 +1100 Subject: powerpc: Add missing calls to va_end() cppcheck picked up that there were a couple of missing va_end() calls in functions using va_start(). Signed-off-by: Daniel Axtens Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 1 + arch/powerpc/platforms/powermac/bootx_init.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 92dea8df6b26..da5192590c44 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -389,6 +389,7 @@ static void __init prom_printf(const char *format, ...) break; } } + va_end(args); } diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 76f5013c35e5..c3c9bbb3573a 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -84,6 +84,7 @@ static void __init bootx_printf(const char *format, ...) break; } } + va_end(args); } #else /* CONFIG_BOOTX_TEXT */ static void __init bootx_printf(const char *format, ...) {} -- cgit v1.2.3 From c395465da68bfc3a238d5bc15f862e33e6e9ecec Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 28 Oct 2015 15:54:06 +1100 Subject: powerpc: Add function to copy mm_context_t to the paca This adds a function to copy the mm->context to the paca. This is only a basic conversion for now but will be used more extensively in the next patch. This also adds #ifdef CONFIG_PPC_BOOK3S around this code since it's not used elsewhere. 
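For reference, once this patch and the follow-up below (which copies only the required fields) are both applied, the helper ends up as follows; this is assembled from the two diffs, not new code:

	#ifdef CONFIG_PPC_BOOK3S
	static inline void copy_mm_to_paca(mm_context_t *context)
	{
		get_paca()->mm_ctx_id = context->id;
	#ifdef CONFIG_PPC_MM_SLICES
		get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
		memcpy(&get_paca()->mm_ctx_high_slices_psize,
		       &context->high_slices_psize, SLICE_ARRAY_SIZE);
	#else
		get_paca()->mm_ctx_sllp = context->sllp;
	#endif
	}
	#else
	static inline void copy_mm_to_paca(mm_context_t *context) {}
	#endif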
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/paca.h | 11 +++++++++++ arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/mm/hash_utils_64.c | 5 +++-- arch/powerpc/mm/slb.c | 2 +- arch/powerpc/mm/slice.c | 3 +-- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 70bd4381f8e6..1cc6e0828907 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -131,7 +131,9 @@ struct paca_struct { struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ +#ifdef CONFIG_PPC_BOOK3S mm_context_t context; +#endif /* * then miscellaneous read-write fields @@ -194,6 +196,15 @@ struct paca_struct { #endif }; +#ifdef CONFIG_PPC_BOOK3S +static inline void copy_mm_to_paca(mm_context_t *context) +{ + get_paca()->context = *context; +} +#else +static inline void copy_mm_to_paca(mm_context_t *context){} +#endif + extern struct paca_struct *paca; extern void initialise_paca(struct paca_struct *new_paca, int cpu); extern void setup_paca(struct paca_struct *new_paca); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 221d584d089f..9db7be292bf3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,6 +185,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); +#ifdef CONFIG_PPC_BOOK3S DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, @@ -193,6 +194,7 @@ int main(void) context.high_slices_psize)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ +#endif #ifdef CONFIG_PPC_BOOK3E DEFINE(PACAPGD, offsetof(struct paca_struct, pgd)); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4233dcccbaf7..03279eac0957 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -882,7 +882,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { - get_paca()->context = mm->context; + + copy_mm_to_paca(&mm->context); slb_flush_and_rebolt(); } } @@ -949,7 +950,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, { if (user_region) { if (psize != get_paca_psize(ea)) { - get_paca()->context = mm->context; + copy_mm_to_paca(&mm->context); slb_flush_and_rebolt(); } } else if (get_paca()->vmalloc_sllp != diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 515730e499fe..825b6873391f 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -228,7 +228,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) asm volatile("slbie %0" : : "r" (slbie_data)); get_paca()->slb_cache_ptr = 0; - get_paca()->context = mm->context; + copy_mm_to_paca(&mm->context); /* * preload some userspace segments into the SLB. 
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 0f432a702870..42954f0b47ac 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -185,8 +185,7 @@ static void slice_flush_segments(void *parm) if (mm != current->active_mm) return; - /* update the paca copy of the context struct */ - get_paca()->context = current->active_mm->context; + copy_mm_to_paca(&current->active_mm->context); local_irq_save(flags); slb_flush_and_rebolt(); -- cgit v1.2.3 From 2fc251a8dda56b71ec491bee4c6897e3e12c0739 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 11 Dec 2015 09:34:42 +1100 Subject: powerpc: Copy only required pieces of the mm_context_t to the paca Currently we copy the whole mm_context_t to the paca but only access a few bits of it. This is wasteful of space in the paca and also takes quite some time in the hot path of context switching. This patch pulls in only the required bits from the mm_context_t to the paca and on context switch, copies only those. Benchmarking this (on top of Anton's recent MSR context switching changes [1]) using processes and yield shows an improvement of almost 3% on POWER8: http://ozlabs.org/~anton/junkcode/context_switch2.c ./context_switch2 --test=yield --process 0 0 1. https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-October/135700.html Signed-off-by: Michael Neuling [mpe: Rename paca fields to be mm_ctx_foo rather than context_foo] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/paca.h | 18 ++++++++++++++++-- arch/powerpc/kernel/asm-offsets.c | 8 ++++---- arch/powerpc/mm/hash_utils_64.c | 4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1cc6e0828907..ef78c288c712 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -16,6 +16,7 @@ #ifdef CONFIG_PPC64 +#include #include #include #include @@ -132,7 +133,13 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S - mm_context_t context; + mm_context_id_t mm_ctx_id; +#ifdef CONFIG_PPC_MM_SLICES + u64 mm_ctx_low_slices_psize; + unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; +#else + u16 mm_ctx_sllp; +#endif #endif /* @@ -199,7 +206,14 @@ struct paca_struct { #ifdef CONFIG_PPC_BOOK3S static inline void copy_mm_to_paca(mm_context_t *context) { - get_paca()->context = *context; + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize; + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, SLICE_ARRAY_SIZE); +#else + get_paca()->mm_ctx_sllp = context->sllp; +#endif } #else static inline void copy_mm_to_paca(mm_context_t *context){} diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9db7be292bf3..07cebc3514f3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -186,12 +186,12 @@ int main(void) DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened)); #ifdef CONFIG_PPC_BOOK3S - DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, - context.low_slices_psize)); + mm_ctx_low_slices_psize)); DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct, - context.high_slices_psize)); +
mm_ctx_high_slices_psize)); DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); #endif /* CONFIG_PPC_MM_SLICES */ #endif @@ -224,7 +224,7 @@ int main(void) #ifdef CONFIG_PPC_MM_SLICES DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp)); #else - DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp)); + DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp)); #endif /* CONFIG_PPC_MM_SLICES */ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 03279eac0957..db744576d730 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -853,11 +853,11 @@ static unsigned int get_paca_psize(unsigned long addr) unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - lpsizes = get_paca()->context.low_slices_psize; + lpsizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); return (lpsizes >> (index * 4)) & 0xF; } - hpsizes = get_paca()->context.high_slices_psize; + hpsizes = get_paca()->mm_ctx_high_slices_psize; index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 0x1; return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; -- cgit v1.2.3 From 35de3b1aa16842214e0cd7c6036daf4619294314 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 8 Dec 2015 13:50:56 -0500 Subject: powerpc: Implement save_stack_trace_regs() to enable kprobe stack tracing It has come to my attention that kprobe event stack tracing does not work on powerpc. You can see it with the following: # cd /sys/kernel/debug/tracing # echo stacktrace > trace_options # echo 'p kfree' > kprobe_events # echo 1 > events/kprobes/enable This will print the following warning: save_stack_trace_regs() not implemented yet. Although save_stack_trace() (which normal event stack traces use) is implemented, save_stack_trace_regs() which kprobe events use is not. This is a cheap attempt to implement that function. Note: this may have issues if a task tries to get a stack trace from another task with its regs, because it just passes in "current" to save_context_stack(). But this does solve the issue with stack tracing kprobe events. Reported-by: Chunyu Hu Signed-off-by: Steven Rostedt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/stacktrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index ea43a347a104..4f24606afc3f 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -61,3 +61,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) save_context_stack(trace, tsk->thread.ksp, tsk, 0); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +void +save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + save_context_stack(trace, regs->gpr[1], current, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace_regs); -- cgit v1.2.3 From a61674bdfc7c2bf909c4010699607b62b69b7bec Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 12 Jan 2016 23:14:23 +1100 Subject: powerpc/module: Handle R_PPC64_ENTRY relocations GCC 6 will include changes to generated code with -mcmodel=large, which is used to build kernel modules on powerpc64le.
This was necessary because the large model is supposed to allow arbitrary sizes and locations of the code and data sections, but the ELFv2 global entry point prolog still made the unconditional assumption that the TOC associated with any particular function can be found within 2 GB of the function entry point: func: addis r2,r12,(.TOC.-func)@ha addi r2,r2,(.TOC.-func)@l .localentry func, .-func To remove this assumption, GCC will now generate instead this global entry point prolog sequence when using -mcmodel=large: .quad .TOC.-func func: .reloc ., R_PPC64_ENTRY ld r2, -8(r12) add r2, r2, r12 .localentry func, .-func The new .reloc triggers an optimization in the linker that will replace this new prolog with the original code (see above) if the linker determines that the distance between .TOC. and func is in range after all. Since this new relocation is now present in module object files, the kernel module loader is required to handle them too. This patch adds support for the new relocation and implements the same optimization done by the GNU linker. Cc: stable@vger.kernel.org Signed-off-by: Ulrich Weigand Signed-off-by: Michael Ellerman --- arch/powerpc/include/uapi/asm/elf.h | 2 ++ arch/powerpc/kernel/module_64.c | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 59dad113897b..c2d21d11c2d2 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -295,6 +295,8 @@ do { \ #define R_PPC64_TLSLD 108 #define R_PPC64_TOCSAVE 109 +#define R_PPC64_ENTRY 118 + #define R_PPC64_REL16 249 #define R_PPC64_REL16_LO 250 #define R_PPC64_REL16_HI 251 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 68384514506b..59663af9315f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, */ break; + case R_PPC64_ENTRY: + /* + * Optimize ELFv2 large code model entry point if + * the TOC is within 2GB range of current location. + */ + value = my_r2(sechdrs, me) - (unsigned long)location; + if (value + 0x80008000 > 0xffffffff) + break; + /* + * Check for the large code model prolog sequence: + * ld r2, ...(r12) + * add r2, r2, r12 + */ + if ((((uint32_t *)location)[0] & ~0xfffc) + != 0xe84c0000) + break; + if (((uint32_t *)location)[1] != 0x7c426214) + break; + /* + * If found, replace it with: + * addis r2, r12, (.TOC.-func)@ha + * addi r2, r12, (.TOC.-func)@l + */ + ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value); + ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); + break; + case R_PPC64_REL16_HA: /* Subtract location pointer */ value -= (unsigned long)location; -- cgit v1.2.3
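A closing note on the prolog matching in apply_relocate_add() above: the first comparison clears bits 2-15 of the instruction word, i.e. the 14-bit DS displacement field of the ld, so any "ld r2, N(r12)" matches, while the add must match exactly. A stand-alone sketch of the same check (a sketch only; instruction words are assumed to be read in the module's native byte order):

	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * Check whether two instruction words form the -mcmodel=large
	 * global entry prolog. Masks copied from the patch above:
	 * 0xe84c0000 is "ld r2,0(r12)" and 0x7c426214 is "add r2,r2,r12".
	 */
	static bool is_large_model_prolog(const uint32_t insn[2])
	{
		/* Any "ld r2, N(r12)": ignore the DS displacement field */
		if ((insn[0] & ~(uint32_t)0xfffc) != 0xe84c0000)
			return false;
		return insn[1] == 0x7c426214;
	}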