author     Michael Ellerman <mpe@ellerman.id.au>  2020-07-23 17:43:44 +1000
committer  Michael Ellerman <mpe@ellerman.id.au>  2020-07-23 17:43:44 +1000
commit     335aca5f65f1a39670944930131da5f2276f888f (patch)
tree       c8f41223681fc064f558083a2d46f712e43d8b66 /arch/powerpc/kernel
parent     8ac9b9d61f0eceba6ce571e7527798465ae9a7c5 (diff)
parent     7fa95f9adaee7e5cbb195d3359741120829e488b (diff)
Merge branch 'scv' support into next
From Nick's cover letter:

Linux powerpc new system call instruction and ABI

System Call Vectored (scv) ABI
==============================

The scv instruction is introduced with POWER9 / ISA3, and it comes with an
rfscv counterpart. The benefit of these instructions is performance (trading
slower SRR0/1 for faster LR/CTR registers, and entering the kernel with
MSR[EE] and MSR[RI] left enabled), which can reduce MSR updates. The scv
instruction has 128 levels (not enough to cover the Linux system call space).

Assignment and advertisement
----------------------------

The proposal is to assign scv levels conservatively, and advertise them with
HWCAP feature bits as we add support for more.

Linux has not enabled FSCR[SCV] yet, so executing the scv instruction will
cause the kernel to log a "SCV facility unavailable" message, and deliver a
SIGILL with ILL_ILLOPC to the process. Linux has defined a HWCAP2 bit
PPC_FEATURE2_SCV for SCV support, but does not set it.

This change allocates the zero level ('scv 0'), advertised with
PPC_FEATURE2_SCV, which will be used to provide normal Linux system calls
(equivalent to 'sc'). Attempting to execute scv with other levels will cause
a SIGILL to be delivered the same as before, but will not log a "SCV facility
unavailable" message (because the processor facility is enabled).

Calling convention
------------------

The proposal is for scv 0 to provide the standard Linux system call ABI with
the following differences from the sc convention[1]:

- LR is volatile across scv calls. This is necessary because the scv
  instruction clobbers LR. From previous discussion, this should be possible
  to deal with in GCC clobbers and CFI.

- cr1 and cr5-cr7 are volatile. This matches the C ABI and would allow the
  kernel system call exit to avoid restoring the volatile cr registers
  (although we probably still would anyway to avoid information leaks).

- Error handling: the consensus among kernel, glibc, and musl is to move to
  using negative return values in r3 rather than CR0[SO]=1 to indicate error,
  which matches most other architectures and is closer to a function call.

Notes
-----

- r0, r4-r8 are documented as volatile in the ABI, but the kernel patch as
  submitted currently preserves them. This is to leave room for deciding
  which way to go with these. Some small benefit was found by preserving
  them[1], but I'm not convinced it's worth deviating from the C function
  call ABI just for this. Release code should follow the ABI.

Previous discussions:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208691.html
https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209268.html

[1] https://github.com/torvalds/linux/blob/master/Documentation/powerpc/syscall64-abi.rst
[2] https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/209263.html
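To make the advertisement and calling-convention points above concrete, here is a
minimal userspace sketch (not part of the patch series) that checks AT_HWCAP2 for
PPC_FEATURE2_SCV and, if set, issues a zero-argument system call with 'scv 0'. The
scv_syscall0() helper name is made up for illustration; the clobber list and the
negative-errno handling follow the ABI described in the cover letter, and the
PPC_FEATURE2_SCV fallback value is assumed to mirror the kernel uapi definition.

#include <errno.h>
#include <stdio.h>
#include <sys/auxv.h>        /* getauxval(), AT_HWCAP2 */
#include <sys/syscall.h>     /* SYS_getpid */

#ifndef PPC_FEATURE2_SCV
#define PPC_FEATURE2_SCV 0x00100000  /* assumed to match arch/powerpc/include/uapi/asm/cputable.h */
#endif

/* Hypothetical zero-argument wrapper: syscall number in r0, result in r3.
 * LR/CTR and cr0, cr1, cr5-cr7 are clobbered per the proposed scv 0 ABI;
 * r4-r12 are listed as clobbered since the ABI documents them volatile. */
static long scv_syscall0(long nr)
{
	register long r0 asm("r0") = nr;
	register long r3 asm("r3");

	asm volatile("scv 0"
		     : "=r" (r3), "+r" (r0)
		     :
		     : "lr", "ctr", "cr0", "cr1", "cr5", "cr6", "cr7",
		       "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12",
		       "memory");
	return r3;   /* negative errno on failure, not CR0[SO] */
}

int main(void)
{
	if (!(getauxval(AT_HWCAP2) & PPC_FEATURE2_SCV)) {
		fprintf(stderr, "scv 0 not advertised; fall back to 'sc'\n");
		return 1;
	}

	long ret = scv_syscall0(SYS_getpid);
	if (ret < 0) {
		errno = -ret;    /* scv convention: r3 holds -errno on error */
		perror("scv getpid");
		return 1;
	}
	printf("getpid() via scv 0 = %ld\n", ret);
	return 0;
}

In practice a libc would hide this behind its normal syscall wrappers once the
kernel sets PPC_FEATURE2_SCV, falling back to 'sc' (with the CR0[SO] error
convention) on CPUs or kernels without scv support.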
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S  |  10
-rw-r--r--  arch/powerpc/kernel/cputable.c          |   3
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c       |   1
-rw-r--r--  arch/powerpc/kernel/entry_64.S          | 171
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S    | 150
-rw-r--r--  arch/powerpc/kernel/process.c           |  10
-rw-r--r--  arch/powerpc/kernel/setup_64.c          |   5
-rw-r--r--  arch/powerpc/kernel/signal.c            |  19
-rw-r--r--  arch/powerpc/kernel/syscall_64.c        |  32
9 files changed, 374 insertions, 27 deletions
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index efdcfa714106..86527d19348c 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -98,7 +98,7 @@ _GLOBAL(__setup_cpu_power10)
_GLOBAL(__setup_cpu_power9)
mflr r11
- bl __init_FSCR
+ bl __init_FSCR_power9
1: bl __init_PMU
bl __init_hvmode_206
mtlr r11
@@ -128,7 +128,7 @@ _GLOBAL(__restore_cpu_power10)
_GLOBAL(__restore_cpu_power9)
mflr r11
- bl __init_FSCR
+ bl __init_FSCR_power9
1: bl __init_PMU
mfmsr r3
rldicl. r0,r3,4,63
@@ -198,6 +198,12 @@ __init_FSCR_power10:
mtspr SPRN_FSCR, r3
// fall through
+__init_FSCR_power9:
+ mfspr r3, SPRN_FSCR
+ ori r3, r3, FSCR_SCV
+ mtspr SPRN_FSCR, r3
+ // fall through
+
__init_FSCR:
mfspr r3,SPRN_FSCR
ori r3,r3,FSCR_TAR|FSCR_EBB
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b4066354f073..3d406a9626e8 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -120,7 +120,8 @@ extern void __restore_cpu_e6500(void);
#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
PPC_FEATURE2_ARCH_3_00 | \
PPC_FEATURE2_HAS_IEEE128 | \
- PPC_FEATURE2_DARN )
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV)
#define COMMON_USER_POWER10 COMMON_USER_POWER9
#define COMMON_USER2_POWER10 (COMMON_USER2_POWER9 | \
PPC_FEATURE2_ARCH_3_1 | \
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 0a16bb6fa07d..22ba7fba1147 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -621,6 +621,7 @@ static struct dt_cpu_feature_match __initdata
{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
{"smt", feat_enable_smt, 0},
{"interrupt-facilities", feat_enable, 0},
+ {"system-call-vectored", feat_enable, 0},
{"timer-facilities", feat_enable, 0},
{"timer-facilities-v3", feat_enable, 0},
{"debug-facilities", feat_enable, 0},
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2547c5dac07a..33a42e42c56f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -64,15 +64,173 @@ exception_marker:
.section ".text"
.align 7
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+ .globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+ bne .Ltabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+ INTERRUPT_TO_KERNEL
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+ ld r2,PACATOC(r13)
+ mfcr r12
+ li r11,0
+ /* Can we avoid saving r3-r8 in common case? */
+ std r3,GPR3(r1)
+ std r4,GPR4(r1)
+ std r5,GPR5(r1)
+ std r6,GPR6(r1)
+ std r7,GPR7(r1)
+ std r8,GPR8(r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_LINK(r1)
+ std r11,_CTR(r1)
+
+ li r11,\trapnr
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ std r3,ORIG_GPR3(r1)
+ addi r10,r1,STACK_FRAME_OVERHEAD
+ ld r11,exception_marker@toc(r2)
+ std r11,-16(r10) /* "regshere" marker */
+
+ /*
+ * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
+ * would clobber syscall parameters. Also we always enter with IRQs
+ * enabled and nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
+ *
+ * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
+ * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
+ */
+
+ /* Calling convention has r9 = orig r0, r10 = regs */
+ mr r9,r0
+ bl system_call_exception
+
+.Lsyscall_vectored_\name\()_exit:
+ addi r4,r1,STACK_FRAME_OVERHEAD
+ li r5,1 /* scv */
+ bl syscall_exit_prepare
+
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ cmpdi r3,0
+ bne .Lsyscall_vectored_\name\()_restore_regs
+
+ /* rfscv returns with LR->NIA and CTR->MSR */
+ mtlr r4
+ mtctr r5
+
+ /* Could zero these as per ABI, but we may consider a stricter ABI
+ * which preserves these if libc implementations can benefit, so
+ * restore them for now until further measurement is done. */
+ ld r0,GPR0(r1)
+ ld r4,GPR4(r1)
+ ld r5,GPR5(r1)
+ ld r6,GPR6(r1)
+ ld r7,GPR7(r1)
+ ld r8,GPR8(r1)
+ /* Zero volatile regs that may contain sensitive kernel data */
+ li r9,0
+ li r10,0
+ li r11,0
+ li r12,0
+ mtspr SPRN_XER,r0
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r13,GPR13(r1)
+ ld r1,GPR1(r1)
+ RFSCV_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+ li r3,0
+ mtmsrd r3,1
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+
+ ld r3,_CTR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_XER(r1)
+
+ REST_NVGPRS(r1)
+ ld r0,GPR0(r1)
+ mtcr r2
+ mtctr r3
+ mtlr r4
+ mtspr SPRN_XER,r5
+ REST_10GPRS(2, r1)
+ REST_2GPRS(12, r1)
+ ld r1,GPR1(r1)
+ RFI_TO_USER
+.endm
+
+system_call_vectored common 0x3000
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+
+/*
+ * Entered via kernel return set up by kernel/sstep.c, must match entry regs
+ */
+ .globl system_call_vectored_emulate
+system_call_vectored_emulate:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ b system_call_vectored_common
+#endif
+
+ .balign IFETCH_ALIGN_BYTES
.globl system_call_common
system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-_ASM_NOKPROBE_SYMBOL(system_call_common)
mr r10,r1
ld r1,PACAKSAVE(r13)
std r10,0(r1)
@@ -138,6 +296,7 @@ END_BTB_FLUSH_SECTION
.Lsyscall_exit:
addi r4,r1,STACK_FRAME_OVERHEAD
+ li r5,0 /* !scv */
bl syscall_exit_prepare
ld r2,_CCR(r1)
@@ -224,10 +383,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
b . /* prevent speculative execution */
#endif
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+ bl schedule_tail
+ REST_NVGPRS(r1)
+ li r3,0 /* fork() return value */
+ b .Lsyscall_vectored_common_exit
+#endif
+
_GLOBAL(ret_from_fork)
bl schedule_tail
REST_NVGPRS(r1)
- li r3,0
+ li r3,0 /* fork() return value */
b .Lsyscall_exit
_GLOBAL(ret_from_kernel_thread)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0fc8bad878b2..eda5ceef2381 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -508,8 +508,24 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.macro __GEN_COMMON_BODY name
.if IMASK
+ .if ! ISTACK
+ .error "No support for masked interrupt to use custom stack"
+ .endif
+
+ /* If coming from user, skip soft-mask tests. */
+ andi. r10,r12,MSR_PR
+ bne 2f
+
+ /* Kernel code running below __end_interrupts is implicitly
+ * soft-masked */
+ LOAD_HANDLER(r10, __end_interrupts)
+ cmpld r11,r10
+ li r10,IMASK
+ blt- 1f
+
+ /* Test the soft mask state against our interrupt's bit */
lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,IMASK
+1: andi. r10,r10,IMASK
/* Associate vector numbers with bits in paca->irq_happened */
.if IVEC == 0x500 || IVEC == 0xea0
li r10,PACA_IRQ_EE
@@ -540,7 +556,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.if ISTACK
andi. r10,r12,MSR_PR /* See if coming from user */
- mr r10,r1 /* Save r1 */
+2: mr r10,r1 /* Save r1 */
subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
beq- 100f
ld r1,PACAKSAVE(r13) /* kernel stack to use */
@@ -740,6 +756,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* guarantee they will be delivered virtually. Some conditions (see the ISA)
* cause exceptions to be delivered in real mode.
*
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
* It's impossible to receive interrupts below 0x300 via AIL.
*
* KVM: None of the virtual exceptions are from the guest. Anything that
@@ -749,8 +768,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* We layout physical memory as follows:
* 0x0000 - 0x00ff : Secondary processor spin code
* 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
- * 0x1900 - 0x3fff : Real mode trampolines
- * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
* 0x5900 - 0x6fff : Relon mode trampolines
* 0x7000 - 0x7fff : FWNMI data area
* 0x8000 - .... : Common interrupt handlers, remaining early
@@ -761,8 +780,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
* vectors there.
*/
OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
-OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000)
-OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900)
+OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
#ifdef CONFIG_PPC_POWERNV
@@ -798,6 +817,77 @@ USE_FIXED_SECTION(real_vectors)
.globl __start_interrupts
__start_interrupts:
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses below __end_interrupts as being
+ * soft-masked.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the below __end_interrupts text, at least one of the following
+ * must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is set (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+ /* SCV 0 */
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_tramp
+#else
+ b system_call_vectored_common
+#endif
+ nop
+
+ /* SCV 1 - 127 */
+ .rept 127
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ li r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_sigill_tramp
+#else
+ b system_call_vectored_sigill
+#endif
+ .endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_common)
+ mtctr r10
+ bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill)
+ mtctr r10
+ bctr
+#endif
+
+
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x100)
@@ -2838,7 +2928,8 @@ masked_interrupt:
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
- /* returns to kernel where r13 must be set up, so don't restore it */
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ /* May return to masked low address where r13 is not set up */
.if \hsrr
HRFI_TO_KERNEL
.else
@@ -2946,6 +3037,47 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+ /* system call volatile */
+ mr r7,r13
+ GET_PACA(r13);
+ mr r8,r1
+ ld r1,PACAKSAVE(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
+ /*
+ * The load addresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+
+ mtctr r9
+ li r9,0
+ li r10,0
+ li r11,0
+ mr r1,r8
+ mr r13,r7
+ RFSCV
+
USE_TEXT_SECTION()
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
@@ -2997,6 +3129,10 @@ EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline)
USE_FIXED_SECTION(virt_trampolines)
/*
+ * All code below __end_interrupts is treated as soft-masked. If
+ * any code runs here with MSR[EE]=1, it must then cope with pending
+ * soft interrupt being raised (i.e., by ensuring it is replayed).
+ *
* The __end_interrupts marker must be past the out-of-line (OOL)
* handlers, so that they are copied to real address 0x100 when running
* a relocatable kernel. This ensures they can be reached from the short
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9be4759f3bbb..e4fcc817c46c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1618,6 +1618,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
+ extern void ret_from_fork_scv(void);
extern void ret_from_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
@@ -1654,7 +1655,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
if (usp)
childregs->gpr[1] = usp;
p->thread.regs = childregs;
- childregs->gpr[3] = 0; /* Result from fork() */
+ /* 64s sets this in ret_from_fork */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ childregs->gpr[3] = 0; /* Result from fork() */
if (clone_flags & CLONE_SETTLS) {
if (!is_32bit_task())
childregs->gpr[13] = tls;
@@ -1662,7 +1665,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
childregs->gpr[2] = tls;
}
- f = ret_from_fork;
+ if (trap_is_scv(regs))
+ f = ret_from_fork_scv;
+ else
+ f = ret_from_fork;
}
childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
sp -= STACK_FRAME_OVERHEAD;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0ba1ed77dc68..6be430107c6f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -196,7 +196,10 @@ static void __init configure_exceptions(void)
/* Under a PAPR hypervisor, we need hypercalls */
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
/* Enable AIL if possible */
- pseries_enable_reloc_on_exc();
+ if (!pseries_enable_reloc_on_exc()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
/*
* Tell the hypervisor that we want our exceptions to
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index b4143b6ff093..d15a98c758b8 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -205,8 +205,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
return;
/* error signalled ? */
- if (!(regs->ccr & 0x10000000))
+ if (trap_is_scv(regs)) {
+ /* 32-bit compat mode sign extend? */
+ if (!IS_ERR_VALUE(ret))
+ return;
+ ret = -ret;
+ } else if (!(regs->ccr & 0x10000000)) {
return;
+ }
switch (ret) {
case ERESTART_RESTARTBLOCK:
@@ -239,9 +245,14 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
regs->nip -= 4;
regs->result = 0;
} else {
- regs->result = -EINTR;
- regs->gpr[3] = EINTR;
- regs->ccr |= 0x10000000;
+ if (trap_is_scv(regs)) {
+ regs->result = -EINTR;
+ regs->gpr[3] = -EINTR;
+ } else {
+ regs->result = -EINTR;
+ regs->gpr[3] = EINTR;
+ regs->ccr |= 0x10000000;
+ }
}
}
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 5126f1d3184a..8e50818aa50b 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -60,6 +60,11 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(regs->trap == 0x7ff0)) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
/*
* We use the return value of do_syscall_trace_enter() as the
* syscall number. If the syscall was rejected for any reason
@@ -78,6 +83,11 @@ notrace long system_call_exception(long r3, long r4, long r5,
r8 = regs->gpr[8];
} else if (unlikely(r0 >= NR_syscalls)) {
+ if (unlikely(regs->trap == 0x7ff0)) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
return -ENOSYS;
}
@@ -105,16 +115,20 @@ notrace long system_call_exception(long r3, long r4, long r5,
* local irqs must be disabled. Returns false if the caller must re-enable
* them, check for new work, and try again.
*/
-static notrace inline bool prep_irq_for_enabled_exit(void)
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
{
/* This must be done with RI=1 because tracing may touch vmaps */
trace_hardirqs_on();
/* This pattern matches prep_irq_for_idle */
- __hard_EE_RI_disable();
+ if (clear_ri)
+ __hard_EE_RI_disable();
+ else
+ __hard_irq_disable();
if (unlikely(lazy_irq_pending_nocheck())) {
/* Took an interrupt, may have more exit work to do. */
- __hard_RI_enable();
+ if (clear_ri)
+ __hard_RI_enable();
trace_hardirqs_off();
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
@@ -136,7 +150,8 @@ static notrace inline bool prep_irq_for_enabled_exit(void)
* because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
*/
notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ long scv)
{
unsigned long *ti_flagsp = &current_thread_info()->flags;
unsigned long ti_flags;
@@ -151,7 +166,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ti_flags = *ti_flagsp;
- if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
r3 = -r3;
regs->ccr |= 0x10000000; /* Set SO bit in CR */
@@ -218,7 +233,8 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ /* scv need not set RI=0 because SRRs are not used */
+ if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
local_irq_enable();
goto again;
}
@@ -290,7 +306,7 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ if (unlikely(!prep_irq_for_enabled_exit(true))) {
local_irq_enable();
local_irq_disable();
goto again;
@@ -353,7 +369,7 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit())) {
+ if (unlikely(!prep_irq_for_enabled_exit(true))) {
/*
* Can't local_irq_restore to replay if we were in
* interrupt context. Must replay directly.