diff options
Diffstat (limited to 'arch/powerpc/kernel')
64 files changed, 1972 insertions, 3066 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bf0bf1b900d2..fe2ef598e2ea 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -173,6 +173,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c2722ff36e98..b12d7c049bfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -110,9 +110,11 @@ int main(void) #ifdef CONFIG_BOOKE OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]); #endif +#ifdef CONFIG_PPC_FPU OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode); OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr); OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area); +#endif OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr); OFFSET(THREAD_LOAD_FP, thread_struct, load_fp); #ifdef CONFIG_ALTIVEC @@ -354,10 +356,15 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_PKEY + STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); + STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); +#endif #ifdef CONFIG_PPC_KUAP STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); #endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -398,47 +405,18 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp); - OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec); - OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs); - OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count); - OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest); - OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime); - OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32); - OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec); - OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec); - OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec); - OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); - OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); + OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); + OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 - OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); - OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); - OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); - OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size); - OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64); - OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec); - OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec); -#endif - OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec); - OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec); - OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec); - OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec); - OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec); - OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec); - /* timeval/timezone offsets for use by vdso */ - OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest); - OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime); - - /* Other bits used by the vdso */ - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - 
DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_MAX, CLOCK_TAI); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(EINVAL, EINVAL); - DEFINE(KTIME_LOW_RES, KTIME_LOW_RES); + OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); + OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size); + OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size); + OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size); + OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map); + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map); +#else + OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); +#endif #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 65ab9fcebd31..6f903e9aa20b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -655,11 +655,27 @@ static unsigned int index_dir_to_cpu(struct cache_index_dir *index) * On big-core systems, each core has two groups of CPUs each of which * has its own L1-cache. The thread-siblings which share l1-cache with * @cpu can be obtained via cpu_smallcore_mask(). + * + * On some big-core systems, the L2 cache is shared only between some + * groups of siblings. This is already parsed and encoded in + * cpu_l2_cache_mask(). + * + * TODO: cache_lookup_or_instantiate() needs to be made aware of the + * "ibm,thread-groups" property so that cache->shared_cpu_map + * reflects the correct siblings on platforms that have this + * device-tree property. This helper function is only a stop-gap + * solution so that we report the correct siblings to the + * userspace via sysfs. 
*/ -static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache) +static const struct cpumask *get_shared_cpu_map(struct cache_index_dir *index, struct cache *cache) { - if (cache->level == 1) - return cpu_smallcore_mask(cpu); + if (has_big_cores) { + int cpu = index_dir_to_cpu(index); + if (cache->level == 1) + return cpu_smallcore_mask(cpu); + if (cache->level == 2 && thread_group_shares_l2) + return cpu_l2_cache_mask(cpu); + } return &cache->shared_cpu_map; } @@ -670,17 +686,11 @@ show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bo struct cache_index_dir *index; struct cache *cache; const struct cpumask *mask; - int cpu; index = kobj_to_cache_index_dir(k); cache = index->cache; - if (has_big_cores) { - cpu = index_dir_to_cpu(index); - mask = get_big_core_shared_cpu_map(cpu, cache); - } else { - mask = &cache->shared_cpu_map; - } + mask = get_shared_cpu_map(index, cache); return cpumap_print_to_pagebuf(list, buf, mask); } diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1d308780e0d3..4bf33f1b4193 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -108,15 +108,6 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E200 -_GLOBAL(__setup_cpu_e200) - /* enable dedicated debug exception handling resources (Debug APU) */ - mfspr r3,SPRN_HID0 - ori r3,r3,HID0_DAPUEN@l - mtspr SPRN_HID0,r3 - b __setup_e200_ivors -#endif /* CONFIG_E200 */ - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S deleted file mode 100644 index 704e8b9501ee..000000000000 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains low level CPU setup functions. 
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - */ - -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cache.h> -#include <asm/book3s/64/mmu-hash.h> - -/* Entry: r3 = crap, r4 = ptr to cputable entry - * - * Note that we can be called twice for pseudo-PVRs - */ -_GLOBAL(__setup_cpu_power7) - mflr r11 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power7) - mflr r11 - mfmsr r3 - rldicl. r0,r3,4,63 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - mfmsr r3 - rldicl. 
r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__setup_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__restore_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -__init_hvmode_206: - /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ - mfmsr r3 - rldicl. 
r0,r3,4,63 - bnelr - ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST) - andc r5,r5,r6 - std r5,CPU_SPEC_FEATURES(r4) - blr - -__init_LPCR_ISA206: - /* Setup a sane LPCR: - * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 - * - * LPES = 0b01 (HSRR0/1 used for 0x500) - * PECE = 0b111 - * DPFD = 4 - * HDICE = 0 - * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) - * VRMASD = 0b10000 (L=1, LP=00) - * - * Other bits untouched for now - */ - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 - - /* POWER9 has no VRMASD */ -__init_LPCR_ISA300: - rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 - ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,4 - rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 - clrrdi r3,r3,1 /* clear HDICE */ - li r5,4 - rldimi r3,r5, LPCR_VC_SH, 0 - mtspr SPRN_LPCR,r3 - isync - blr - -__init_FSCR_power10: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_PREFIX - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR_power9: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_SCV - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR: - mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_EBB - mtspr SPRN_FSCR,r3 - blr - -__init_HFSCR: - mfspr r3,SPRN_HFSCR - ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP - mtspr SPRN_HFSCR,r3 - blr - -__init_PMU_HV: - li r5,0 - mtspr SPRN_MMCRC,r5 - blr - -__init_PMU_HV_ISA207: - li r5,0 - mtspr SPRN_MMCRH,r5 - blr - -__init_PMU: - li r5,0 - mtspr SPRN_MMCRA,r5 - mtspr SPRN_MMCR0,r5 - mtspr SPRN_MMCR1,r5 - mtspr SPRN_MMCR2,r5 - blr - -__init_PMU_ISA207: - li r5,0 - mtspr SPRN_MMCRS,r5 - blr - -__init_PMU_ISA31: - li r5,0 - mtspr SPRN_MMCR3,r5 - LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) - mtspr SPRN_MMCRA,r5 - blr diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c new file mode 100644 index 000000000000..3cca88ee96d7 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -0,0 
+1,272 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, Jordan Niethe, IBM Corporation. + * + * This file contains low level CPU setup functions. + * Originally written in assembly by Benjamin Herrenschmidt & various other + * authors. + */ + +#include <asm/reg.h> +#include <asm/synch.h> +#include <linux/bitops.h> +#include <asm/cputable.h> +#include <asm/cpu_setup_power.h> + +/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ +static bool init_hvmode_206(struct cpu_spec *t) +{ + u64 msr; + + msr = mfmsr(); + if (msr & MSR_HV) + return true; + + t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST); + return false; +} + +static void init_LPCR_ISA300(u64 lpcr, u64 lpes) +{ + /* POWER9 has no VRMASD */ + lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES; + lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2; + lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD; + lpcr &= ~LPCR_HDICE; /* clear HDICE */ + lpcr |= (4ull << LPCR_VC_SH); + mtspr(SPRN_LPCR, lpcr); + isync(); +} + +/* + * Setup a sane LPCR: + * Called with initial LPCR and desired LPES 2-bit value + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ +static void init_LPCR_ISA206(u64 lpcr, u64 lpes) +{ + lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD; + init_LPCR_ISA300(lpcr, lpes); +} + +static void init_FSCR(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_TAR|FSCR_EBB; + mtspr(SPRN_FSCR, fscr); +} + +static void init_FSCR_power9(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_SCV; + mtspr(SPRN_FSCR, fscr); + init_FSCR(); +} + +static void init_FSCR_power10(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_PREFIX; + mtspr(SPRN_FSCR, fscr); + init_FSCR_power9(); +} + +static void init_HFSCR(void) +{ + u64 hfscr; + + hfscr = mfspr(SPRN_HFSCR); + hfscr |= 
HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\ + HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP; + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_PMU_HV(void) +{ + mtspr(SPRN_MMCRC, 0); +} + +static void init_PMU_HV_ISA207(void) +{ + mtspr(SPRN_MMCRH, 0); +} + +static void init_PMU(void) +{ + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static void init_PMU_ISA207(void) +{ + mtspr(SPRN_MMCRS, 0); +} + +static void init_PMU_ISA31(void) +{ + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +} + +/* + * Note that we can be called twice of pseudo-PVRs. + * The parameter offset is not used. + */ + +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) +{ + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __restore_cpu_power7(void) +{ + u64 msr; + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __restore_cpu_power8(void) +{ + u64 msr; + + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power9(); + init_PMU(); + + if (!init_hvmode_206(t)) + return; + + 
mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power9(void) +{ + u64 msr; + + init_FSCR_power9(); + init_PMU(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power10(void) +{ + u64 msr; + + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 29de58d4dfb7..65f35ec052d4 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -36,7 +36,6 @@ const char *powerpc_base_platform; * and ppc64 */ #ifdef CONFIG_PPC32 -extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_e500v2(unsigned long 
offset, struct cpu_spec* spec); extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); @@ -60,19 +59,15 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +#include <asm/cpu_setup_power.h> extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); -extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power7(void); -extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power8(void); -extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power9(void); -extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power10(void); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -616,46 +611,8 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_PPC_BOOK3S_6xx - { /* 603 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00030000, - .cpu_name = "603", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603e */ - 
.pvr_mask = 0xffff0000, - .pvr_value = 0x00060000, - .cpu_name = "603e", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, - { /* 603ev */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00070000, - .cpu_name = "603ev", - .cpu_features = CPU_FTRS_603, - .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, - .mmu_features = 0, - .icache_bsize = 32, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, - .platform = "ppc603", - }, +#ifdef CONFIG_PPC_BOOK3S_32 +#ifdef CONFIG_PPC_BOOK3S_604 { /* 604 */ .pvr_mask = 0xffff0000, .pvr_value = 0x00040000, @@ -1145,6 +1102,47 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc7450", }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +#ifdef CONFIG_PPC_BOOK3S_603 + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = 
machine_check_generic, + .platform = "ppc603", + }, { /* 82xx (8240, 8245, 8260 are all 603e cores) */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00810000, @@ -1234,6 +1232,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1246,7 +1246,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, -#endif /* CONFIG_PPC_BOOK3S_6xx */ +#endif /* CONFIG_PPC_BOOK3S_604 */ +#endif /* CONFIG_PPC_BOOK3S_32 */ #ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, @@ -1540,6 +1541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_40x */ #ifdef CONFIG_44x +#ifndef CONFIG_PPC_47x { .pvr_mask = 0xf0000fff, .pvr_value = 0x40000850, @@ -1822,7 +1824,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, -#ifdef CONFIG_PPC_47x + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +#else /* CONFIG_PPC_47x */ { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1879,65 +1893,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, -#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, - .cpu_name = "(generic 44x PPC)", - .cpu_features = CPU_FTRS_44X, + .cpu_name = "(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, .cpu_user_features = COMMON_USER_BOOKE, - .mmu_features = MMU_FTR_TYPE_44x, + .mmu_features = MMU_FTR_TYPE_47x, 
.icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc440", + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", } +#endif /* CONFIG_PPC_47x */ #endif /* CONFIG_44x */ -#ifdef CONFIG_E200 - { /* e200z5 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81000000, - .cpu_name = "e200z5", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* e200z6 */ - .pvr_mask = 0xfff00000, - .pvr_value = 0x81100000, - .cpu_name = "e200z6", - /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */ - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_SPE_COMP | - PPC_FEATURE_HAS_EFP_SINGLE_COMP | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .machine_check = machine_check_e200, - .platform = "ppc5554", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic E200 PPC)", - .cpu_features = CPU_FTRS_E200, - .cpu_user_features = COMMON_USER_BOOKE | - PPC_FEATURE_HAS_EFP_SINGLE | - PPC_FEATURE_UNIFIED_CACHE, - .mmu_features = MMU_FTR_TYPE_FSL_E, - .dcache_bsize = 32, - .cpu_setup = __setup_cpu_e200, - .machine_check = machine_check_e200, - .platform = "ppc5554", - } -#endif /* CONFIG_E200 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1098863e17ee..b5478b72c08c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -69,7 +69,6 @@ static int hv_mode; static struct { u64 lpcr; - u64 lpcr_clear; u64 hfscr; u64 fscr; u64 pcr; @@ -79,24 +78,7 @@ static void (*init_pmu_registers)(void); static void 
__restore_cpu_cpufeatures(void) { - u64 lpcr; - - /* - * LPCR is restored by the power on engine already. It can be changed - * after early init e.g., by radix enable, and we have no unified API - * for saving and restoring such SPRs. - * - * This ->restore hook should really be removed from idle and register - * restore moved directly into the idle restore code, because this code - * doesn't know how idle is implemented or what it needs restored here. - * - * The best we can do to accommodate secondary boot and idle restore - * for now is "or" LPCR with existing. - */ - lpcr = mfspr(SPRN_LPCR); - lpcr |= system_registers.lpcr; - lpcr &= ~system_registers.lpcr_clear; - mtspr(SPRN_LPCR, lpcr); + mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); @@ -273,13 +255,6 @@ static int __init feat_enable_idle_nap(struct dt_cpu_feature *f) return 1; } -static int __init feat_enable_align_dsisr(struct dt_cpu_feature *f) -{ - cur_cpu_spec->cpu_features &= ~CPU_FTR_NODSISRALIGN; - - return 1; -} - static int __init feat_enable_idle_stop(struct dt_cpu_feature *f) { u64 lpcr; @@ -317,7 +292,6 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; - system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR); lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -454,6 +428,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) @@ -641,7 +616,7 @@ static struct dt_cpu_feature_match __initdata {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST}, {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG}, {"idle-nap", feat_enable_idle_nap, 0}, - {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0}, + /* alignment-interrupt-dsisr ignored */ {"idle-stop", 
feat_enable_idle_stop, 0}, {"machine-check-power8", feat_enable_mce_power8, 0}, {"performance-monitor-power8", feat_enable_pmu_power8, 0}, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8cdc8bcde703..1c9b0ccc2172 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -234,7 +234,10 @@ transfer_to_handler_cont: mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r10 mtlr r9 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) 4: rlwinm r12,r12,0,~_TLF_NAPPING @@ -263,7 +266,10 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont) LOAD_REG_IMMEDIATE(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r0 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif reenable_mmu: /* @@ -321,7 +327,10 @@ stack_ovf: #endif mtspr SPRN_SRR0,r9 mtspr SPRN_SRR1,r10 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(stack_ovf) #endif @@ -439,15 +448,13 @@ syscall_exit_cont: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION +#ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 bne- 2f +#endif /* CONFIG_PPC_47x */ 1: -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x) -#endif /* CONFIG_44x */ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) @@ -470,7 +477,10 @@ syscall_exit_finish: #endif mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(syscall_exit_finish) #ifdef CONFIG_44x 2: li r7,0 @@ -600,7 +610,10 @@ ret_from_kernel_syscall: #endif mtspr SPRN_SRR0, r9 mtspr SPRN_SRR1, r10 - RFI + rfi +#ifdef CONFIG_40x + b . 
/* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall) /* @@ -671,7 +684,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD lwz r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except_full #ifdef CONFIG_PPC_BOOK3S_32 @@ -803,7 +816,10 @@ fast_exception_return: REST_GPR(9, r11) REST_GPR(12, r11) lwz r11,GPR11(r11) - RFI + rfi +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) @@ -948,10 +964,7 @@ restore_kuap: /* interrupts are hard-disabled at this point */ restore: -#ifdef CONFIG_44x -BEGIN_MMU_FTR_SECTION - b 1f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) +#if defined(CONFIG_44x) && !defined(CONFIG_PPC_47x) lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 @@ -1027,7 +1040,7 @@ exc_exit_restart: lwz r1,GPR1(r1) .globl exc_exit_restart_end exc_exit_restart_end: - RFI + rfi _ASM_NOKPROBE_SYMBOL(exc_exit_restart) _ASM_NOKPROBE_SYMBOL(exc_exit_restart_end) @@ -1356,7 +1369,7 @@ _GLOBAL(enter_rtas) stw r7, THREAD + RTAS_SP(r2) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI + rfi 1: tophys_novmstack r9, r1 #ifdef CONFIG_VMAP_STACK li r0, MSR_KERNEL & ~MSR_IR /* can take DTLB miss */ @@ -1371,6 +1384,6 @@ _GLOBAL(enter_rtas) stw r0, THREAD + RTAS_SP(r7) mtspr SPRN_SRR0,r8 mtspr SPRN_SRR1,r9 - RFI /* return to caller */ + rfi /* return to caller */ _ASM_NOKPROBE_SYMBOL(enter_rtas) #endif /* CONFIG_PPC_RTAS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2f3846192ec7..aa1af139d947 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -653,8 +653,8 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) kuap_check_amr r3, r4 ld r5,_MSR(r1) andi. r0,r5,MSR_PR - bne .Lfast_user_interrupt_return - kuap_restore_amr r3, r4 + bne .Lfast_user_interrupt_return_amr + kuap_kernel_restore r3, r4 andi. 
r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return @@ -674,6 +674,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) cmpdi r3,0 bne- .Lrestore_nvgprs +.Lfast_user_interrupt_return_amr: + kuap_user_restore r3, r4 .Lfast_user_interrupt_return: ld r11,_NIP(r1) ld r12,_MSR(r1) @@ -967,7 +969,7 @@ _GLOBAL(enter_prom) mtsrr1 r11 rfi #else /* CONFIG_PPC_BOOK3E */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f579ce46eef2..74d07dc0bb48 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1023,7 +1023,7 @@ storage_fault_common: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b ret_from_except /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4d01f09ecf80..e02ad6fefa46 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1059,7 +1059,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL @@ -2875,7 +2875,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - kuap_restore_amr r9, r10 + kuap_kernel_restore r9, r10 EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -3259,7 +3259,7 @@ handle_page_fault: mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD ld r4,_DAR(r1) - bl bad_page_fault + bl __bad_page_fault b interrupt_return /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index fe48d319d490..c9e2819b095a 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include 
<asm/firmware.h> +#include <asm/kvm_guest.h> #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; @@ -21,17 +22,19 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +DEFINE_STATIC_KEY_FALSE(kvm_guest); +bool check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return 0; + return false; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + return false; - return 1; + static_branch_enable(&kvm_guest); + return true; } #endif diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 7c767765071d..541664d95702 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -40,38 +40,31 @@ .macro EXCEPTION_PROLOG_1 for_rtas=0 #ifdef CONFIG_VMAP_STACK - mr r11, r1 + mtspr SPRN_SPRG_SCRATCH2,r1 subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r1,SPRN_SPRG_THREAD lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE +1: + mtcrf 0x7f, r1 + bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ beq 1f mfspr r11,SPRN_SPRG_THREAD lwz r11,TASK_STACK-THREAD(r11) addi r11, r11, THREAD_SIZE - INT_FRAME_SIZE -#endif -1: - tophys_novmstack r11, r11 -#ifdef CONFIG_VMAP_STACK - mtcrf 0x7f, r1 - bt 32 - THREAD_ALIGN_SHIFT, stack_overflow +1: tophys(r11, r11) #endif .endm .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0 #ifdef CONFIG_VMAP_STACK - mtcr r10 - li r10, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ - mtmsr r10 + li r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */ + mtmsr r11 isync -#else - stw r10,_CCR(r11) /* save registers */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH0 -#ifdef CONFIG_VMAP_STACK + mfspr r11, SPRN_SPRG_SCRATCH2 stw r11,GPR1(r1) stw r11,0(r1) mr r11, r1 @@ -80,14 +73,12 @@ stw r1,0(r11) tovirt(r1, r11) /* set new kernel sp */ 
#endif + stw r10,_CCR(r11) /* save registers */ stw r12,GPR12(r11) stw r9,GPR9(r11) - stw r10,GPR10(r11) -#ifdef CONFIG_VMAP_STACK - mfcr r10 - stw r10, _CCR(r11) -#endif + mfspr r10,SPRN_SPRG_SCRATCH0 mfspr r12,SPRN_SPRG_SCRATCH1 + stw r10,GPR10(r11) stw r12,GPR11(r11) mflr r10 stw r10,_LINK(r11) @@ -101,7 +92,6 @@ stw r10, _DSISR(r11) .endif lwz r9, SRR1(r12) - andi. r10, r9, MSR_PR lwz r12, SRR0(r12) #else mfspr r12,SPRN_SRR0 @@ -222,7 +212,10 @@ #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ +#ifdef CONFIG_40x + b . /* Prevent prefetch past rfi */ +#endif 99: b ret_from_kernel_syscall .endm diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1510b2a56669..ece7f97bafff 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -41,6 +41,11 @@ #include <asm/ppc-opcode.h> #include <asm/export.h> #include <asm/feature-fixups.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif /* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -417,6 +422,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -445,10 +454,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -829,7 +834,7 @@ __secondary_start: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . 
/* prevent speculative execution */ /* @@ -865,8 +870,7 @@ enable_64b_mode: oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_64BIT | MSR_ISF)@highest - sldi r12,r12,48 + LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 isync @@ -966,7 +970,7 @@ start_here_multiplatform: ld r4,PACAKMSR(r13) mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ /* This is where all platforms converge execution */ @@ -990,7 +994,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ee0bfebc375f..52702f3db6df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -43,16 +43,6 @@ .endm /* - * We need an ITLB miss handler for kernel addresses if: - * - Either we have modules - * - Or we have not pinned the first 8M - */ -#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ - defined(CONFIG_DEBUG_PAGEALLOC) -#define ITLB_MISS_KERNEL 1 -#endif - -/* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ @@ -190,32 +180,31 @@ SystemCall: */ #ifdef CONFIG_8xx_CPU15 -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) \ - addi addr, addr, PAGE_SIZE; \ - tlbie addr; \ - addi addr, addr, -(PAGE_SIZE << 1); \ - tlbie addr; \ - addi addr, addr, PAGE_SIZE +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \ + addi tmp, addr, PAGE_SIZE; \ + tlbie tmp; \ + addi tmp, addr, -PAGE_SIZE; \ + tlbie tmp #else -#define INVALIDATE_ADJACENT_PAGES_CPU15(addr) +#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) #endif InstructionTLBMiss: - mtspr SPRN_SPRG_SCRATCH0, r10 - mtspr SPRN_SPRG_SCRATCH1, r11 + mtspr SPRN_SPRG_SCRATCH2, r10 + mtspr SPRN_M_TW, r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. 
*/ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r10) + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES mfcr r11 compare_to_kernel_boundary r10, r10 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef ITLB_MISS_KERNEL +#ifdef CONFIG_MODULES blt+ 3f rlwinm r10, r10, 0, 20, 31 oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha @@ -241,8 +230,8 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ -0: mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 +0: mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -251,14 +240,14 @@ InstructionTLBMiss: 0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_SPRG_SCRATCH0 - mfspr r11, SPRN_SPRG_SCRATCH1 + mfspr r10, SPRN_SPRG_SCRATCH2 + mfspr r11, SPRN_M_TW rfi #endif . 
= 0x1200 DataStoreTLBMiss: - mtspr SPRN_DAR, r10 + mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 mfcr r11 @@ -297,11 +286,11 @@ DataStoreTLBMiss: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + mtspr SPRN_DAR, r11 /* Tag DAR */ /* Restore registers */ -0: mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ +0: mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi patch_site 0b, patch__dtlbmiss_exit_1 @@ -311,8 +300,7 @@ DataStoreTLBMiss: 0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) addi r10, r10, 1 stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0) - mfspr r10, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 mfspr r11, SPRN_M_TW rfi #endif @@ -619,10 +607,6 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif -#ifndef CONFIG_PIN_TLB_TEXT - li r0, 0 - mtspr SPRN_MI_CTR, r0 -#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -718,7 +702,6 @@ initial_mmu: mtspr SPRN_DER, r8 blr -#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -740,7 +723,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia -#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -761,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 -#endif + LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) @@ -819,7 +801,6 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi -#endif /* CONFIG_PIN_TLB */ /* * We put a few things here that have to be page-aligned. 
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index a0dda2a1f2df..349bf3f0c3af 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -155,10 +155,8 @@ __after_mmu_off: bl initial_bats bl load_segment_registers -BEGIN_MMU_FTR_SECTION bl reloc_offset bl early_hash_table -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -207,7 +205,7 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - RFI /* enables MMU */ + rfi /* enables MMU */ /* * We need __secondary_hold as a place to hold the other cpus on @@ -288,51 +286,35 @@ MachineCheck: DO_KVM 0x300 DataAccess: #ifdef CONFIG_VMAP_STACK - mtspr SPRN_SPRG_SCRATCH0,r10 - mfspr r10, SPRN_SPRG_THREAD BEGIN_MMU_FTR_SECTION + mtspr SPRN_SPRG_SCRATCH2,r10 + mfspr r10, SPRN_SPRG_THREAD stw r11, THR11(r10) mfspr r10, SPRN_DSISR mfcr r11 -#ifdef CONFIG_PPC_KUAP - andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif mfspr r10, SPRN_SPRG_THREAD beq hash_page_dsi .Lhash_page_dsi_cont: mtcr r11 lwz r11, THR11(r10) -END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) - mtspr SPRN_SPRG_SCRATCH1,r11 - mfspr r11, SPRN_DAR - stw r11, DAR(r10) - mfspr r11, SPRN_DSISR - stw r11, DSISR(r10) - mfspr r11, SPRN_SRR0 - stw r11, SRR0(r10) - mfspr r11, SPRN_SRR1 /* check whether user or kernel */ - stw r11, SRR1(r10) - mfcr r10 - andi. r11, r11, MSR_PR - + mfspr r10, SPRN_SPRG_SCRATCH2 +MMU_FTR_SECTION_ELSE + b 1f +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) +1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1 EXCEPTION_PROLOG_1 b handle_page_fault_tramp_1 #else /* CONFIG_VMAP_STACK */ EXCEPTION_PROLOG handle_dar_dsisr=1 get_and_save_dar_dsisr_on_stack r4, r5, r11 BEGIN_MMU_FTR_SECTION -#ifdef CONFIG_PPC_KUAP - andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h -#else andis. 
r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h -#endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 -FTR_SECTION_ELSE +MMU_FTR_SECTION_ELSE b handle_page_fault_tramp_2 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) #endif /* CONFIG_VMAP_STACK */ @@ -394,6 +376,7 @@ Alignment: . = 0x800 DO_KVM 0x800 FPUnavailable: +#ifdef CONFIG_PPC_FPU BEGIN_FTR_SECTION /* * Certain Freescale cores don't have a FPU and treat fp instructions @@ -407,6 +390,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) +#else + b ProgramCheck +#endif /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) @@ -453,13 +439,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) +#ifdef CONFIG_MODULES lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) + rlwinm r2, r2, 28, 0xfffff000 +#ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -519,8 +506,9 @@ DataLoadTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -595,8 +583,9 @@ DataStoreTLBMiss: mfspr r3,SPRN_DMISS lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 - 
mfspr r2, SPRN_SPRG_PGDIR + mfspr r2, SPRN_SDR1 li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -757,14 +746,14 @@ fast_hash_page_return: /* DSI */ mtcr r11 lwz r11, THR11(r10) - mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + mfspr r10, SPRN_SPRG_SCRATCH2 + rfi 1: /* ISI */ mtcr r11 mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r10, SPRN_SPRG_SCRATCH0 - RFI + rfi stack_overflow: vmap_stack_overflow_exception @@ -889,9 +878,12 @@ __secondary_start: tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -899,7 +891,7 @@ __secondary_start: ori r3,r3,start_secondary@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi #endif /* CONFIG_SMP */ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -920,9 +912,6 @@ early_hash_table: lis r6, early_hash - PAGE_OFFSET@h ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 - lis r6, early_hash@h - addis r3, r3, Hash@ha - stw r6, Hash@l(r3) blr load_up_mmu: @@ -931,11 +920,13 @@ load_up_mmu: tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ TLBSYNC /* ... on all CPUs */ +BEGIN_MMU_FTR_SECTION /* Load the SDR1 register (hash table base & size) */ lis r6,_SDR1@ha tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* Load the BAT registers with the values set up by MMU_init. 
*/ lis r3,BATS@ha @@ -991,9 +982,12 @@ start_here: tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ mtspr SPRN_SPRG_THREAD,r4 +BEGIN_MMU_FTR_SECTION lis r4, (swapper_pg_dir - PAGE_OFFSET)@h ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) /* stack */ lis r1,init_thread_union@ha @@ -1027,7 +1021,7 @@ start_here: .align 4 mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 - RFI + rfi /* Load up the kernel context */ 2: bl load_up_mmu @@ -1051,7 +1045,7 @@ start_here: ori r3,r3,start_kernel@l mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - RFI + rfi /* * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); @@ -1073,16 +1067,22 @@ _ENTRY(switch_mmu_context) li r0,NUM_USER_SEGMENTS mtctr r0 - lwz r4, MM_PGD(r4) #ifdef CONFIG_BDI_SWITCH /* Context switch the PTE pointer for the Abatron BDI2000. * The PGDIR is passed as second argument. */ + lwz r4, MM_PGD(r4) lis r5, abatron_pteptrs@ha stw r4, abatron_pteptrs@l + 0x4(r5) #endif +BEGIN_MMU_FTR_SECTION +#ifndef CONFIG_BDI_SWITCH + lwz r4, MM_PGD(r4) +#endif tophys(r4, r4) - mtspr SPRN_SPRG_PGDIR, r4 + rlwinm r4, r4, 4, 0xffff01ff + mtspr SPRN_SDR1, r4 +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) li r4,0 isync 3: @@ -1166,7 +1166,7 @@ _ENTRY(update_bats) .align 4 mtspr SPRN_SRR0, r4 mtspr SPRN_SRR1, r3 - RFI + rfi 1: bl clear_bats lis r3, BATS@ha addi r3, r3, BATS@l @@ -1185,7 +1185,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtmsr r3 mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r6 - RFI + rfi flush_tlbs: lis r10, 0x40 @@ -1206,7 +1206,7 @@ mmu_off: mtspr SPRN_SRR0,r4 mtspr SPRN_SRR1,r3 sync - RFI + rfi /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ initial_bats: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 71c359d438b5..74e230c200fb 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -176,7 +176,7 
@@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #endif mtspr SPRN_SRR1,r10 mtspr SPRN_SRR0,r11 - RFI /* jump to handler, enable MMU */ + rfi /* jump to handler, enable MMU */ 99: b ret_from_kernel_syscall .endm @@ -185,7 +185,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) * * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check - * On e200 we have critical and debug (machine check occurs via critical) * * Additionally we reserve a SPRG for each priority level so we can free up a * GPR to use as the base for indirect access to the exception stacks. This @@ -201,7 +200,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) #define MC_STACK_BASE mcheckirq_ctx #define CRIT_STACK_BASE critirq_ctx -/* only on e500mc/e200 */ +/* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx #define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 586a6ac501e9..fdd4d274c245 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -187,9 +187,6 @@ set_ivor: /* Setup the defaults for TLB entries */ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l -#ifdef CONFIG_E200 - oris r2,r2,MAS4_TLBSELD(1)@h -#endif mtspr SPRN_MAS4, r2 #if !defined(CONFIG_BDI_SWITCH) @@ -362,13 +359,7 @@ interrupt_base: CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ -#ifdef CONFIG_E200 - /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ - machine_check_exception) -#else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) -#endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) @@ -400,15 +391,9 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else -#ifdef CONFIG_E200 - /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - 
program_check_exception, EXC_XFER_STD) -#else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ unknown_exception, EXC_XFER_STD) #endif -#endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) @@ -625,7 +610,7 @@ END_BTB_FLUSH_SECTION mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage -/* Define SPE handlers for e200 and e500v2 */ +/* Define SPE handlers for e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -807,31 +792,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #endif 3: mtspr SPRN_MAS2, r12 -#ifdef CONFIG_E200 - /* Round robin TLB1 entries assignment */ - mfspr r12, SPRN_MAS0 - - /* Extract TLB1CFG(NENTRY) */ - mfspr r11, SPRN_TLB1CFG - andi. r11, r11, 0xfff - - /* Extract MAS0(NV) */ - andi. r13, r12, 0xfff - addi r13, r13, 1 - cmpw 0, r13, r11 - addi r12, r12, 1 - - /* check if we need to wrap */ - blt 7f - - /* wrap back to first free tlbcam entry */ - lis r13, tlbcam_index@ha - lwz r13, tlbcam_index@l(r13) - rlwimi r12, r13, 0, 20, 31 -7: - mtspr SPRN_MAS0,r12 -#endif /* CONFIG_E200 */ - tlb_write_entry: tlbwe @@ -933,21 +893,6 @@ get_phys_addr: * Global functions */ -#ifdef CONFIG_E200 -/* Adjust or setup IVORs for e200 */ -_GLOBAL(__setup_e200_ivors) - li r3,DebugDebug@l - mtspr SPRN_IVOR15,r3 - li r3,SPEUnavailable@l - mtspr SPRN_IVOR32,r3 - li r3,SPEFloatingPointData@l - mtspr SPRN_IVOR33,r3 - li r3,SPEFloatingPointRound@l - mtspr SPRN_IVOR34,r3 - sync - blr -#endif - #ifdef CONFIG_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f4e8f21046f5..8fc7a14e4d71 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -499,6 +499,11 @@ static bool is_larx_stcx_instr(int type) return type == LARX || type == STCX; } +static bool is_octword_vsx_instr(int type, int size) +{ + return ((type == LOAD_VSX || type == STORE_VSX) && size == 32); +} + /* * We've 
failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. @@ -549,6 +554,58 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, return true; } +static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, + int *hit, unsigned long ea) +{ + int i; + unsigned long hw_end_addr; + + /* + * Handle spurious exception only when any bp_per_reg is set. + * Otherwise this might be created by xmon and not actually a + * spurious exception. + */ + for (i = 0; i < nr_wp_slots(); i++) { + if (!info[i]) + continue; + + hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + + /* + * Ending address of DAWR range is less than starting + * address of op. + */ + if ((hw_end_addr - 1) >= ea) + continue; + + /* + * Those addresses need to be in the same or in two + * consecutive 512B blocks; + */ + if (((hw_end_addr - 1) >> 10) != (ea >> 10)) + continue; + + /* + * 'op address + 64B' generates an address that has a + * carry into bit 52 (crosses 2K boundary). 
+ */ + if ((ea & 0x800) == ((ea + 64) & 0x800)) + continue; + + break; + } + + if (i == nr_wp_slots()) + return; + + for (i = 0; i < nr_wp_slots(); i++) { + if (info[i]) { + hit[i] = 1; + info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + } + } +} + int hw_breakpoint_handler(struct die_args *args) { bool err = false; @@ -607,8 +664,14 @@ int hw_breakpoint_handler(struct die_args *args) goto reset; if (!nr_hit) { - rc = NOTIFY_DONE; - goto out; + /* Workaround for Power10 DD1 */ + if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && + is_octword_vsx_instr(type, size)) { + handle_p10dd1_spurious_exception(info, hit, ea); + } else { + rc = NOTIFY_DONE; + goto out; + } } /* diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 9fe4fb3b08aa..72862a4d3a5d 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -11,177 +11,11 @@ #include <asm/pci-bridge.h> #include <asm/isa-bridge.h> -/* - * Here comes the ppc64 implementation of the IOMAP - * interfaces. 
- */ -unsigned int ioread8(const void __iomem *addr) -{ - return readb(addr); -} -unsigned int ioread16(const void __iomem *addr) -{ - return readw(addr); -} -unsigned int ioread16be(const void __iomem *addr) -{ - return readw_be(addr); -} -unsigned int ioread32(const void __iomem *addr) -{ - return readl(addr); -} -unsigned int ioread32be(const void __iomem *addr) -{ - return readl_be(addr); -} -EXPORT_SYMBOL(ioread8); -EXPORT_SYMBOL(ioread16); -EXPORT_SYMBOL(ioread16be); -EXPORT_SYMBOL(ioread32); -EXPORT_SYMBOL(ioread32be); -#ifdef __powerpc64__ -u64 ioread64(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_lo_hi(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64_hi_lo(const void __iomem *addr) -{ - return readq(addr); -} -u64 ioread64be(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_lo_hi(const void __iomem *addr) -{ - return readq_be(addr); -} -u64 ioread64be_hi_lo(const void __iomem *addr) -{ - return readq_be(addr); -} -EXPORT_SYMBOL(ioread64); -EXPORT_SYMBOL(ioread64_lo_hi); -EXPORT_SYMBOL(ioread64_hi_lo); -EXPORT_SYMBOL(ioread64be); -EXPORT_SYMBOL(ioread64be_lo_hi); -EXPORT_SYMBOL(ioread64be_hi_lo); -#endif /* __powerpc64__ */ - -void iowrite8(u8 val, void __iomem *addr) -{ - writeb(val, addr); -} -void iowrite16(u16 val, void __iomem *addr) -{ - writew(val, addr); -} -void iowrite16be(u16 val, void __iomem *addr) -{ - writew_be(val, addr); -} -void iowrite32(u32 val, void __iomem *addr) -{ - writel(val, addr); -} -void iowrite32be(u32 val, void __iomem *addr) -{ - writel_be(val, addr); -} -EXPORT_SYMBOL(iowrite8); -EXPORT_SYMBOL(iowrite16); -EXPORT_SYMBOL(iowrite16be); -EXPORT_SYMBOL(iowrite32); -EXPORT_SYMBOL(iowrite32be); -#ifdef __powerpc64__ -void iowrite64(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_lo_hi(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64_hi_lo(u64 val, void __iomem *addr) -{ - writeq(val, addr); -} -void iowrite64be(u64 val, 
void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_lo_hi(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -void iowrite64be_hi_lo(u64 val, void __iomem *addr) -{ - writeq_be(val, addr); -} -EXPORT_SYMBOL(iowrite64); -EXPORT_SYMBOL(iowrite64_lo_hi); -EXPORT_SYMBOL(iowrite64_hi_lo); -EXPORT_SYMBOL(iowrite64be); -EXPORT_SYMBOL(iowrite64be_lo_hi); -EXPORT_SYMBOL(iowrite64be_hi_lo); -#endif /* __powerpc64__ */ - -/* - * These are the "repeat read/write" functions. Note the - * non-CPU byte order. We do things in "IO byteorder" - * here. - * - * FIXME! We could make these do EEH handling if we really - * wanted. Not clear if we do. - */ -void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsb(addr, dst, count); -} -void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsw(addr, dst, count); -} -void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count) -{ - readsl(addr, dst, count); -} -EXPORT_SYMBOL(ioread8_rep); -EXPORT_SYMBOL(ioread16_rep); -EXPORT_SYMBOL(ioread32_rep); - -void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesb(addr, src, count); -} -void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesw(addr, src, count); -} -void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) -{ - writesl(addr, src, count); -} -EXPORT_SYMBOL(iowrite8_rep); -EXPORT_SYMBOL(iowrite16_rep); -EXPORT_SYMBOL(iowrite32_rep); - void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); } - -void ioport_unmap(void __iomem *addr) -{ - /* Nothing to do */ -} EXPORT_SYMBOL(ioport_map); -EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem *addr) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7d0f7682d01d..6b1eca53e36c 100644 --- a/arch/powerpc/kernel/irq.c +++ 
b/arch/powerpc/kernel/irq.c @@ -102,14 +102,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace int decrementer_check_overflow(void) -{ - u64 now = get_tb(); - u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); - - return now >= *next_tb; -} - #ifdef CONFIG_PPC_BOOK3E /* This is called whenever we are re-enabling interrupts @@ -142,35 +134,6 @@ notrace unsigned int __check_irq_replay(void) trace_hardirqs_on(); trace_hardirqs_off(); - /* - * We are always hard disabled here, but PACA_IRQ_HARD_DIS may - * not be set, which means interrupts have only just been hard - * disabled as part of the local_irq_restore or interrupt return - * code. In that case, skip the decrementr check becaus it's - * expensive to read the TB. - * - * HARD_DIS then gets cleared here, but it's reconciled later. - * Either local_irq_disable will replay the interrupt and that - * will reconcile state like other hard interrupts. Or interrupt - * retur will replay the interrupt and in that case it sets - * PACA_IRQ_HARD_DIS by hand (see comments in entry_64.S). - */ - if (happened & PACA_IRQ_HARD_DIS) { - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) { - local_paca->irq_happened |= PACA_IRQ_DEC; - happened |= PACA_IRQ_DEC; - } - } - } - if (happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; return 0x900; @@ -186,6 +149,9 @@ notrace unsigned int __check_irq_replay(void) return 0x280; } + if (happened & PACA_IRQ_HARD_DIS) + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + /* There should be nothing left ! 
*/ BUG_ON(local_paca->irq_happened != 0); @@ -229,18 +195,6 @@ again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(mfmsr() & MSR_EE); - if (happened & PACA_IRQ_HARD_DIS) { - /* - * We may have missed a decrementer interrupt if hard disabled. - * Check the decrementer register in case we had a rollover - * while hard disabled. - */ - if (!(happened & PACA_IRQ_DEC)) { - if (decrementer_check_overflow()) - happened |= PACA_IRQ_DEC; - } - } - /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. @@ -345,6 +299,7 @@ notrace void arch_local_irq_restore(unsigned long mask) if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) WARN_ON_ONCE(!(mfmsr() & MSR_EE)); __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; } else { /* * We should already be hard disabled here. We had bugs diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 63702c0badb9..9f3e133b57b7 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -555,7 +555,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, } printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", - level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + level, evt->cpu, sevstr, in_guest ? "Guest" : "", err_type, subtype, dar_str, evt->disposition == MCE_DISPOSITION_RECOVERED ? "Recovered" : "Not recovered"); @@ -577,7 +577,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, #ifdef CONFIG_PPC_BOOK3S_64 /* Display faulty slb contents for SLB errors. 
*/ - if (evt->error_type == MCE_ERROR_TYPE_SLB) + if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); #endif } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index b7e173754a2e..667104d4c455 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -62,6 +62,20 @@ out: return pfn; } +static bool mce_in_guest(void) +{ +#ifdef CONFIG_KVM_BOOK3S_HANDLER + /* + * If machine check is hit when in guest context or low level KVM + * code, avoid looking up any translations or making any attempts + * to recover, just record the event and pass to KVM. + */ + if (get_paca()->kvm_hstate.in_guest) + return true; +#endif + return false; +} + /* flush SLBs and reload */ #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void) @@ -69,14 +83,6 @@ void flush_and_reload_slb(void) /* Invalidate all SLBs */ slb_flush_all_realmode(); -#ifdef CONFIG_KVM_BOOK3S_HANDLER - /* - * If machine check is hit when in guest or in transition, we will - * only flush the SLBs and continue. 
- */ - if (get_paca()->kvm_hstate.in_guest) - return; -#endif if (early_radix_enabled()) return; @@ -91,7 +97,7 @@ void flush_and_reload_slb(void) } #endif -static void flush_erat(void) +void flush_erat(void) { #ifdef CONFIG_PPC_BOOK3S_64 if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { @@ -490,19 +496,21 @@ static int mce_handle_ierror(struct pt_regs *regs, if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - handled = mce_flush(MCE_FLUSH_SLB); - break; - case MCE_ERROR_TYPE_ERAT: - handled = mce_flush(MCE_FLUSH_ERAT); - break; - case MCE_ERROR_TYPE_TLB: - handled = mce_flush(MCE_FLUSH_TLB); - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + handled = mce_flush(MCE_FLUSH_SLB); + break; + case MCE_ERROR_TYPE_ERAT: + handled = mce_flush(MCE_FLUSH_ERAT); + break; + case MCE_ERROR_TYPE_TLB: + handled = mce_flush(MCE_FLUSH_TLB); + break; + } } /* now fill in mce_error_info */ @@ -534,7 +542,7 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; - if (table[i].nip_valid) { + if (table[i].nip_valid && !mce_in_guest()) { *addr = regs->nip; if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { @@ -577,22 +585,24 @@ static int mce_handle_derror(struct pt_regs *regs, if (!(dsisr & table[i].dsisr_value)) continue; - /* attempt to correct the error */ - switch (table[i].error_type) { - case MCE_ERROR_TYPE_SLB: - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - if (mce_flush(MCE_FLUSH_SLB)) - handled = 1; - break; - case MCE_ERROR_TYPE_ERAT: - if 
(mce_flush(MCE_FLUSH_ERAT)) - handled = 1; - break; - case MCE_ERROR_TYPE_TLB: - if (mce_flush(MCE_FLUSH_TLB)) - handled = 1; - break; + if (!mce_in_guest()) { + /* attempt to correct the error */ + switch (table[i].error_type) { + case MCE_ERROR_TYPE_SLB: + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + if (mce_flush(MCE_FLUSH_SLB)) + handled = 1; + break; + case MCE_ERROR_TYPE_ERAT: + if (mce_flush(MCE_FLUSH_ERAT)) + handled = 1; + break; + case MCE_ERROR_TYPE_TLB: + if (mce_flush(MCE_FLUSH_TLB)) + handled = 1; + break; + } } /* @@ -634,7 +644,7 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->sync_error && + else if (mce_err->sync_error && !mce_in_guest() && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -662,7 +672,8 @@ static int mce_handle_derror(struct pt_regs *regs, static long mce_handle_ue_error(struct pt_regs *regs, struct mce_error_info *mce_err) { - long handled = 0; + if (mce_in_guest()) + return 0; mce_common_process_ue(regs, mce_err); if (mce_err->ignore_event) @@ -677,9 +688,10 @@ static long mce_handle_ue_error(struct pt_regs *regs, if (ppc_md.mce_check_early_recovery) { if (ppc_md.mce_check_early_recovery(regs)) - handled = 1; + return 1; } - return handled; + + return 0; } static long mce_handle_error(struct pt_regs *regs, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0ad15768d762..7f5aae3c387d 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -208,7 +208,7 @@ static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) struct paca_struct **paca_ptrs __read_mostly; EXPORT_SYMBOL(paca_ptrs); -void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu) +void __init initialise_paca(struct paca_struct *new_paca, int cpu) { #ifdef CONFIG_PPC_PSERIES 
new_paca->lppaca_ptr = NULL; @@ -241,7 +241,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int } /* Put the paca pointer into r13 and SPRG_PACA */ -void __nostackprotector setup_paca(struct paca_struct *new_paca) +void setup_paca(struct paca_struct *new_paca) { /* Setup r13 */ local_paca = new_paca; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be108616a721..2b555997b295 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -353,6 +353,55 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) return NULL; } +struct pci_intx_virq { + int virq; + struct kref kref; + struct list_head list_node; +}; + +static LIST_HEAD(intx_list); +static DEFINE_MUTEX(intx_mutex); + +static void ppc_pci_intx_release(struct kref *kref) +{ + struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref); + + list_del(&vi->list_node); + irq_dispose_mapping(vi->virq); + kfree(vi); +} + +static int ppc_pci_unmap_irq_line(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + + if (action == BUS_NOTIFY_DEL_DEVICE) { + struct pci_intx_virq *vi; + + mutex_lock(&intx_mutex); + list_for_each_entry(vi, &intx_list, list_node) { + if (vi->virq == pdev->irq) { + kref_put(&vi->kref, ppc_pci_intx_release); + break; + } + } + mutex_unlock(&intx_mutex); + } + + return NOTIFY_DONE; +} + +static struct notifier_block ppc_pci_unmap_irq_notifier = { + .notifier_call = ppc_pci_unmap_irq_line, +}; + +static int ppc_pci_register_irq_notifier(void) +{ + return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier); +} +arch_initcall(ppc_pci_register_irq_notifier); + /* * Reads the interrupt pin to determine if interrupt is use by card. 
* If the interrupt is used, then gets the interrupt line from the @@ -361,6 +410,12 @@ struct pci_controller *pci_find_controller_for_domain(int domain_nr) static int pci_read_irq_line(struct pci_dev *pci_dev) { int virq; + struct pci_intx_virq *vi, *vitmp; + + /* Preallocate vi as rewind is complex if this fails after mapping */ + vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL); + if (!vi) + return -1; pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev)); @@ -377,12 +432,12 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) * function. */ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin)) - return -1; + goto error_exit; if (pin == 0) - return -1; + goto error_exit; if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) || line == 0xff || line == 0) { - return -1; + goto error_exit; } pr_debug(" No map ! Using line %d (pin %d) from PCI config\n", line, pin); @@ -394,14 +449,33 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) if (!virq) { pr_debug(" Failed to map !\n"); - return -1; + goto error_exit; } pr_debug(" Mapped to linux irq %d\n", virq); pci_dev->irq = virq; + mutex_lock(&intx_mutex); + list_for_each_entry(vitmp, &intx_list, list_node) { + if (vitmp->virq == virq) { + kref_get(&vitmp->kref); + kfree(vi); + vi = NULL; + break; + } + } + if (vi) { + vi->virq = virq; + kref_init(&vi->kref); + list_add_tail(&vi->list_node, &intx_list); + } + mutex_unlock(&intx_mutex); + return 0; +error_exit: + kfree(vi); + return -1; } /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d421a2c7f822..a66f435dabbf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -589,7 +589,6 @@ static void save_all(struct task_struct *tsk) __giveup_spe(tsk); msr_check_and_clear(msr_all_available); - thread_pkey_regs_save(&tsk->thread); } void flush_all_to_thread(struct task_struct *tsk) @@ -807,29 +806,6 @@ static void switch_hw_breakpoint(struct task_struct *new) #endif /* 
!CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -#ifdef CONFIG_PPC_ADV_DEBUG_REGS -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DAC1, dabr); - if (IS_ENABLED(CONFIG_PPC_47x)) - isync(); - return 0; -} -#elif defined(CONFIG_PPC_BOOK3S) -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - mtspr(SPRN_DABR, dabr); - if (cpu_has_feature(CPU_FTR_DABRX)) - mtspr(SPRN_DABRX, dabrx); - return 0; -} -#else -static inline int __set_dabr(unsigned long dabr, unsigned long dabrx) -{ - return -EINVAL; -} -#endif - static inline int set_dabr(struct arch_hw_breakpoint *brk) { unsigned long dabr, dabrx; @@ -840,7 +816,19 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) if (ppc_md.set_dabr) return ppc_md.set_dabr(dabr, dabrx); - return __set_dabr(dabr, dabrx); + if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) { + mtspr(SPRN_DAC1, dabr); + if (IS_ENABLED(CONFIG_PPC_47x)) + isync(); + return 0; + } else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) { + mtspr(SPRN_DABR, dabr); + if (cpu_has_feature(CPU_FTR_DABRX)) + mtspr(SPRN_DABRX, dabrx); + return 0; + } else { + return -EINVAL; + } } static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) @@ -1160,8 +1148,6 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif - - thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1202,7 +1188,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, mtspr(SPRN_TIDR, new_thread->tidr); #endif - thread_pkey_regs_restore(new_thread, old_thread); } struct task_struct *__switch_to(struct task_struct *prev, @@ -1466,12 +1451,10 @@ static void print_msr_bits(unsigned long val) #define LAST_VOLATILE 12 #endif -void show_regs(struct pt_regs * regs) +static void __show_regs(struct pt_regs *regs) { int i, trap; - show_regs_print_info(KERN_DEFAULT); - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, 
regs->ctr); printk("REGS: %px TRAP: %04lx %s (%s)\n", @@ -1513,6 +1496,12 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); } +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + __show_regs(regs); show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); @@ -1527,14 +1516,27 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } -#ifdef CONFIG_PPC_BOOK3S_64 void arch_setup_new_exec(void) { - if (radix_enabled()) - return; - hash__setup_new_exec(); -} + +#ifdef CONFIG_PPC_BOOK3S_64 + if (!radix_enabled()) + hash__setup_new_exec(); #endif + /* + * If we exec out of a kernel thread then thread.regs will not be + * set. Do it now. + */ + if (!current->thread.regs) { + struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; + current->thread.regs = regs - 1; + } + +#ifdef CONFIG_PPC_MEM_KEYS + current->thread.regs->amr = default_amr; + current->thread.regs->iamr = default_iamr; +#endif +} #ifdef CONFIG_PPC64 /** @@ -1730,7 +1732,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ptrace_bps[i] = NULL; #endif +#ifdef CONFIG_PPC_FPU_REGS p->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif @@ -1747,6 +1751,16 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.tidr = 0; #endif + /* + * Run with the current AMR value of the kernel + */ +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + kregs->amr = AMR_KUAP_BLOCKED; + + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) + kregs->iamr = AMR_KUEP_BLOCKED; +#endif kregs->nip = ppc_function_entry(f); return 0; } @@ -1765,15 +1779,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) preload_new_slb_context(start, sp); #endif - /* - * If we exec out of a kernel thread then 
thread.regs will not be - * set. Do it now. - */ - if (!current->thread.regs) { - struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE; - current->thread.regs = regs - 1; - } - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Clear any transactional state, we're exec()ing. The cause is @@ -1855,8 +1860,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #endif current->thread.load_slb = 0; current->thread.load_fp = 0; +#ifdef CONFIG_PPC_FPU_REGS memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; +#endif #ifdef CONFIG_ALTIVEC memset(¤t->thread.vr_state, 0, sizeof(current->thread.vr_state)); current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */ @@ -1878,7 +1885,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); @@ -2174,10 +2180,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); + lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n LR = %pS\n", - loglvl, regs->trap, - (void *)regs->nip, (void *)lr); + printk("%s--- interrupt: %lx at %pS\n", + loglvl, regs->trap, (void *)regs->nip); + __show_regs(regs); + printk("%s--- interrupt: %lx\n", + loglvl, regs->trap); + firstframe = 1; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c1545f22c077..ae3c41730367 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -165,7 +165,6 @@ static struct ibm_pa_feature { #ifdef CONFIG_PPC_RADIX_MMU { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE }, #endif - { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN }, { .pabyte = 5, .pabit = 0, .cpu_features = 
CPU_FTR_REAL_LE, .cpu_user_ftrs = PPC_FEATURE_TRUE_LE }, /* diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index c2f2402ebc8c..8ebc11d1168d 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,10 +6,11 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-fpu.o obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) -obj-y += ptrace-novsx.o +obj-$(CONFIG_PPC_FPU_REGS) += ptrace-novsx.o endif obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o obj-$(CONFIG_SPE) += ptrace-spe.o diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index 67447a6197eb..3487f2c9735c 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -159,8 +159,29 @@ int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset, /* ptrace-view */ +int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data); +int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data); + extern const struct user_regset_view user_ppc_native_view; +/* ptrace-fpu */ +#ifdef CONFIG_PPC_FPU_REGS +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data); +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data); +#else +static inline int +ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + return -EIO; +} + +static inline int +ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + return -EIO; +} +#endif + /* ptrace-(no)adv */ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo); int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c new file mode 100644 index 000000000000..8301cb52dd99 --- /dev/null 
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/regset.h> + +#include <asm/switch_to.h> + +#include "ptrace-decl.h" + +int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); + else + *data = child->thread.fp_state.fpscr; + + return 0; +} + +int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) +{ + unsigned int fpidx = index - PT_FPR0; + + if (index > PT_FPSCR) + return -EIO; + + flush_fp_to_thread(child); + if (fpidx < (PT_FPSCR - PT_FPR0)) + memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); + else + child->thread.fp_state.fpscr = data; + + return 0; +} + diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 54f2d076206f..44045363a903 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -86,6 +86,11 @@ int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset) int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif + if (!cpu_has_feature(CPU_FTR_TM)) return -ENODEV; @@ -96,16 +101,12 @@ int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset, flush_fp_to_thread(target); flush_altivec_to_thread(target); - membuf_write(&to, &target->thread.ckpt_regs, - offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_ckpt_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_write(&to, &target->thread.ckpt_regs, 
sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.ckpt_regs.orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_ckpt_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 7e6478e7ed07..2bad8068f598 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -217,6 +217,10 @@ int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) static int gpr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr)); +#ifdef CONFIG_PPC64 + struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe)); +#endif int i; if (target->thread.regs == NULL) @@ -228,15 +232,12 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, target->thread.regs->gpr[i] = NV_REG_POISON; } - membuf_write(&to, target->thread.regs, offsetof(struct pt_regs, msr)); - membuf_store(&to, get_user_msr(target)); - - BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) != - offsetof(struct pt_regs, msr) + sizeof(long)); + membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs)); - membuf_write(&to, &target->thread.regs->orig_gpr3, - sizeof(struct user_pt_regs) - - offsetof(struct pt_regs, orig_gpr3)); + membuf_store(&to_msr, get_user_msr(target)); +#ifdef CONFIG_PPC64 + membuf_store(&to_softe, 0x1ul); +#endif return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) - sizeof(struct user_pt_regs)); } @@ -470,12 +471,12 @@ static int pkey_active(struct task_struct *target, const struct user_regset *reg static int pkey_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { - 
BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); if (!arch_pkeys_enabled()) return -ENODEV; - membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long)); + membuf_store(&to, target->thread.regs->amr); + membuf_store(&to, target->thread.regs->iamr); return membuf_store(&to, default_uamor); } @@ -508,7 +509,8 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset * Pick the AMR values for the keys that kernel is using. This * will be indicated by the ~default_uamor bits. */ - target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor); + target->thread.regs->amr = (new_amr & default_uamor) | + (target->thread.regs->amr & ~default_uamor); return 0; } @@ -520,11 +522,13 @@ static const struct user_regset native_regsets[] = { .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, +#ifdef CONFIG_PPC_FPU_REGS [REGSET_FPR] = { .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, +#endif #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { .core_note_type = NT_PPC_VMX, .n = 34, diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index f6e51be47c6e..3d44b73adb83 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -55,31 +55,18 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_get_reg(child, (int) index, &tmp); - if (ret) - break; - } else { - unsigned int fpidx = index - PT_FPR0; - - 
flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); - else - tmp = child->thread.fp_state.fpscr; - } + else + ret = ptrace_get_fpr(child, index, &tmp); + + if (ret) + break; ret = put_user(tmp, datalp); break; } @@ -90,30 +77,15 @@ long arch_ptrace(struct task_struct *child, long request, ret = -EIO; /* convert to index and check */ -#ifdef CONFIG_PPC32 - index = addr >> 2; - if ((addr & 3) || (index > PT_FPSCR) - || (child->thread.regs == NULL)) -#else - index = addr >> 3; - if ((addr & 7) || (index > PT_FPSCR)) -#endif + index = addr / sizeof(long); + if ((addr & (sizeof(long) - 1)) || !child->thread.regs) break; CHECK_FULL_REGS(child->thread.regs); - if (index < PT_FPR0) { + if (index < PT_FPR0) ret = ptrace_put_reg(child, index, data); - } else { - unsigned int fpidx = index - PT_FPR0; - - flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); - else - child->thread.fp_state.fpscr = data; - ret = 0; - } + else + ret = ptrace_put_fpr(child, index, data); break; } diff --git a/arch/powerpc/kernel/ptrace/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c index 7589a9665ffb..d30b9ad70edc 100644 --- a/arch/powerpc/kernel/ptrace/ptrace32.c +++ b/arch/powerpc/kernel/ptrace/ptrace32.c @@ -23,6 +23,8 @@ #include <asm/switch_to.h> +#include "ptrace-decl.h" + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 954f41676f69..d126d71ea5bd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -684,6 +684,63 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } +/** + * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR. + * + * @fw_status: RTAS call status will be placed here if not NULL. 
+ * + * rtas_ibm_suspend_me() should be called only on a CPU which has + * received H_CONTINUE from the H_JOIN hcall. All other active CPUs + * should be waiting to return from H_JOIN. + * + * rtas_ibm_suspend_me() may suspend execution of the OS + * indefinitely. Callers should take appropriate measures upon return, such as + * resetting watchdog facilities. + * + * Callers may choose to retry this call if @fw_status is + * %RTAS_THREADS_ACTIVE. + * + * Return: + * 0 - The partition has resumed from suspend, possibly after + * migration to a different host. + * -ECANCELED - The operation was aborted. + * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call. + * -EBUSY - Some other condition prevented the suspend from succeeding. + * -EIO - Hardware/platform error. + */ +int rtas_ibm_suspend_me(int *fw_status) +{ + int fwrc; + int ret; + + fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + + switch (fwrc) { + case 0: + ret = 0; + break; + case RTAS_SUSPEND_ABORTED: + ret = -ECANCELED; + break; + case RTAS_THREADS_ACTIVE: + ret = -EAGAIN; + break; + case RTAS_NOT_SUSPENDABLE: + case RTAS_OUTSTANDING_COPROC: + ret = -EBUSY; + break; + case -1: + default: + ret = -EIO; + break; + } + + if (fw_status) + *fw_status = fwrc; + + return ret; +} + void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) @@ -741,163 +798,38 @@ void rtas_os_term(char *str) printk(KERN_EMERG "ibm,os-term call failed %d\n", status); } -static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; -#ifdef CONFIG_PPC_PSERIES -static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - u16 slb_size = mmu_slb_size; - int rc = H_MULTI_THREADS_ACTIVE; - int cpu; - - slb_set_size(SLB_MIN_SIZE); - printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); - - while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && - !atomic_read(&data->error)) - rc = rtas_call(data->token, 0, 1, NULL); - - if (rc || 
atomic_read(&data->error)) { - printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); - slb_set_size(slb_size); - } - - if (atomic_read(&data->error)) - rc = atomic_read(&data->error); - - atomic_set(&data->error, rc); - pSeries_coalesce_init(); - - if (wake_when_done) { - atomic_set(&data->done, 1); - - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } - - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - - return rc; -} - -int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) -{ - atomic_inc(&data->working); - return __rtas_suspend_last_cpu(data, 0); -} - -static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done) -{ - long rc = H_SUCCESS; - unsigned long msr_save; - int cpu; - - atomic_inc(&data->working); - - /* really need to ensure MSR.EE is off for H_JOIN */ - msr_save = mfmsr(); - mtmsr(msr_save & ~(MSR_EE)); - - while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error)) - rc = plpar_hcall_norets(H_JOIN); - - mtmsr(msr_save); - - if (rc == H_SUCCESS) { - /* This cpu was prodded and the suspend is complete. */ - goto out; - } else if (rc == H_CONTINUE) { - /* All other cpus are in H_JOIN, this cpu does - * the suspend. - */ - return __rtas_suspend_last_cpu(data, wake_when_done); - } else { - printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n", - smp_processor_id(), rc); - atomic_set(&data->error, rc); - } - - if (wake_when_done) { - atomic_set(&data->done, 1); - - /* This cpu did the suspend or got an error; in either case, - * we need to prod all other other cpus out of join state. - * Extra prods are harmless. 
- */ - for_each_online_cpu(cpu) - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); - } -out: - if (atomic_dec_return(&data->working) == 0) - complete(data->complete); - return rc; -} - -int rtas_suspend_cpu(struct rtas_suspend_me_data *data) -{ - return __rtas_suspend_cpu(data, 0); -} - -static void rtas_percpu_suspend_me(void *info) +/** + * rtas_activate_firmware() - Activate a new version of firmware. + * + * Activate a new version of partition firmware. The OS must call this + * after resuming from a partition hibernation or migration in order + * to maintain the ability to perform live firmware updates. It's not + * catastrophic for this method to be absent or to fail; just log the + * condition in that case. + * + * Context: This function may sleep. + */ +void rtas_activate_firmware(void) { - __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); -} + int token; + int fwrc; -int rtas_ibm_suspend_me(u64 handle) -{ - long state; - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - struct rtas_suspend_me_data data; - DECLARE_COMPLETION_ONSTACK(done); - - if (!rtas_service_present("ibm,suspend-me")) - return -ENOSYS; - - /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, handle); - - state = retbuf[0]; - - if (rc) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); - return rc; - } else if (state == H_VASI_ENABLED) { - return -EAGAIN; - } else if (state != H_VASI_SUSPENDING) { - printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", - state); - return -EIO; + token = rtas_token("ibm,activate-firmware"); + if (token == RTAS_UNKNOWN_SERVICE) { + pr_notice("ibm,activate-firmware method unavailable\n"); + return; } - atomic_set(&data.working, 0); - atomic_set(&data.done, 0); - atomic_set(&data.error, 0); - data.token = rtas_token("ibm,suspend-me"); - data.complete = &done; - - lock_device_hotplug(); - - cpu_hotplug_disable(); - - /* Call function on all CPUs. 
One of us will make the - * rtas call - */ - on_each_cpu(rtas_percpu_suspend_me, &data, 0); - - wait_for_completion(&done); - - if (atomic_read(&data.error) != 0) - printk(KERN_ERR "Error doing global join\n"); - - - cpu_hotplug_enable(); - - unlock_device_hotplug(); + do { + fwrc = rtas_call(token, 0, 1, NULL); + } while (rtas_busy_delay(fwrc)); - return atomic_read(&data.error); + if (fwrc) + pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } +static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE; +#ifdef CONFIG_PPC_PSERIES /** * rtas_call_reentrant() - Used for reentrant rtas calls * @token: Token for desired reentrant RTAS call @@ -948,12 +880,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) return ret; } -#else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(u64 handle) -{ - return -ENOSYS; -} -#endif +#endif /* CONFIG_PPC_PSERIES */ /** * Find a specific pseries error log in an RTAS extended event log. @@ -1030,7 +957,7 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "ibm,display-message", -1, 0, -1, -1, -1 }, { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, { "ibm,get-indices", -1, 2, 3, -1, -1 }, @@ -1050,9 +977,11 @@ static struct rtas_filter rtas_filters[] __ro_after_init = { { "set-time-for-power-on", -1, -1, -1, -1, -1 }, { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, { "set-time-of-day", -1, -1, -1, -1, -1 }, +#ifdef CONFIG_CPU_BIG_ENDIAN { "ibm,suspend-me", -1, -1, -1, -1, -1 }, { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, +#endif { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, }; @@ -1183,7 +1112,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) int rc = 0; u64 handle = 
((u64)be32_to_cpu(args.args[0]) << 32) | be32_to_cpu(args.args[1]); - rc = rtas_ibm_suspend_me(handle); + rc = rtas_syscall_dispatch_ibm_suspend_me(handle); if (rc == -EAGAIN) args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE); else if (rc == -EIO) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 808ec9fab605..71f38e9248be 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -90,8 +90,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); */ int dcache_bsize; int icache_bsize; -int ucache_bsize; - unsigned long klimit = (unsigned long) _end; @@ -802,8 +800,6 @@ static __init void print_system_info(void) pr_info("dcache_bsize = 0x%x\n", dcache_bsize); pr_info("icache_bsize = 0x%x\n", icache_bsize); - if (ucache_bsize != 0) - pr_info("ucache_bsize = 0x%x\n", ucache_bsize); pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); pr_info(" possible = 0x%016lx\n", @@ -919,8 +915,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. 
*/ @@ -944,6 +938,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 2ec835574cc9..2dd0d9cb5a20 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -8,12 +8,6 @@ #ifndef __ARCH_POWERPC_KERNEL_SETUP_H #define __ARCH_POWERPC_KERNEL_SETUP_H -#ifdef CONFIG_CC_IS_CLANG -#define __nostackprotector -#else -#define __nostackprotector __attribute__((__optimize__("no-stack-protector"))) -#endif - void initialize_cache_info(void); void irqstack_early_init(void); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 057d6b8e9bb0..8ba49a6bf515 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -222,7 +222,4 @@ __init void initialize_cache_info(void) */ dcache_bsize = cur_cpu_spec->dcache_bsize; icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - if (IS_ENABLED(CONFIG_E200)) - ucache_bsize = icache_bsize = dcache_bsize; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 74fd47f46fa5..c28e949cc222 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -283,7 +283,7 @@ void __init record_spr_defaults(void) * device-tree is not accessible via normal means at this point. 
*/ -void __init __nostackprotector early_setup(unsigned long dt_ptr) +void __init early_setup(unsigned long dt_ptr) { static __initdata struct paca_struct boot_paca; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a8bb0aca1d02..53782aa60ade 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -133,36 +133,6 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task, return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#else -inline unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.fp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.fp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.ckfp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.ckfp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #endif /* Log an error when sending an unhandled signal to a process. 
Controlled @@ -174,20 +144,22 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32) +static unsigned long get_tm_stackpointer(struct task_struct *tsk); + +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32) { unsigned long oldsp, newsp; + unsigned long sp = get_tm_stackpointer(tsk); /* Default to using normal stack */ - oldsp = get_clean_sp(sp, is_32); + if (is_32) + oldsp = sp & 0x0ffffffffUL; + else + oldsp = sp; oldsp = sigsp(oldsp, ksig); newsp = (oldsp - frame_size) & ~0xFUL; - /* Check access */ - if (!access_ok((void __user *)newsp, oldsp - newsp)) - return NULL; - return (void __user *)newsp; } @@ -331,7 +303,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct task_struct *tsk) +static unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. It's possible that the stack has moved back @@ -379,3 +351,14 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) #endif return ret; } + +static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n"; +static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n"; + +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr) +{ + if (show_unhandled_signals) + printk_ratelimited(regs->msr & MSR_64BIT ? 
fm64 : fm32, tsk->comm, + task_pid_nr(tsk), where, ptr, regs->nip, regs->link); +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index d396efca4068..2559a681536e 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -10,8 +10,8 @@ #ifndef _POWERPC_ARCH_SIGNAL_H #define _POWERPC_ARCH_SIGNAL_H -extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, - size_t frame_size, int is_32); +void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk, + size_t frame_size, int is_32); extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); @@ -19,16 +19,6 @@ extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk); -extern unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task); -extern unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from); -extern unsigned long get_tm_stackpointer(struct task_struct *tsk); - #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); @@ -38,6 +28,104 @@ extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); +unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); +unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); + +#define unsafe_copy_fpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; 
\ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \ + unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_vsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NFPREG - 1 ; i++) \ + unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\ + unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \ +} while (0) + +#define unsafe_copy_ckvsx_to_user(to, task, label) do { \ + struct task_struct *__t = task; \ + u64 __user *buf = (u64 __user *)to; \ + int i; \ + \ + for (i = 0; i < ELF_NVSRHALFREG ; i++) \ + unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \ + &buf[i], label);\ +} while (0) +#endif +#elif defined(CONFIG_PPC_FPU_REGS) + +#define unsafe_copy_fpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.fp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +#define unsafe_copy_ckfpr_to_user(to, task, label) \ + unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \ + ELF_NFPREG * sizeof(double), label) + +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct 
*task) +{ + return __copy_to_user(to, task->thread.ckfp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +static inline unsigned long +copy_ckfpr_from_user(struct task_struct *task, void __user *from) +{ + return __copy_from_user(task->thread.ckfp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) + +static inline unsigned long +copy_fpr_to_user(void __user *to, struct task_struct *task) +{ + return 0; +} + +static inline unsigned long +copy_fpr_from_user(struct task_struct *task, void __user *from) +{ + return 0; +} #endif #ifdef CONFIG_PPC64 @@ -58,4 +146,7 @@ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #endif /* !defined(CONFIG_PPC64) */ +void signal_fault(struct task_struct *tsk, struct pt_regs *regs, + const char *where, void __user *ptr); + #endif /* _POWERPC_ARCH_SIGNAL_H */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 96950f189b5a..934cbdf6dd10 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -58,8 +58,6 @@ #define mcontext mcontext32 #define ucontext ucontext32 -#define __save_altstack __compat_save_altstack - /* * Userspace code may pass a ucontext which doesn't include VSX added * at the end. We need to check for this case. 
@@ -84,10 +82,7 @@ * Functions for flipping sigsets (thanks to brain dead generic * implementation that makes things simple for little endian only) */ -static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) -{ - return put_compat_sigset(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t unsafe_put_compat_sigset static inline int get_sigset_t(sigset_t *set, const compat_sigset_t __user *uset) @@ -98,8 +93,8 @@ static inline int get_sigset_t(sigset_t *set, #define to_user_ptr(p) ptr_to_compat(p) #define from_user_ptr(p) compat_ptr(p) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int +save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int val, i; @@ -113,10 +108,12 @@ static inline int save_general_regs(struct pt_regs *regs, else val = gregs[i]; - if (__put_user(val, &frame->mc_gregs[i])) - return -EFAULT; + unsafe_put_user(val, &frame->mc_gregs[i], failed); } return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -138,10 +135,12 @@ static inline int restore_general_regs(struct pt_regs *regs, #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) -static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set) -{ - return copy_to_user(uset, set, sizeof(*uset)); -} +#define unsafe_put_sigset_t(uset, set, label) do { \ + sigset_t __user *__us = uset ; \ + const sigset_t *__s = set; \ + \ + unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \ +} while (0) static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) { @@ -151,11 +150,15 @@ static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset) #define to_user_ptr(p) ((unsigned long)(p)) #define from_user_ptr(p) ((void __user *)(p)) -static inline int save_general_regs(struct pt_regs *regs, - struct mcontext __user *frame) +static __always_inline int 
+save_general_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame) { WARN_ON(!FULL_REGS(regs)); - return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); + unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed); + return 0; + +failed: + return 1; } static inline int restore_general_regs(struct pt_regs *regs, @@ -173,6 +176,11 @@ static inline int restore_general_regs(struct pt_regs *regs, } #endif +#define unsafe_save_general_regs(regs, frame, label) do { \ + if (save_general_regs_unsafe(regs, frame)) \ + goto label; \ +} while (0) + /* * When we have signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -199,9 +207,6 @@ struct sigframe { int abigap[56]; }; -/* We use the mc_pad field for the signal return trampoline. */ -#define tramp mc_pad - /* * When we have rt signals to deliver, we set up on the * user stack, going down from the original stack pointer: @@ -235,26 +240,39 @@ struct rt_sigframe { * We only save the altivec/spe registers if the process has used * altivec/spe instructions at some point. 
*/ -static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - int ctx_has_vsx_region) +static void prepare_save_user_regs(int ctx_has_vsx_region) { - unsigned long msr = regs->msr; - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); +#ifdef CONFIG_ALTIVEC + if (current->thread.used_vr) + flush_altivec_to_thread(current); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.vrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_VSX + if (current->thread.used_vsr && ctx_has_vsx_region) + flush_vsx_to_thread(current); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) +{ + unsigned long msr = regs->msr; /* save general registers */ - if (save_general_regs(regs, frame)) - return 1; + unsafe_save_general_regs(regs, frame, failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, &current->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_VEC; @@ -267,13 +285,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * most significant bits of that same vector. --BenH * Note that the current VRSAVE value is in the SPR at this point. 
*/ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32])) - return 1; + unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32], + failed); #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) - return 1; + unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -288,19 +303,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * contains valid data */ if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); - if (copy_vsx_to_user(&frame->mc_vsregs, current)) - return 1; + unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* save spe registers */ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data */ msr |= MSR_SPE; @@ -308,30 +319,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, /* else assert((regs->msr & MSR_SPE) == 0) */ /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); + /* We need to write 0 the MSR top 32 bits in the tm frame so that we * can check it on the restore to see if TM is active */ - if (tm_frame && __put_user(0, 
&tm_frame->mc_gregs[PT_MSR])) - return 1; - - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + if (tm_frame) + unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; } +#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \ + if (save_user_regs_unsafe(regs, frame, tm_frame, has_vsx)) \ + goto label; \ +} while (0) + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Save the current user registers on the user stack. @@ -340,19 +350,28 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, * We also save the transactional registers to a second ucontext in the * frame. * - * See save_user_regs() and signal_64.c:setup_tm_sigcontexts(). + * See save_user_regs_unsafe() and signal_64.c:setup_tm_sigcontexts(). */ -static int save_tm_user_regs(struct pt_regs *regs, - struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret, - unsigned long msr) +static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); +#endif +#ifdef CONFIG_SPE + if (current->thread.used_spe) + flush_spe_to_thread(current); +#endif +} + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ /* Save both sets of general registers */ - if (save_general_regs(&current->thread.ckpt_regs, frame) - || save_general_regs(regs, tm_frame)) - return 1; + unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed); + unsafe_save_general_regs(regs, tm_frame, failed); /* Stash the top half of the 64bit MSR into the 32bit MSR word * of the transactional mcontext. 
This way we have a backward-compatible @@ -360,26 +379,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * also look at what type of transaction (T or S) was active at the * time of the signal. */ - if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR])) - return 1; + unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - if (__copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - if (msr & MSR_VEC) { - if (__copy_to_user(&tm_frame->mc_vregs, - &current->thread.vr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } else { - if (__copy_to_user(&tm_frame->mc_vregs, - &current->thread.ckvr_state, - ELF_NVRREG * sizeof(vector128))) - return 1; - } + unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); + if (msr & MSR_VEC) + unsafe_copy_to_user(&tm_frame->mc_vregs, + &current->thread.vr_state, + ELF_NVRREG * sizeof(vector128), failed); + else + unsafe_copy_to_user(&tm_frame->mc_vregs, + &current->thread.ckvr_state, + ELF_NVRREG * sizeof(vector128), failed); /* set MSR_VEC in the saved MSR value to indicate that * frame->mc_vregs contains valid data @@ -392,31 +406,21 @@ static int save_tm_user_regs(struct pt_regs *regs, * significant bits of a vector, we "cheat" and stuff VRSAVE in the * most significant bits of that same vector. 
--BenH */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.ckvrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&frame->mc_vregs[32])) - return 1; - if (msr & MSR_VEC) { - if (__put_user(current->thread.vrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } else { - if (__put_user(current->thread.ckvrsave, - (u32 __user *)&tm_frame->mc_vregs[32])) - return 1; - } + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&frame->mc_vregs[32], failed); + if (msr & MSR_VEC) + unsafe_put_user(current->thread.vrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); + else + unsafe_put_user(current->thread.ckvrsave, + (u32 __user *)&tm_frame->mc_vregs[32], failed); #endif /* CONFIG_ALTIVEC */ - if (copy_ckfpr_to_user(&frame->mc_fregs, current)) - return 1; - if (msr & MSR_FP) { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } else { - if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) - return 1; - } + unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); + if (msr & MSR_FP) + unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed); + else + unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); #ifdef CONFIG_VSX /* @@ -426,54 +430,54 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) - return 1; - if (msr & MSR_VSX) { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, - current)) - return 1; - } else { - if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) - return 1; - } + unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed); + if (msr & MSR_VSX) + unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed); + else + unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed); msr |= MSR_VSX; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in save_user_regs(). 
+ * simply the same as in save_user_regs_unsafe(). */ if (current->thread.used_spe) { - flush_spe_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32))) - return 1; + unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, + ELF_NEVRREG * sizeof(u32), failed); /* set MSR_SPE in the saved MSR value to indicate that * frame->mc_vregs contains valid data */ msr |= MSR_SPE; } /* We always copy to/from spefscr */ - if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG)) - return 1; + unsafe_put_user(current->thread.spefscr, + (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); #endif /* CONFIG_SPE */ - if (__put_user(msr, &frame->mc_gregs[PT_MSR])) - return 1; - if (sigret) { - /* Set up the sigreturn trampoline: li 0,sigret; sc */ - if (__put_user(PPC_INST_ADDI + sigret, &frame->tramp[0]) - || __put_user(PPC_INST_SC, &frame->tramp[1])) - return 1; - flush_icache_range((unsigned long) &frame->tramp[0], - (unsigned long) &frame->tramp[2]); - } + unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); return 0; + +failed: + return 1; +} +#else +static void prepare_save_tm_user_regs(void) { } + +static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) +{ + return 0; } #endif +#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \ + if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \ + goto label; \ +} while (0) + /* * Restore the current user register values from the user stack, * (except for MSR). 
@@ -751,96 +755,189 @@ static long restore_tm_user_regs(struct pt_regs *regs, int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, struct task_struct *tsk) { - struct rt_sigframe __user *rt_sf; - struct mcontext __user *frame; - struct mcontext __user *tm_frame = NULL; - void __user *addr; + struct rt_sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; unsigned long newsp = 0; - int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* Save the thread's msr before get_tm_stackpointer() changes it */ unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); /* Set up Signal Frame */ - /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); - addr = rt_sf; - if (unlikely(rt_sf == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->uc.uc_mcontext; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->uc_transact.uc_mcontext; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) goto badframe; /* Put the siginfo & fill in most of the ucontext */ - if (copy_siginfo_to_user(&rt_sf->info, &ksig->info) - || __put_user(0, &rt_sf->uc.uc_flags) - || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1]) - || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext), - &rt_sf->uc.uc_regs) - || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset)) - goto badframe; + unsafe_put_user(0, &frame->uc.uc_flags, failed); +#ifdef CONFIG_PPC64 + unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#else + unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed); +#endif + unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed); - /* Save user registers on the stack */ - frame = &rt_sf->uc.uc_mcontext; - addr = frame; - if (vdso32_rt_sigtramp && 
tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; + if (MSR_TM_ACTIVE(msr)) { +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + unsafe_put_user((unsigned long)&frame->uc_transact, + &frame->uc.uc_link, failed); + unsafe_put_user((unsigned long)tm_mctx, + &frame->uc_transact.uc_regs, failed); +#endif + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); } else { - sigret = __NR_rt_sigreturn; - tramp = (unsigned long) frame->tramp; + unsafe_put_user(0, &frame->uc.uc_link, failed); + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(msr)) { - if (__put_user((unsigned long)&rt_sf->uc_transact, - &rt_sf->uc.uc_link) || - __put_user((unsigned long)tm_frame, - &rt_sf->uc_transact.uc_regs)) - goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) - goto badframe; - } - else -#endif - { - if (__put_user(0, &rt_sf->uc.uc_link)) - goto badframe; - if (save_user_regs(regs, frame, tm_frame, sigret, 1)) - goto badframe; + /* Save user registers on the stack */ + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], + failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); } + unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); + + user_write_access_end(); + + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + goto badframe; + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + regs->link = tramp; +#ifdef CONFIG_PPC_FPU_REGS tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 
16); - addr = (void __user *)regs->gpr[1]; + newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16); if (put_user(regs->gpr[1], (u32 __user *)newsp)) goto badframe; /* Fill registers for signal handler */ regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) &rt_sf->info; - regs->gpr[5] = (unsigned long) &rt_sf->uc; - regs->gpr[6] = (unsigned long) rt_sf; + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; + regs->gpr[6] = (unsigned long)frame; regs->nip = (unsigned long) ksig->ka.sa.sa_handler; /* enter the signal handler in native-endian mode */ regs->msr &= ~MSR_LE; regs->msr |= (MSR_KERNEL & MSR_LE); return 0; +failed: + user_write_access_end(); + +badframe: + signal_fault(tsk, regs, "handle_rt_signal32", frame); + + return 1; +} + +/* + * OK, we're invoking a handler + */ +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) +{ + struct sigcontext __user *sc; + struct sigframe __user *frame; + struct mcontext __user *mctx; + struct mcontext __user *tm_mctx = NULL; + unsigned long newsp = 0; + unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; + + /* Set up Signal Frame */ + frame = get_sigframe(ksig, tsk, sizeof(*frame), 1); + mctx = &frame->mctx; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + tm_mctx = &frame->mctx_transact; +#endif + if (MSR_TM_ACTIVE(msr)) + prepare_save_tm_user_regs(); + else + prepare_save_user_regs(1); + + if (!user_write_access_begin(frame, sizeof(*frame))) + goto badframe; + sc = (struct sigcontext __user *) &frame->sctx; + +#if _NSIG != 64 +#error "Please adjust handle_signal()" +#endif + unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed); + unsafe_put_user(oldset->sig[0], &sc->oldmask, failed); +#ifdef CONFIG_PPC64 + unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed); +#else + 
unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed); +#endif + unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed); + unsafe_put_user(ksig->sig, &sc->signal, failed); + + if (MSR_TM_ACTIVE(msr)) + unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed); + else + unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed); + + if (tsk->mm->context.vdso) { + tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); + } else { + tramp = (unsigned long)mctx->mc_pad; + /* Set up the sigreturn trampoline: li r0,sigret; sc */ + unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + } + user_write_access_end(); + + if (tramp == (unsigned long)mctx->mc_pad) + flush_icache_range(tramp, tramp + 2 * sizeof(unsigned long)); + + regs->link = tramp; + +#ifdef CONFIG_PPC_FPU_REGS + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ +#endif + + /* create a stack frame for the caller of the handler */ + newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; + + regs->gpr[1] = newsp; + regs->gpr[3] = ksig->sig; + regs->gpr[4] = (unsigned long) sc; + regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + /* enter the signal handler in big-endian mode */ + regs->msr &= ~MSR_LE; + return 0; + +failed: + user_write_access_end(); + badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_rt_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - addr, regs->nip, regs->link); + signal_fault(tsk, regs, "handle_signal32", frame); return 1; } @@ -967,11 +1064,13 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ mctx = (struct mcontext __user *) ((unsigned long) &old_ctx->uc_mcontext & ~0xfUL); - if (!access_ok(old_ctx, ctx_size) - || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region) - || put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked) - || 
__put_user(to_user_ptr(mctx), &old_ctx->uc_regs)) + prepare_save_user_regs(ctx_has_vsx_region); + if (!user_write_access_begin(old_ctx, ctx_size)) return -EFAULT; + unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed); + unsafe_put_sigset_t(&old_ctx->uc_sigmask, ¤t->blocked, failed); + unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed); + user_write_access_end(); } if (new_ctx == NULL) return 0; @@ -995,6 +1094,10 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; + +failed: + user_write_access_end(); + return -EFAULT; } #ifdef CONFIG_PPC64 @@ -1092,12 +1195,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; bad: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_rt_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - rt_sf, regs->nip, regs->link); + signal_fault(current, regs, "sys_rt_sigreturn", rt_sf); force_sig(SIGSEGV); return 0; @@ -1181,12 +1279,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, * We kill the task with a SIGSEGV in this situation. 
*/ if (do_setcontext(ctx, regs, 1)) { - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO "%s[%d]: bad frame in " - "sys_debug_setcontext: %p nip %08lx " - "lr %08lx\n", - current->comm, current->pid, - ctx, regs->nip, regs->link); + signal_fault(current, regs, "sys_debug_setcontext", ctx); force_sig(SIGSEGV); goto out; @@ -1208,96 +1301,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, #endif /* - * OK, we're invoking a handler - */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct task_struct *tsk) -{ - struct sigcontext __user *sc; - struct sigframe __user *frame; - struct mcontext __user *tm_mctx = NULL; - unsigned long newsp = 0; - int sigret; - unsigned long tramp; - struct pt_regs *regs = tsk->thread.regs; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Save the thread's msr before get_tm_stackpointer() changes it */ - unsigned long msr = regs->msr; -#endif - - BUG_ON(tsk != current); - - /* Set up Signal Frame */ - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); - if (unlikely(frame == NULL)) - goto badframe; - sc = (struct sigcontext __user *) &frame->sctx; - -#if _NSIG != 64 -#error "Please adjust handle_signal()" -#endif - if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler) - || __put_user(oldset->sig[0], &sc->oldmask) -#ifdef CONFIG_PPC64 - || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) -#else - || __put_user(oldset->sig[1], &sc->_unused[3]) -#endif - || __put_user(to_user_ptr(&frame->mctx), &sc->regs) - || __put_user(ksig->sig, &sc->signal)) - goto badframe; - - if (vdso32_sigtramp && tsk->mm->context.vdso_base) { - sigret = 0; - tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; - } else { - sigret = __NR_sigreturn; - tramp = (unsigned long) frame->mctx.tramp; - } - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(msr)) { - if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret, msr)) - goto 
badframe; - } - else -#endif - { - if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1)) - goto badframe; - } - - regs->link = tramp; - - tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ - - /* create a stack frame for the caller of the handler */ - newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; - if (put_user(regs->gpr[1], (u32 __user *)newsp)) - goto badframe; - - regs->gpr[1] = newsp; - regs->gpr[3] = ksig->sig; - regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler; - /* enter the signal handler in big-endian mode */ - regs->msr &= ~MSR_LE; - return 0; - -badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in handle_signal32: " - "%p nip %08lx lr %08lx\n", - tsk->comm, tsk->pid, - frame, regs->nip, regs->link); - - return 1; -} - -/* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_PPC64 @@ -1363,12 +1366,7 @@ SYSCALL_DEFINE0(sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(KERN_INFO - "%s[%d]: bad frame in sys_sigreturn: " - "%p nip %08lx lr %08lx\n", - current->comm, current->pid, - addr, regs->nip, regs->link); + signal_fault(current, regs, "sys_sigreturn", addr); force_sig(SIGSEGV); return 0; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index bfc939360bad..f9e4a1ac440f 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -66,11 +66,6 @@ struct rt_sigframe { char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); -static const char fmt32[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n"; -static const char fmt64[] = KERN_INFO \ - "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n"; - /* * This computes a quad word aligned pointer inside the vmx_reserve array * element. 
For historical reasons sigcontext might not be quad word aligned, @@ -801,10 +796,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "rt_sigreturn", - (long)uc, regs->nip, regs->link); + signal_fault(current, regs, "rt_sigreturn", uc); force_sig(SIGSEGV); return 0; @@ -822,10 +814,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long msr = regs->msr; #endif - BUG_ON(tsk != current); - - frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); - if (unlikely(frame == NULL)) + frame = get_sigframe(ksig, tsk, sizeof(*frame), 0); + if (!access_ok(frame, sizeof(*frame))) goto badframe; err |= __put_user(&frame->info, &frame->pinfo); @@ -864,8 +854,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { - regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; + if (tsk->mm->context.vdso) { + regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) @@ -913,10 +903,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, return 0; badframe: - if (show_unhandled_signals) - printk_ratelimited(regs->msr & MSR_64BIT ? 
fmt64 : fmt32, - tsk->comm, tsk->pid, "setup_rt_frame", - (long)frame, regs->nip, regs->link); + signal_fault(current, regs, "handle_rt_signal64", frame); return 1; } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd960..2b9b1bb4c5f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; +bool thread_group_shares_l2; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -99,6 +100,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 +#define THREAD_GROUP_SHARE_L2 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -106,11 +108,27 @@ struct thread_groups { unsigned int thread_list[MAX_THREAD_LIST_SIZE]; }; +/* Maximum number of properties that groups of threads within a core can share */ +#define MAX_THREAD_GROUP_PROPERTIES 2 + +struct thread_groups_list { + unsigned int nr_properties; + struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES]; +}; + +static struct thread_groups_list tgl[NR_CPUS] __initdata; /* - * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); + +/* + * On some big-cores system, thread_group_l2_cache_map for each CPU + * corresponds to the set its siblings within the core that share the + * L2-cache. 
+ */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -695,81 +713,100 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores - * the parsed output in the thread_groups - * structure @tg if the ibm,thread-groups[0] - * matches @property. + * the parsed output in the thread_groups_list + * structure @tglp. * * @dn: The device node of the CPU device. - * @tg: Pointer to a thread group structure into which the parsed + * @tglp: Pointer to a thread group list structure into which the parsed * output of "ibm,thread-groups" is stored. - * @property: The property of the thread-group that the caller is - * interested in. * * ibm,thread-groups[0..N-1] array defines which group of threads in * the CPU-device node can be grouped together based on the property. * - * ibm,thread-groups[0] tells us the property based on which the + * This array can represent thread groupings for multiple properties. + * + * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies - * that the threads in the same group share L1, translation cache. + * that the threads in the same group share L1, translation cache. If + * the value is 2, it implies that the threads in the same group share + * the same L2 cache. * - * ibm,thread-groups[1] tells us how many such thread groups exist. + * ibm,thread-groups[i+1] tells us how many such thread groups exist for the + * property ibm,thread-groups[i] * - * ibm,thread-groups[2] tells us the number of threads in each such + * ibm,thread-groups[i+2] tells us the number of threads in each such * group. 
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then, * - * ibm,thread-groups[3..N-1] is the list of threads identified by + * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by * "ibm,ppc-interrupt-server#s" arranged as per their membership in * the grouping. * - * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it - * implies that there are 2 groups of 4 threads each, where each group - * of threads share L1, translation cache. + * Example: + * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] + * This can be decomposed up into two consecutive arrays: + * a) [1,2,4,8,10,12,14,9,11,13,15] + * b) [2,2,4,8,10,12,14,9,11,13,15] + * + * where in, + * + * a) provides information of Property "1" being shared by "2" groups, + * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "1" is indicative of the thread in the + * group sharing L1 cache, translation cache and Instruction Data + * flow. * - * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} - * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, - * 11, 12} structure + * b) provides information of Property "2" being shared by "2" groups, + * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "2" indicates that the threads in each + * group share the L2-cache. * * Returns 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. 
*/ static int parse_thread_groups(struct device_node *dn, - struct thread_groups *tg, - unsigned int property) + struct thread_groups_list *tglp) { - int i; - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; - u32 *thread_list; + unsigned int property_idx = 0; + u32 *thread_group_array; size_t total_threads; - int ret; + int ret = 0, count; + u32 *thread_list; + int i = 0; + count = of_property_count_u32_elems(dn, "ibm,thread-groups"); + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, 3); + thread_group_array, count); if (ret) - return ret; - - tg->property = thread_group_array[0]; - tg->nr_groups = thread_group_array[1]; - tg->threads_per_group = thread_group_array[2]; - if (tg->property != property || - tg->nr_groups < 1 || - tg->threads_per_group < 1) - return -ENODATA; + goto out_free; - total_threads = tg->nr_groups * tg->threads_per_group; + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) { + int j; + struct thread_groups *tg = &tglp->property_tgs[property_idx++]; - ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, - 3 + total_threads); - if (ret) - return ret; + tg->property = thread_group_array[i]; + tg->nr_groups = thread_group_array[i + 1]; + tg->threads_per_group = thread_group_array[i + 2]; + total_threads = tg->nr_groups * tg->threads_per_group; - thread_list = &thread_group_array[3]; + thread_list = &thread_group_array[i + 3]; - for (i = 0 ; i < total_threads; i++) - tg->thread_list[i] = thread_list[i]; + for (j = 0; j < total_threads; j++) + tg->thread_list[j] = thread_list[j]; + i = i + 3 + total_threads; + } - return 0; + tglp->nr_properties = property_idx; + +out_free: + kfree(thread_group_array); + return ret; } /* @@ -805,50 +842,84 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) return -1; } -static int init_cpu_l1_cache_map(int cpu) - +static struct thread_groups *__init 
get_thread_groups(int cpu, + int group_property, + int *err) { struct device_node *dn = of_get_cpu_node(cpu, NULL); - struct thread_groups tg = {.property = 0, - .nr_groups = 0, - .threads_per_group = 0}; + struct thread_groups_list *cpu_tgl = &tgl[cpu]; + struct thread_groups *tg = NULL; + int i; + *err = 0; + + if (!dn) { + *err = -ENODATA; + return NULL; + } + + if (!cpu_tgl->nr_properties) { + *err = parse_thread_groups(dn, cpu_tgl); + if (*err) + goto out; + } + + for (i = 0; i < cpu_tgl->nr_properties; i++) { + if (cpu_tgl->property_tgs[i].property == group_property) { + tg = &cpu_tgl->property_tgs[i]; + break; + } + } + + if (!tg) + *err = -EINVAL; +out: + of_node_put(dn); + return tg; +} + +static int __init init_thread_group_cache_map(int cpu, int cache_property) + +{ int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; + struct thread_groups *tg = NULL; + cpumask_var_t *mask = NULL; - if (!dn) - return -ENODATA; + if (cache_property != THREAD_GROUP_SHARE_L1 && + cache_property != THREAD_GROUP_SHARE_L2) + return -EINVAL; - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); - if (err) - goto out; + tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) + return err; - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + cpu_group_start = get_cpu_thread_group_start(cpu, tg); if (unlikely(cpu_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + if (cache_property == THREAD_GROUP_SHARE_L1) + mask = &per_cpu(thread_group_l1_cache_map, cpu); + else if (cache_property == THREAD_GROUP_SHARE_L2) + mask = &per_cpu(thread_group_l2_cache_map, cpu); + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, &tg); + int i_group_start = get_cpu_thread_group_start(i, 
tg); if (unlikely(i_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + cpumask_set_cpu(i, *mask); } -out: - of_node_put(dn); - return err; + return 0; } static bool shared_caches; @@ -924,7 +995,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_cpu_l1_cache_map(cpu); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1); if (err) return err; @@ -935,6 +1006,16 @@ static int init_big_cores(void) } has_big_cores = true; + + for_each_possible_cpu(cpu) { + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + + if (err) + return err; + } + + thread_group_shares_l2 = true; + pr_debug("L2 cache only shared by the threads in the small core\n"); return 0; } @@ -1249,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) if (has_big_cores) submask_fn = cpu_smallcore_mask; + /* + * If the threads in a thread-group share L2 cache, then the + * L2-mask can be obtained from thread_group_l2_cache_map. 
+ */ + if (thread_group_shares_l2) { + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) { + if (cpu_online(i)) + set_cpus_related(i, cpu, cpu_l2_cache_mask); + } + + /* Verify that L1-cache siblings are a subset of L2 cache-siblings */ + if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) && + !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) { + pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n", + cpu); + } + + return true; + } + l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ @@ -1320,7 +1423,7 @@ static inline void add_cpu_to_smallcore_masks(int cpu) cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) { if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 310bcd768cd5..7c85ed04a164 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -35,7 +35,31 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); - kuap_check_amr(); +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. 
+ */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_check_amr(); account_cpu_user_entry(); @@ -245,6 +269,12 @@ again: account_cpu_user_exit(); +#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */ + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); +#endif return ret; } @@ -330,6 +360,10 @@ again: account_cpu_user_exit(); + /* + * We do this at the end so that we do context switch with KERNEL AMR + */ + kuap_user_restore(regs); return ret; } @@ -400,7 +434,7 @@ again: * which would cause Read-After-Write stalls. Hence, we take the AMR * value from the check above. */ - kuap_restore_amr(regs, amr); + kuap_kernel_restore(regs, amr); return ret; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cf3f8db7e0e3..67feb3524460 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -82,6 +82,7 @@ static struct clocksource clocksource_timebase = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), .read = timebase_read, + .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER, }; #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF @@ -576,14 +577,11 @@ void timer_interrupt(struct pt_regs *regs) struct pt_regs *old_regs; u64 now; - /* Some implementations of hotplug will get timer interrupts while - * offline, just ignore these and we also need to set - * decrementers_next_tb as MAX to make sure __check_irq_replay - * don't replay timer interrupt when return, otherwise we'll trap - * here infinitely :( + /* + * Some implementations of hotplug will get timer interrupts while + * offline, just ignore these. 
*/ if (unlikely(!cpu_online(smp_processor_id()))) { - *next_tb = ~(u64)0; set_dec(decrementer_max); return; } @@ -855,95 +853,6 @@ static notrace u64 timebase_read(struct clocksource *cs) return (u64)get_tb(); } - -void update_vsyscall(struct timekeeper *tk) -{ - struct timespec64 xt; - struct clocksource *clock = tk->tkr_mono.clock; - u32 mult = tk->tkr_mono.mult; - u32 shift = tk->tkr_mono.shift; - u64 cycle_last = tk->tkr_mono.cycle_last; - u64 new_tb_to_xs, new_stamp_xsec; - u64 frac_sec; - - if (clock != &clocksource_timebase) - return; - - xt.tv_sec = tk->xtime_sec; - xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_mb(); - - /* - * This computes ((2^20 / 1e9) * mult) >> shift as a - * 0.64 fixed-point fraction. - * The computation in the else clause below won't overflow - * (as long as the timebase frequency is >= 1.049 MHz) - * but loses precision because we lose the low bits of the constant - * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9. - * For a shift of 24 the error is about 0.5e-9, or about 0.5ns - * over a second. (Shift values are usually 22, 23 or 24.) - * For high frequency clocks such as the 512MHz timebase clock - * on POWER[6789], the mult value is small (e.g. 32768000) - * and so we can shift the constant by 16 initially - * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the - * remaining shifts after the multiplication, which gives a - * more accurate result (e.g. with mult = 32768000, shift = 24, - * the error is only about 1.2e-12, or 0.7ns over 10 minutes). - */ - if (mult <= 62500000 && clock->shift >= 16) - new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16); - else - new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); - - /* - * Compute the fractional second in units of 2^-32 seconds. 
- * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift - * in nanoseconds, so multiplying that by 2^32 / 1e9 gives - * it in units of 2^-32 seconds. - * We assume shift <= 32 because clocks_calc_mult_shift() - * generates shift values in the range 0 - 32. - */ - frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift); - do_div(frac_sec, NSEC_PER_SEC); - - /* - * Work out new stamp_xsec value for any legacy users of systemcfg. - * stamp_xsec is in units of 2^-20 seconds. - */ - new_stamp_xsec = frac_sec >> 12; - new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC; - - /* - * tb_update_count is used to allow the userspace gettimeofday code - * to assure itself that it sees a consistent view of the tb_to_xs and - * stamp_xsec variables. It reads the tb_update_count, then reads - * tb_to_xs and stamp_xsec and then reads tb_update_count again. If - * the two values of tb_update_count match and are even then the - * tb_to_xs and stamp_xsec values are consistent. If not, then it - * loops back and reads them again until this criteria is met. 
- */ - vdso_data->tb_orig_stamp = cycle_last; - vdso_data->stamp_xsec = new_stamp_xsec; - vdso_data->tb_to_xs = new_tb_to_xs; - vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; - vdso_data->stamp_xtime_sec = xt.tv_sec; - vdso_data->stamp_xtime_nsec = xt.tv_nsec; - vdso_data->stamp_sec_fraction = frac_sec; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++(vdso_data->tb_update_count); -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - static void __init clocksource_init(void) { struct clocksource *clock = &clocksource_timebase; @@ -1103,7 +1012,6 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; /* initialise and enable the large decrementer (if we have one) */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5006dcbe1d9f..3ec7b443fe6b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -347,12 +347,6 @@ static bool exception_common(int signr, struct pt_regs *regs, int code, current->thread.trap_nr = code; - /* - * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need - * to capture the content, if the task gets killed. 
- */ - thread_pkey_regs_save(&current->thread); - return true; } @@ -757,31 +751,6 @@ int machine_check_generic(struct pt_regs *regs) { return 0; } -#elif defined(CONFIG_E200) -int machine_check_e200(struct pt_regs *regs) -{ - unsigned long reason = mfspr(SPRN_MCSR); - - printk("Machine check in kernel mode.\n"); - printk("Caused by (from MCSR=%lx): ", reason); - - if (reason & MCSR_MCP) - pr_cont("Machine Check Signal\n"); - if (reason & MCSR_CP_PERR) - pr_cont("Cache Push Parity Error\n"); - if (reason & MCSR_CPERR) - pr_cont("Cache Parity Error\n"); - if (reason & MCSR_EXCP_ERR) - pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); - if (reason & MCSR_BUS_IRERR) - pr_cont("Bus - Read Bus Error on instruction fetch\n"); - if (reason & MCSR_BUS_DRERR) - pr_cont("Bus - Read Bus Error on data load\n"); - if (reason & MCSR_BUS_WRERR) - pr_cont("Bus - Write Bus Error on buffered store or cache line push\n"); - - return 0; -} #elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { @@ -1190,7 +1159,9 @@ static void parse_fpe(struct pt_regs *regs) flush_fp_to_thread(current); +#ifdef CONFIG_PPC_FPU_REGS code = __parse_fpscr(current->thread.fp_state.fpscr); +#endif _exception(SIGFPE, regs, code, regs->nip); } diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 8dad44262e75..e839a906fdf2 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,7 +17,10 @@ #include <linux/elf.h> #include <linux/security.h> #include <linux/memblock.h> +#include <linux/syscalls.h> +#include <vdso/datapage.h> +#include <asm/syscall.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/mmu_context.h> @@ -30,39 +33,11 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> -#undef DEBUG - -#ifdef DEBUG -#define DBG(fmt...) printk(fmt) -#else -#define DBG(fmt...) 
-#endif - -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) -static unsigned int vdso32_pages; -static void *vdso32_kbase; -static struct page **vdso32_pagelist; -unsigned long vdso32_sigtramp; -unsigned long vdso32_rt_sigtramp; - -#ifdef CONFIG_VDSO32 extern char vdso32_start, vdso32_end; -#endif - -#ifdef CONFIG_PPC64 extern char vdso64_start, vdso64_end; -static void *vdso64_kbase = &vdso64_start; -static unsigned int vdso64_pages; -static struct page **vdso64_pagelist; -unsigned long vdso64_rt_sigtramp; -#endif /* CONFIG_PPC64 */ - -static int vdso_ready; /* * The vdso data page (aka. systemcfg for old ppc64 fans) is here. @@ -70,77 +45,63 @@ static int vdso_ready; * with it, it will become dynamically allocated */ static union { - struct vdso_data data; + struct vdso_arch_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_arch_data *vdso_data = &vdso_data_store.data; -/* Format of the patch table */ -struct vdso_patch_def +static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, + unsigned long text_size) { - unsigned long ftr_mask, ftr_value; - const char *gen_name; - const char *fix_name; -}; + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; -/* Table of functions to patch based on the CPU type/revision - * - * Currently, we only change sync_dicache to do nothing on processors - * with a coherent icache - */ -static struct vdso_patch_def vdso_patches[] = { - { - CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE, - "__kernel_sync_dicache", "__kernel_sync_dicache_p5" - }, -}; + if (new_size != text_size + PAGE_SIZE) + return -EINVAL; -/* - * Some infos carried around for each of them during parsing at - * boot time. 
- */ -struct lib32_elfinfo + current->mm->context.vdso = (void __user *)new_vma->vm_start + PAGE_SIZE; + + return 0; +} + +static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf32_Ehdr *hdr; /* ptr to ELF */ - Elf32_Sym *dynsym; /* ptr to .dynsym section */ - unsigned long dynsymsize; /* size of .dynsym section */ - char *dynstr; /* ptr to .dynstr section */ - unsigned long text; /* offset of .text section in .so */ -}; + return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start); +} -struct lib64_elfinfo +static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - Elf64_Ehdr *hdr; - Elf64_Sym *dynsym; - unsigned long dynsymsize; - char *dynstr; - unsigned long text; + return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); +} + +static struct vm_special_mapping vdso32_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso32_mremap, }; +static struct vm_special_mapping vdso64_spec __ro_after_init = { + .name = "[vdso]", + .mremap = vdso64_mremap, +}; /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - struct page **vdso_pagelist; - unsigned long vdso_pages; + struct vm_special_mapping *vdso_spec; + struct vm_area_struct *vma; + unsigned long vdso_size; unsigned long vdso_base; - int rc; - - if (!vdso_ready) - return 0; -#ifdef CONFIG_PPC64 if (is_32bit_task()) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; + vdso_spec = &vdso32_spec; + vdso_size = &vdso32_end - &vdso32_start; vdso_base = VDSO32_MBASE; } else { - vdso_pagelist = vdso64_pagelist; - vdso_pages = vdso64_pages; + vdso_spec = &vdso64_spec; + vdso_size = &vdso64_end - &vdso64_start; /* * On 64bit we don't have a preferred 
map address. This * allows get_unmapped_area to find an area near other mmaps @@ -148,21 +109,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) */ vdso_base = 0; } -#else - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - vdso_base = VDSO32_MBASE; -#endif - current->mm->context.vdso_base = 0; - - /* vDSO has a problem and was disabled, just don't "enable" it for the - * process - */ - if (vdso_pages == 0) - return 0; /* Add a page to the vdso size for the data page */ - vdso_pages ++; + vdso_size += PAGE_SIZE; /* * pick a base address for the vDSO in process space. We try to put it @@ -170,16 +119,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * and end up putting it elsewhere. * Add enough to the size so that the result can be aligned. */ - if (mmap_write_lock_killable(mm)) - return -EINTR; vdso_base = get_unmapped_area(NULL, vdso_base, - (vdso_pages << PAGE_SHIFT) + - ((VDSO_ALIGNMENT - 1) & PAGE_MASK), + vdso_size + ((VDSO_ALIGNMENT - 1) & PAGE_MASK), 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - rc = vdso_base; - goto fail_mmapsem; - } + if (IS_ERR_VALUE(vdso_base)) + return vdso_base; /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); @@ -187,9 +131,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* * Put vDSO base into mm struct. We need to do this before calling * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO (since arch_vma_name fails). + * will fail to recognise it as a vDSO. */ - current->mm->context.vdso_base = vdso_base; + mm->context.vdso = (void __user *)vdso_base + PAGE_SIZE; /* * our vma flags don't have VM_WRITE so by default, the process isn't @@ -201,434 +145,54 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * It's fine to use that for setting breakpoints in the vDSO code * pages though. 
*/ - rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (rc) { - current->mm->context.vdso_base = 0; - goto fail_mmapsem; - } - - mmap_write_unlock(mm); - return 0; - - fail_mmapsem: - mmap_write_unlock(mm); - return rc; -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base) - return "[vdso]"; - return NULL; -} - - - -#ifdef CONFIG_VDSO32 -static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf32_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - *size = 0; - return NULL; -} - -static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - Elf32_Sym *sym = find_symbol32(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO32: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO32_LBASE; -} - -static int __init 
vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf32_Sym *sym32_gen, *sym32_fix; - - sym32_gen = find_symbol32(v32, orig); - if (sym32_gen == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym32_gen->st_name = 0; - return 0; - } - sym32_fix = find_symbol32(v32, fix); - if (sym32_fix == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix); - return -1; - } - sym32_gen->st_value = sym32_fix->st_value; - sym32_gen->st_size = sym32_fix->st_size; - sym32_gen->st_info = sym32_fix->st_info; - sym32_gen->st_other = sym32_fix->st_other; - sym32_gen->st_shndx = sym32_fix->st_shndx; - - return 0; -} -#else /* !CONFIG_VDSO32 */ -static unsigned long __init find_function32(struct lib32_elfinfo *lib, - const char *symname) -{ - return 0; -} - -static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - return 0; -} -#endif /* CONFIG_VDSO32 */ - - -#ifdef CONFIG_PPC64 - -static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname, - unsigned long *size) -{ - Elf64_Shdr *sechdrs; - unsigned int i; - char *secnames; - - /* Grab section headers and strings so we can tell who is who */ - sechdrs = (void *)ehdr + ehdr->e_shoff; - secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset; - - /* Find the section they want */ - for (i = 1; i < ehdr->e_shnum; i++) { - if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) { - if (size) - *size = sechdrs[i].sh_size; - return (void *)ehdr + sechdrs[i].sh_offset; - } - } - if (size) - *size = 0; - return NULL; -} - -static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib, - const char *symname) -{ - unsigned int i; - char name[MAX_SYMNAME], *c; - - for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) { - if (lib->dynsym[i].st_name == 0) - continue; - strlcpy(name, lib->dynstr + 
lib->dynsym[i].st_name, - MAX_SYMNAME); - c = strchr(name, '@'); - if (c) - *c = 0; - if (strcmp(symname, name) == 0) - return &lib->dynsym[i]; - } - return NULL; -} - -/* Note that we assume the section is .text and the symbol is relative to - * the library base - */ -static unsigned long __init find_function64(struct lib64_elfinfo *lib, - const char *symname) -{ - Elf64_Sym *sym = find_symbol64(lib, symname); - - if (sym == NULL) { - printk(KERN_WARNING "vDSO64: function %s not found !\n", - symname); - return 0; - } - return sym->st_value - VDSO64_LBASE; -} - -static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64, - const char *orig, const char *fix) -{ - Elf64_Sym *sym64_gen, *sym64_fix; - - sym64_gen = find_symbol64(v64, orig); - if (sym64_gen == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig); - return -1; - } - if (fix == NULL) { - sym64_gen->st_name = 0; - return 0; - } - sym64_fix = find_symbol64(v64, fix); - if (sym64_fix == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix); - return -1; - } - sym64_gen->st_value = sym64_fix->st_value; - sym64_gen->st_size = sym64_fix->st_size; - sym64_gen->st_info = sym64_fix->st_info; - sym64_gen->st_other = sym64_fix->st_other; - sym64_gen->st_shndx = sym64_fix->st_shndx; - - return 0; + vma = _install_special_mapping(mm, vdso_base, vdso_size, + VM_READ | VM_EXEC | VM_MAYREAD | + VM_MAYWRITE | VM_MAYEXEC, vdso_spec); + return PTR_ERR_OR_ZERO(vma); } -#endif /* CONFIG_PPC64 */ - - -static __init int vdso_do_find_sections(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - void *sect; - - /* - * Locate symbol tables & text section - */ - -#ifdef CONFIG_VDSO32 - v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize); - v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL); - if (v32->dynsym == NULL || v32->dynstr == NULL) { - printk(KERN_ERR "vDSO32: required symbol section not found\n"); - return -1; - } - sect = 
find_section32(v32->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO32: the .text section was not found\n"); - return -1; - } - v32->text = sect - vdso32_kbase; -#endif - -#ifdef CONFIG_PPC64 - v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize); - v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL); - if (v64->dynsym == NULL || v64->dynstr == NULL) { - printk(KERN_ERR "vDSO64: required symbol section not found\n"); - return -1; - } - sect = find_section64(v64->hdr, ".text", NULL); - if (sect == NULL) { - printk(KERN_ERR "vDSO64: the .text section was not found\n"); - return -1; - } - v64->text = sect - vdso64_kbase; -#endif /* CONFIG_PPC64 */ - - return 0; -} - -static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { - /* - * Find signal trampolines - */ - -#ifdef CONFIG_PPC64 - vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64"); -#endif - vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32"); - vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32"); -} + struct mm_struct *mm = current->mm; + int rc; -static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ -#ifdef CONFIG_VDSO32 - Elf32_Sym *sym32; -#endif -#ifdef CONFIG_PPC64 - Elf64_Sym *sym64; + mm->context.vdso = NULL; - sym64 = find_symbol64(v64, "__kernel_datapage_offset"); - if (sym64 == NULL) { - printk(KERN_ERR "vDSO64: Can't find symbol " - "__kernel_datapage_offset !\n"); - return -1; - } - *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) = - (vdso64_pages << PAGE_SHIFT) - - (sym64->st_value - VDSO64_LBASE); -#endif /* CONFIG_PPC64 */ + if (mmap_write_lock_killable(mm)) + return -EINTR; -#ifdef CONFIG_VDSO32 - sym32 = find_symbol32(v32, "__kernel_datapage_offset"); - if (sym32 == NULL) { - printk(KERN_ERR "vDSO32: Can't find symbol " - "__kernel_datapage_offset 
!\n"); - return -1; - } - *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) = - (vdso32_pages << PAGE_SHIFT) - - (sym32->st_value - VDSO32_LBASE); -#endif + rc = __arch_setup_additional_pages(bprm, uses_interp); + if (rc) + mm->context.vdso = NULL; - return 0; + mmap_write_unlock(mm); + return rc; } +#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \ + void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \ + void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \ + \ + do_##type##_fixups((value), __start, __end); \ +} while (0) -static __init int vdso_fixup_features(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) +static void __init vdso_fixup_features(void) { - unsigned long size; - void *start; - #ifdef CONFIG_PPC64 - start = find_section64(v64->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start + size); - - start = find_section64(v64->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); - - start = find_section64(v64->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup); #endif /* CONFIG_PPC64 */ #ifdef CONFIG_VDSO32 - start = find_section32(v32->hdr, "__ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->cpu_features, - start, start + size); - - start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size); - if (start) - do_feature_fixups(cur_cpu_spec->mmu_features, - start, start 
+ size); - + VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup); + VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup); #ifdef CONFIG_PPC64 - start = find_section32(v32->hdr, "__fw_ftr_fixup", &size); - if (start) - do_feature_fixups(powerpc_firmware_features, - start, start + size); + VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup); #endif /* CONFIG_PPC64 */ - - start = find_section32(v32->hdr, "__lwsync_fixup", &size); - if (start) - do_lwsync_fixups(cur_cpu_spec->cpu_features, - start, start + size); + VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup); #endif - - return 0; -} - -static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, - struct lib64_elfinfo *v64) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) { - struct vdso_patch_def *patch = &vdso_patches[i]; - int match = (cur_cpu_spec->cpu_features & patch->ftr_mask) - == patch->ftr_value; - if (!match) - continue; - - DBG("replacing %s with %s...\n", patch->gen_name, - patch->fix_name ? "NONE" : patch->fix_name); - - /* - * Patch the 32 bits and 64 bits symbols. Note that we do not - * patch the "." symbol on 64 bits. - * It would be easy to do, but doesn't seem to be necessary, - * patching the OPD symbol is enough. 
- */ - vdso_do_func_patch32(v32, v64, patch->gen_name, - patch->fix_name); -#ifdef CONFIG_PPC64 - vdso_do_func_patch64(v32, v64, patch->gen_name, - patch->fix_name); -#endif /* CONFIG_PPC64 */ - } - - return 0; -} - - -static __init int vdso_setup(void) -{ - struct lib32_elfinfo v32; - struct lib64_elfinfo v64; - - v32.hdr = vdso32_kbase; -#ifdef CONFIG_PPC64 - v64.hdr = vdso64_kbase; -#endif - if (vdso_do_find_sections(&v32, &v64)) - return -1; - - if (vdso_fixup_datapage(&v32, &v64)) - return -1; - - if (vdso_fixup_features(&v32, &v64)) - return -1; - - if (vdso_fixup_alt_funcs(&v32, &v64)) - return -1; - - vdso_setup_trampolines(&v32, &v64); - - return 0; } /* @@ -638,27 +202,13 @@ static __init int vdso_setup(void) static void __init vdso_setup_syscall_map(void) { unsigned int i; - extern unsigned long *sys_call_table; -#ifdef CONFIG_PPC64 - extern unsigned long *compat_sys_call_table; -#endif - extern unsigned long sys_ni_syscall; - for (i = 0; i < NR_syscalls; i++) { -#ifdef CONFIG_PPC64 - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_64[i >> 5] |= - 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && - compat_sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#else /* CONFIG_PPC64 */ - if (sys_call_table[i] != sys_ni_syscall) - vdso_data->syscall_map_32[i >> 5] |= - 0x80000000UL >> (i & 0x1f); -#endif /* CONFIG_PPC64 */ + compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) + vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -689,10 +239,26 @@ int vdso_getcpu_init(void) early_initcall(vdso_getcpu_init); #endif -static int __init vdso_init(void) +static struct page ** __init vdso_setup_pages(void *start, void *end) { int i; + struct page **pagelist; + int pages = (end - start) >> PAGE_SHIFT; + + pagelist = kcalloc(pages + 
1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + + pagelist[0] = virt_to_page(vdso_data); + + for (i = 0; i < pages; i++) + pagelist[i + 1] = virt_to_page(start + i * PAGE_SIZE); + + return pagelist; +} +static int __init vdso_init(void) +{ #ifdef CONFIG_PPC64 /* * Fill up the "systemcfg" stuff for backward compatibility @@ -717,75 +283,19 @@ static int __init vdso_init(void) vdso_data->icache_block_size = ppc64_caches.l1i.block_size; vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; - - /* - * Calculate the size of the 64 bits vDSO - */ - vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; - DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); #endif /* CONFIG_PPC64 */ - -#ifdef CONFIG_VDSO32 - vdso32_kbase = &vdso32_start; - - /* - * Calculate the size of the 32 bits vDSO - */ - vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT; - DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages); -#endif - - - /* - * Setup the syscall map in the vDOS - */ vdso_setup_syscall_map(); - /* - * Initialize the vDSO images in memory, that is do necessary - * fixups of vDSO symbols, locate trampolines, etc... 
- */ - if (vdso_setup()) { - printk(KERN_ERR "vDSO setup failure, not enabled !\n"); - vdso32_pages = 0; -#ifdef CONFIG_PPC64 - vdso64_pages = 0; -#endif - return 0; - } + vdso_fixup_features(); -#ifdef CONFIG_VDSO32 - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[i++] = virt_to_page(vdso_data); - vdso32_pagelist[i] = NULL; -#endif - -#ifdef CONFIG_PPC64 - vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso64_pagelist == NULL); - for (i = 0; i < vdso64_pages; i++) { - struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); - get_page(pg); - vdso64_pagelist[i] = pg; - } - vdso64_pagelist[i++] = virt_to_page(vdso_data); - vdso64_pagelist[i] = NULL; -#endif /* CONFIG_PPC64 */ + if (IS_ENABLED(CONFIG_VDSO32)) + vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); - get_page(virt_to_page(vdso_data)); + if (IS_ENABLED(CONFIG_PPC64)) + vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); smp_wmb(); - vdso_ready = 1; return 0; } diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 73eada6bc8cd..59aa2944ecae 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -2,8 +2,20 @@ # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + 
CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules ifdef CROSS32_COMPILE @@ -15,14 +27,16 @@ endif CC32FLAGS := ifdef CONFIG_PPC64 CC32FLAGS += -m32 +KBUILD_CFLAGS := $(filter-out -mcmodel=medium,$(KBUILD_CFLAGS)) endif -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +targets := $(obj-vdso32) vdso32.so.dbg obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both @@ -33,33 +47,30 @@ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc # Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso32ld_and_check) # assembly rules for the .S files $(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) # actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(VDSOCC) $(c_flags) 
$(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso32ld_and_check = VDSO32L $@ + cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 3440ddf21c8b..f340e82d1981 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -24,11 +24,15 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr #endif #ifdef CONFIG_PPC64 @@ -84,20 +88,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) - +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 1d23e2771dba..65244416ab94 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text - .global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int 
*syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 beqlr @@ -51,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..c7b54a5dcd3e --- /dev/null +++ b/arch/powerpc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index e7f8f9f1b3f4..a6e29f880e0e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -12,13 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> - -/* Offset for the low 32-bit part of a field of long type */ -#ifdef CONFIG_PPC64 -#define LOPART 4 -#else -#define LOPART 0 -#endif +#include <asm/vdso/gettimeofday.h> .text /* @@ -28,32 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr. 
r10,r3 /* r10 saves tv */ - mr r11,r4 /* r11 saves tz */ - get_datapage r9, r0 - beq 3f - LOAD_REG_IMMEDIATE(r7, 1000000) /* load up USEC_PER_SEC */ - bl __do_get_tspec@local /* get sec/usec from tb & kernel */ - stw r3,TVAL32_TV_SEC(r10) - stw r4,TVAL32_TV_USEC(r10) - -3: cmplwi r11,0 /* check if tz is NULL */ - mtlr r12 - crclr cr0*4+so - li r3,0 - beqlr - - lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r9) - stw r4,TZONE_TZ_MINWEST(r11) - stw r5,TZONE_TZ_DSTTIME(r11) - - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) /* @@ -63,129 +32,18 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpli cr0,r3,CLOCK_REALTIME - cmpli cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpli cr5,r3,CLOCK_REALTIME_COARSE - cmpli cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0, .Lgettime_fallback - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r9, r0 - LOAD_REG_IMMEDIATE(r7, NSEC_PER_SEC) /* load up NSEC_PER_SEC */ - beq cr5, .Lcoarse_clocks -.Lprecise_clocks: - bl __do_get_tspec@local /* get sec/nsec from tb & kernel */ - bne cr1, .Lfinish /* not monotonic -> all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_xsec. - * At this point, r3,r4 contain our sec/nsec values, r5 and r6 - * can be used, r7 contains NSEC_PER_SEC. - */ - - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* We now have our offset in r5,r6. 
We create a fake dependency - * on that value and re-check the counter - */ - or r0,r6,r5 - xor r0,r0,r0 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmpl cr0,r8,r0 /* check if updated */ - bne- .Lprecise_clocks - b .Lfinish_monotonic - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -.Lcoarse_clocks: - lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- .Lcoarse_clocks - add r9,r9,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - lwz r4,STAMP_XTIME_NSEC+LOPART(r9) - bne cr6,1f - - /* CLOCK_MONOTONIC_COARSE */ - lwz r5,(WTOM_CLOCK_SEC+LOPART)(r9) - lwz r6,WTOM_CLOCK_NSEC(r9) - - /* check if counter has updated */ - or r0,r6,r5 -1: or r0,r0,r3 - or r0,r0,r4 - xor r0,r0,r0 - add r3,r3,r0 - lwz r0,CFG_TB_UPDATE_COUNT+LOPART(r9) - cmpl cr0,r0,r8 /* check if updated */ - bne- .Lcoarse_clocks - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6, .Lfinish - - /* Calculate and store result. Note that this mimics the C code, - * which may cause funny results if nsec goes negative... is that - * possible at all ? 
- */ -.Lfinish_monotonic: - add r3,r3,r5 - add r4,r4,r6 - cmpw cr0,r4,r7 - cmpwi cr1,r4,0 - blt 1f - subf r4,r7,r4 - addi r3,r3,1 -1: bge cr1, .Lfinish - addi r3,r3,-1 - add r4,r4,r7 - -.Lfinish: - stw r3,TSPC32_TV_SEC(r11) - stw r4,TSPC32_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -.Lgettime_fallback: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) +/* + * Exact prototype of clock_gettime64() + * + * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts); + * + */ +V_FUNCTION_BEGIN(__kernel_clock_gettime64) + cvdso_call __c_kernel_clock_gettime64 +V_FUNCTION_END(__kernel_clock_gettime64) /* * Exact prototype of clock_getres() @@ -194,37 +52,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmplwi cr0, r3, CLOCK_MAX - cmpwi cr1, r3, CLOCK_REALTIME_COARSE - cmpwi cr7, r3, CLOCK_MONOTONIC_COARSE - bgt cr0, 99f - LOAD_REG_IMMEDIATE(r5, KTIME_LOW_RES) - beq cr1, 1f - beq cr7, 1f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 -1: li r3,0 - cmpli cr0,r4,0 - crclr cr0*4+so - beqlr - stw r3,TSPC32_TV_SEC(r4) - stw r5,TSPC32_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) @@ -235,105 +63,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r9, r0 - - lwz r3,STAMP_XTIME_SEC+LOPART(r9) - - cmplwi r11,0 /* check if t is NULL */ - mtlr r12 - crclr cr0*4+so - beqlr - stw r3,0(r11) /* store result at *t */ - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - -/* - * This is the 
core of clock_gettime() and gettimeofday(), - * it returns the current time in r3 (seconds) and r4. - * On entry, r7 gives the resolution of r4, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds. - * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5 and r6. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -__do_get_tspec: - .cfi_startproc - /* Check for update count & load values. We use the low - * order 32 bits of the update count - */ -1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r9,r9,r0 - - /* Load orig stamp (offset to TB) */ - lwz r5,CFG_TB_ORIG_STAMP(r9) - lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) - - /* Get a stable TB value */ -2: MFTBU(r3) - MFTBL(r4) - MFTBU(r0) - cmplw cr0,r3,r0 - bne- 2b - - /* Subtract tb orig stamp and shift left 12 bits. - */ - subfc r4,r6,r4 - subfe r0,r5,r3 - slwi r0,r0,12 - rlwimi. r0,r4,12,20,31 - slwi r4,r4,12 - - /* - * Load scale factor & do multiplication. - * We only use the high 32 bits of the tb_to_xs value. - * Even with a 1GHz timebase clock, the high 32 bits of - * tb_to_xs will be at least 4 million, so the error from - * ignoring the low 32 bits will be no more than 0.25ppm. - * The error will just make the clock run very very slightly - * slow until the next time the kernel updates the VDSO data, - * at which point the clock will catch up to the kernel's value, - * so there is no long-term error accumulation. - */ - lwz r5,CFG_TB_TO_XS(r9) /* load values */ - mulhwu r4,r4,r5 - li r3,0 - - beq+ 4f /* skip high part computation if 0 */ - mulhwu r3,r0,r5 - mullw r5,r0,r5 - addc r4,r4,r5 - addze r3,r3 -4: - /* At this point, we have seconds since the xtime stamp - * as a 32.32 fixed-point number in r3 and r4. - * Load & add the xtime stamp. 
- */ - lwz r5,STAMP_XTIME_SEC+LOPART(r9) - lwz r6,STAMP_SEC_FRAC(r9) - addc r4,r4,r6 - adde r3,r3,r5 - - /* We create a fake dependency on the result in r3/r4 - * and re-check the counter - */ - or r6,r4,r3 - xor r0,r6,r6 - add r9,r9,r0 - lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) - cmplw cr0,r8,r0 /* check if updated */ - bne- 1b - - mulhwu r4,r4,r7 /* convert to micro or nanoseconds */ - - blr - .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 7eadac74c7f9..a4b806b0d618 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO32_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -36,17 +39,25 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; #ifdef CONFIG_PPC64 . = ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; #endif /* @@ -68,49 +79,15 @@ SECTIONS __end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. 
- * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.got1) } } @@ -138,19 +115,14 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; __kernel_gettimeofday; __kernel_clock_gettime; + __kernel_clock_gettime64; __kernel_clock_getres; __kernel_time; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp32; __kernel_sigtramp_rt32; #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) @@ -160,3 +132,9 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. 
+ */ +VDSO_sigtramp32 = __kernel_sigtramp32; +VDSO_sigtramp_rt32 = __kernel_sigtramp_rt32; diff --git a/arch/powerpc/kernel/vdso32/vgettimeofday.c b/arch/powerpc/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..65fb03fb1731 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime32_data(vd, clock, ts); +} + +int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_time32_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dfd34f68bfa1..d365810a689a 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,16 +1,29 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include 
$(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both @@ -20,28 +33,23 @@ obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + # Force dependency (incbin is bad) -$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so.dbg # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso64ld_and_check) -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -# actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso64.so: $(obj)/vdso64.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call 
cmd,vdso_install) - -vdso_install: vdso64.so +# actual build commands +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index cab14324242b..76c3c8cf8ece 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -23,10 +23,14 @@ */ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc +BEGIN_FTR_SECTION + b 3f +END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) mflr r12 .cfi_register lr,r12 - get_datapage r10, r0 + get_datapage r10 mtlr r12 + .cfi_restore lr lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 @@ -61,19 +65,11 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) isync li r3,0 blr - .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache) - - -/* - * POWER5 version of __kernel_sync_dicache - */ -V_FUNCTION_BEGIN(__kernel_sync_dicache_p5) - .cfi_startproc +3: crclr cr0*4+so sync isync li r3,0 blr .cfi_endproc -V_FUNCTION_END(__kernel_sync_dicache_p5) +V_FUNCTION_END(__kernel_sync_dicache) diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 067247d3efb9..00760dc69d68 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -13,9 +13,6 @@ #include <asm/vdso_datapage.h> .text -.global __kernel_datapage_offset; -__kernel_datapage_offset: - .long 0 /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; @@ -31,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - get_datapage r3, r0 + get_datapage r3 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 cmpldi cr0,r4,0 @@ -53,7 +50,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3, r0 + get_datapage r3 ld r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 crclr cr0*4+so diff --git a/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh 
b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..4bf15ffd5933 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 20f8be40c653..d7a7bfb51081 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -12,6 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> +#include <asm/vdso/gettimeofday.h> .text /* @@ -21,31 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - get_datapage r3, r0 - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l - bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ - std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ - std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) @@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - 
.cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpwi cr5,r3,CLOCK_REALTIME_COARSE - cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r3, r0 - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l - beq cr5,70f -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ - bne cr1,80f /* if not monotonic, all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_tspec. - * At this point, r4,r5 contain our sec/nsec values. - */ - - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r6,r9. We create a fake dependency - * on that result and re-check the counter - */ - or r0,r6,r9 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - b 78f - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -70: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? 
loop */ - bne- 70b - add r3,r3,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - ld r4,STAMP_XTIME_SEC(r3) - ld r5,STAMP_XTIME_NSEC(r3) - bne cr6,75f - - /* CLOCK_MONOTONIC_COARSE */ - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* check if counter has updated */ - or r0,r6,r9 -75: or r0,r0,r4 - or r0,r0,r5 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 70b - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. - */ - bne cr6,80f - - /* Add wall->monotonic offset and check for overflow or underflow */ -78: add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 79f - subf r5,r7,r5 - addi r4,r4,1 -79: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 - -80: std r4,TSPC64_TV_SEC(r11) - std r5,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 - li r3,0 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) /* @@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - 
.cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r3, r0 - - ld r4,STAMP_XTIME_SEC(r3) - - cmpldi r11,0 /* check if t is NULL */ - beq 2f - std r4,0(r11) /* store result at *t */ -2: mtlr r12 - crclr cr0*4+so - mr r3,r4 - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r4 (seconds) and r5. - * On entry, r7 gives the resolution of r5, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. - * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0, r6 and r9. - * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -V_FUNCTION_BEGIN(__do_get_tspec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. 
- */ - MFTB(r6) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r6,r9,r6 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - sldi r6,r6,12 /* compute time since stamp_xtime */ - mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ - - /* Add stamp since epoch */ - ld r4,STAMP_XTIME_SEC(r3) - lwz r5,STAMP_SEC_FRAC(r3) - or r0,r4,r5 - or r0,r0,r6 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld r0,r8 /* check if updated */ - bne- 1b /* reload if so */ - - /* convert to seconds & nanoseconds and add to stamp */ - add r6,r6,r5 /* add on fractional seconds of xtime */ - mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ - srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r5,r5,32 - add r4,r4,r6 - blr - .cfi_endproc -V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..6164d1a1ba11 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -4,6 +4,8 @@ * library */ #include <asm/vdso.h> +#include <asm/page.h> +#include <asm-generic/vmlinux.lds.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -15,7 +17,8 @@ ENTRY(_start) SECTIONS { - . = VDSO64_LBASE + SIZEOF_HEADERS; + PROVIDE(_vdso_datapage = . - PAGE_SIZE); + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text .gnu.hash : { *(.gnu.hash) } @@ -37,16 +40,24 @@ SECTIONS PROVIDE(etext = .); . = ALIGN(8); + VDSO_ftr_fixup_start = .; __ftr_fixup : { *(__ftr_fixup) } + VDSO_ftr_fixup_end = .; . = ALIGN(8); + VDSO_mmu_ftr_fixup_start = .; __mmu_ftr_fixup : { *(__mmu_ftr_fixup) } + VDSO_mmu_ftr_fixup_end = .; . = ALIGN(8); + VDSO_lwsync_fixup_start = .; __lwsync_fixup : { *(__lwsync_fixup) } + VDSO_lwsync_fixup_end = .; . 
= ALIGN(8); + VDSO_fw_ftr_fixup_start = .; __fw_ftr_fixup : { *(__fw_ftr_fixup) } + VDSO_fw_ftr_fixup_end = .; /* * Other stuff is appended to the text segment: @@ -61,56 +72,21 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } _end = .; PROVIDE(end = .); - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the beginning - * of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.opd) } } @@ -138,18 +114,12 @@ VERSION { VDSO_VERSION_STRING { global: - /* - * Has to be there for the kernel to find - */ - __kernel_datapage_offset; - __kernel_get_syscall_map; 
__kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; __kernel_get_tbfreq; __kernel_sync_dicache; - __kernel_sync_dicache_p5; __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; @@ -157,3 +127,8 @@ VERSION local: *; }; } + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp_rt64 = __kernel_sigtramp_rt64; diff --git a/arch/powerpc/kernel/vdso64/vgettimeofday.c b/arch/powerpc/kernel/vdso64/vgettimeofday.c new file mode 100644 index 000000000000..5b5500058344 --- /dev/null +++ b/arch/powerpc/kernel/vdso64/vgettimeofday.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementations of gettimeofday() and similar. + */ +#include <linux/time.h> +#include <linux/types.h> + +int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, + const struct vdso_data *vd) +{ + return __cvdso_clock_gettime_data(vd, clock, ts); +} + +int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, + const struct vdso_data *vd) +{ + return __cvdso_gettimeofday_data(vd, tv, tz); +} + +int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, + const struct vdso_data *vd) +{ + return __cvdso_clock_getres_data(vd, clock_id, res); +} + +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +{ + return __cvdso_time_data(vd, time); +} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e184d17387f6..0318ba436f34 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -200,21 +200,7 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . 
= ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -242,9 +228,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) |