Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 52
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1024qds.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t1024rdb.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t104xqds.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t208xqds.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/t208xrdb.dtsi | 2
-rw-r--r--  arch/powerpc/boot/dts/microwatt.dts | 22
-rw-r--r--  arch/powerpc/boot/dts/turris1x.dts | 14
-rw-r--r--  arch/powerpc/boot/dts/warp.dts | 4
-rwxr-xr-x  arch/powerpc/boot/wrapper | 17
-rw-r--r--  arch/powerpc/include/asm/book3s/32/tlbflush.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 56
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 41
-rw-r--r--  arch/powerpc/include/asm/cmpxchg.h | 231
-rw-r--r--  arch/powerpc/include/asm/code-patching.h | 2
-rw-r--r--  arch/powerpc/include/asm/cputime.h | 17
-rw-r--r--  arch/powerpc/include/asm/debug.h | 2
-rw-r--r--  arch/powerpc/include/asm/ftrace.h | 19
-rw-r--r--  arch/powerpc/include/asm/hvcall.h | 3
-rw-r--r--  arch/powerpc/include/asm/interrupt.h | 1
-rw-r--r--  arch/powerpc/include/asm/irqflags.h | 58
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 12
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 10
-rw-r--r--  arch/powerpc/include/asm/nohash/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/tlbflush.h | 7
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 19
-rw-r--r--  arch/powerpc/include/asm/processor.h | 15
-rw-r--r--  arch/powerpc/include/asm/prom.h | 1
-rw-r--r--  arch/powerpc/include/asm/ps3.h | 4
-rw-r--r--  arch/powerpc/include/asm/pte-walk.h | 25
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 36
-rw-r--r--  arch/powerpc/include/asm/qspinlock.h | 192
-rw-r--r--  arch/powerpc/include/asm/qspinlock_paravirt.h | 7
-rw-r--r--  arch/powerpc/include/asm/qspinlock_types.h | 72
-rw-r--r--  arch/powerpc/include/asm/rtas.h | 15
-rw-r--r--  arch/powerpc/include/asm/spinlock.h | 2
-rw-r--r--  arch/powerpc/include/asm/spinlock_types.h | 2
-rw-r--r--  arch/powerpc/include/asm/syscalls.h | 7
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 34
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 14
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 55
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 130
-rw-r--r--  arch/powerpc/kernel/head_32.h | 4
-rw-r--r--  arch/powerpc/kernel/head_40x.S | 2
-rw-r--r--  arch/powerpc/kernel/head_44x.S | 6
-rw-r--r--  arch/powerpc/kernel/head_64.S | 6
-rw-r--r--  arch/powerpc/kernel/head_85xx.S | 8
-rw-r--r--  arch/powerpc/kernel/head_8xx.S | 2
-rw-r--r--  arch/powerpc/kernel/head_book3s_32.S | 4
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 4
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 2
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 14
-rw-r--r--  arch/powerpc/kernel/interrupt_64.S | 67
-rw-r--r--  arch/powerpc/kernel/irq.c | 4
-rw-r--r--  arch/powerpc/kernel/kgdb.c | 2
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 14
-rw-r--r--  arch/powerpc/kernel/misc_32.S | 2
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 4
-rw-r--r--  arch/powerpc/kernel/module_64.c | 10
-rw-r--r--  arch/powerpc/kernel/optprobes.c | 2
-rw-r--r--  arch/powerpc/kernel/optprobes_head.S | 4
-rw-r--r--  arch/powerpc/kernel/ppc_save_regs.S | 57
-rw-r--r--  arch/powerpc/kernel/process.c | 97
-rw-r--r--  arch/powerpc/kernel/prom.c | 4
-rw-r--r--  arch/powerpc/kernel/rtas.c | 192
-rw-r--r--  arch/powerpc/kernel/rtasd.c | 7
-rw-r--r--  arch/powerpc/kernel/smp.c | 2
-rw-r--r--  arch/powerpc/kernel/stacktrace.c | 10
-rw-r--r--  arch/powerpc/kernel/sys_ppc32.c | 13
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 7
-rw-r--r--  arch/powerpc/kernel/time.c | 23
-rw-r--r--  arch/powerpc/kernel/tm.S | 8
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_mprofile.S | 2
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/powerpc/kexec/file_load_64.c | 59
-rw-r--r--  arch/powerpc/kvm/Kconfig | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 7
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv_uvmem.c | 5
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 12
-rw-r--r--  arch/powerpc/kvm/book3s_xive.h | 3
-rw-r--r--  arch/powerpc/kvm/book3s_xive_native.c | 3
-rw-r--r--  arch/powerpc/kvm/booke.c | 3
-rw-r--r--  arch/powerpc/kvm/bookehv_interrupts.S | 9
-rw-r--r--  arch/powerpc/lib/Makefile | 4
-rw-r--r--  arch/powerpc/lib/code-patching.c | 234
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 173
-rw-r--r--  arch/powerpc/lib/qspinlock.c | 996
-rw-r--r--  arch/powerpc/lib/sstep.c | 21
-rw-r--r--  arch/powerpc/lib/test_emulate_step_exec_instr.S | 2
-rw-r--r--  arch/powerpc/lib/vmx-helper.c | 12
-rw-r--r--  arch/powerpc/mm/book3s64/hash_4k.c | 5
-rw-r--r--  arch/powerpc/mm/book3s64/hash_64k.c | 10
-rw-r--r--  arch/powerpc/mm/book3s64/hash_native.c | 67
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c | 8
-rw-r--r--  arch/powerpc/mm/book3s64/hash_utils.c | 142
-rw-r--r--  arch/powerpc/mm/book3s64/internal.h | 11
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c | 4
-rw-r--r--  arch/powerpc/mm/mem.c | 1
-rw-r--r--  arch/powerpc/mm/nohash/kaslr_booke.c | 1
-rw-r--r--  arch/powerpc/mm/nohash/tlb.c | 8
-rw-r--r--  arch/powerpc/net/bpf_jit_comp32.c | 52
-rw-r--r--  arch/powerpc/perf/callchain.c | 9
-rw-r--r--  arch/powerpc/perf/hv-gpci-requests.h | 4
-rw-r--r--  arch/powerpc/perf/hv-gpci.c | 35
-rw-r--r--  arch/powerpc/perf/hv-gpci.h | 1
-rw-r--r--  arch/powerpc/perf/req-gen/perf.h | 20
-rw-r--r--  arch/powerpc/platforms/44x/warp.c | 105
-rw-r--r--  arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 15
-rw-r--r--  arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/sgy_cts1000.c | 132
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 9
-rw-r--r--  arch/powerpc/platforms/pasemi/gpio_mdio.c | 4
-rw-r--r--  arch/powerpc/platforms/powermac/setup.c | 18
-rw-r--r--  arch/powerpc/platforms/ps3/system-bus.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_pseries.c | 15
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 1
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S | 38
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/mobility.c | 7
-rw-r--r--  arch/powerpc/platforms/pseries/plpks.c | 50
-rw-r--r--  arch/powerpc/platforms/pseries/plpks.h | 2
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 83
-rw-r--r--  arch/powerpc/platforms/pseries/vas.h | 6
-rw-r--r--  arch/powerpc/sysdev/fsl_pci.c | 13
-rw-r--r--  arch/powerpc/sysdev/mpic_msgr.c | 4
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 6
-rw-r--r--  arch/powerpc/sysdev/xive/spapr.c | 1
-rw-r--r--  arch/powerpc/xmon/xmon.c | 17
134 files changed, 3175 insertions, 1115 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9c07068ba5e5..b8c4ac56bddc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
source "arch/powerpc/platforms/Kconfig.cputype"
+config CC_HAS_ELFV2
+ def_bool PPC64 && $(cc-option, -mabi=elfv2)
+
config 32BIT
bool
default y if PPC32
@@ -96,7 +99,7 @@ config LOCKDEP_SUPPORT
config GENERIC_LOCKBREAK
bool
default y
- depends on SMP && PREEMPTION
+ depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS
config GENERIC_HWEIGHT
bool
@@ -147,6 +150,7 @@ config PPC
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_SPLIT_ARG64 if PPC32
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
@@ -154,7 +158,6 @@ config PPC
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
- select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
@@ -287,7 +290,7 @@ config PPC
#
config PPC_LONG_DOUBLE_128
- depends on PPC64
+ depends on PPC64 && ALTIVEC
def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
config PPC_BARRIER_NOSPEC
@@ -295,6 +298,9 @@ config PPC_BARRIER_NOSPEC
default y
depends on PPC_BOOK3S_64 || PPC_E500
+config PPC_HAS_LBARX_LHARX
+ bool
+
config EARLY_PRINTK
bool
default y
@@ -530,6 +536,15 @@ config HOTPLUG_CPU
Say N if you are unsure.
+config INTERRUPT_SANITIZE_REGISTERS
+ bool "Clear gprs on interrupt arrival"
+ depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+ default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
+ help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
@@ -584,6 +599,24 @@ config KEXEC_FILE
config ARCH_HAS_KEXEC_PURGATORY
def_bool KEXEC_FILE
+config PPC64_BIG_ENDIAN_ELF_ABI_V2
+ bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)"
+ depends on PPC64 && CPU_BIG_ENDIAN
+ depends on CC_HAS_ELFV2
+ depends on LD_IS_BFD && LD_VERSION >= 22400
+ default n
+ help
+ This builds the kernel image using the "Power Architecture 64-Bit ELF
+ V2 ABI Specification", which has a reduced stack overhead and faster
+ function calls. This internal kernel ABI option does not affect
+ userspace compatibility.
+
+ The V2 ABI is standard for 64-bit little-endian, but for big-endian
+ it is less well tested by kernel and toolchain. However some distros
+ build userspace this way, and it can produce a functioning kernel.
+
+ This requires GCC and binutils 2.24 or newer.
+
config RELOCATABLE
bool "Build a relocatable kernel"
depends on PPC64 || (FLATMEM && (44x || PPC_85xx))
@@ -1013,19 +1046,6 @@ config PPC_SECVAR_SYSFS
read/write operations on these variables. Say Y if you have
secure boot enabled and want to expose variables to userspace.
-config PPC_RTAS_FILTER
- bool "Enable filtering of RTAS syscalls"
- default y
- depends on PPC_RTAS
- help
- The RTAS syscall API has security issues that could be used to
- compromise system integrity. This option enforces restrictions on the
- RTAS calls and arguments passed by userspace programs to mitigate
- these issues.
-
- Say Y unless you know what you are doing and the filter is causing
- problems for you.
-
endmenu
config ISA_DMA_API
diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts
index d6858b7cd93f..9ea7942f914e 100644
--- a/arch/powerpc/boot/dts/fsl/t1024qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -151,7 +151,7 @@
};
i2c@118000 {
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index dbcd31cc35dc..270aaf631f2a 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -165,7 +165,7 @@
};
i2c@118100 {
- pca9546@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9546";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
index 615479732252..1c329f076f64 100644
--- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
@@ -268,7 +268,7 @@
};
i2c@118000 {
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index bfe1ed5be337..fc7bec5dcb90 100644
--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -128,7 +128,7 @@
};
i2c@118100 {
- pca9546@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9546";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
index db4139999b28..962c99941645 100644
--- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -135,7 +135,7 @@
};
i2c@118000 {
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
index ff87e67c70da..ecc3e8c7394c 100644
--- a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
@@ -138,7 +138,7 @@
};
i2c@118100 {
- pca9546@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9546";
reg = <0x77>;
};
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index b69db1d275cd..269e930b3b0b 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -21,6 +21,14 @@
reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
};
+ clocks {
+ sys_clk: litex_sys_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <100000000>;
+ };
+ };
+
cpus {
#size-cells = <0x00>;
#address-cells = <0x01>;
@@ -141,6 +149,20 @@
litex,slot-size = <0x800>;
interrupts = <0x11 0x1>;
};
+
+ mmc@8040000 {
+ compatible = "litex,mmc";
+ reg = <0x8042800 0x800
+ 0x8041000 0x800
+ 0x8040800 0x800
+ 0x8042000 0x800
+ 0x8041800 0x800>;
+ reg-names = "phy", "core", "reader", "writer", "irq";
+ bus-width = <4>;
+ interrupts = <0x13 1>;
+ cap-sd-highspeed;
+ clocks = <&sys_clk>;
+ };
};
chosen {
diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts
index 045af668e928..e9cda34a140e 100644
--- a/arch/powerpc/boot/dts/turris1x.dts
+++ b/arch/powerpc/boot/dts/turris1x.dts
@@ -69,6 +69,20 @@
interrupt-parent = <&gpio>;
interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */
<13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Local temperature sensor (SA56004ED internal) */
+ channel@0 {
+ reg = <0>;
+ label = "board";
+ };
+
+ /* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */
+ channel@1 {
+ reg = <1>;
+ label = "cpu";
+ };
};
/* DDR3 SPD/EEPROM */
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index b4f32740870e..aa62d08e97c2 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -258,14 +258,12 @@
};
power-leds {
- compatible = "gpio-leds";
+ compatible = "warp-power-leds";
green {
gpios = <&GPIO1 0 0>;
- default-state = "keep";
};
red {
gpios = <&GPIO1 1 0>;
- default-state = "keep";
};
};
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 5bdd4dd20bbb..af04cea82b94 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -215,6 +215,11 @@ ld_version()
}'
}
+ld_is_lld()
+{
+ ${CROSS}ld -V 2>&1 | grep -q LLD
+}
+
# Do not include PT_INTERP segment when linking pie. Non-pie linking
# just ignores this option.
LD_VERSION=$(${CROSS}ld --version | ld_version)
@@ -223,6 +228,14 @@ if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
nodl="--no-dynamic-linker"
fi
+# suppress some warnings in recent ld versions
+nowarn="-z noexecstack"
+if ! ld_is_lld; then
+ if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then
+ nowarn="$nowarn --no-warn-rwx-segments"
+ fi
+fi
+
platformo=$object/"$platform".o
lds=$object/zImage.lds
ext=strip
@@ -504,7 +517,7 @@ if [ "$platform" != "miboot" ]; then
text_start="-Ttext $link_address"
fi
#link everything
- ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \
+ ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \
$platformo $tmp $object/wrapper.a
rm $tmp
fi
@@ -581,7 +594,7 @@ ps3)
# reached, then enter the system reset vector of the partially decompressed
# image. No warning is issued.
rm -f "$odir"/{otheros,otheros-too-big}.bld
- size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1)
+ size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1)
bld="otheros.bld"
if [ $size -gt $((0x1000000)) ]; then
bld="otheros-too-big.bld"
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index ba1743c52b56..4be572908124 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -2,6 +2,8 @@
#ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
#define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+#include <linux/build_bug.h>
+
#define MMU_NO_CONTEXT (0)
/*
* TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
@@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
{
flush_tlb_page(vma, vmaddr);
}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ BUILD_BUG();
+}
+
static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
flush_tlb_mm(mm);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index fab8332fe1ad..146287d9580f 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled())
return;
+ /*
+ * apply_to_page_range can call us with preempt enabled when
+ * operating on kernel page tables.
+ */
+ preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
+ preempt_enable();
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
@@ -59,56 +65,6 @@ extern void flush_hash_range(unsigned long number, int local);
extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
pmd_t *pmdp, unsigned int psize, int ssize,
unsigned long flags);
-static inline void hash__local_flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void hash__flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void hash__local_flush_all_mm(struct mm_struct *mm)
-{
- /*
- * There's no Page Walk Cache for hash, so what is needed is
- * the same as flush_tlb_mm(), which doesn't really make sense
- * with hash. So the only thing we could do is flush the
- * entire LPID! Punt for now, as it's not being used.
- */
- WARN_ON_ONCE(1);
-}
-
-static inline void hash__flush_all_mm(struct mm_struct *mm)
-{
- /*
- * There's no Page Walk Cache for hash, so what is needed is
- * the same as flush_tlb_mm(), which doesn't really make sense
- * with hash. So the only thing we could do is flush the
- * entire LPID! Punt for now, as it's not being used.
- */
- WARN_ON_ONCE(1);
-}
-
-static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void hash__flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void hash__flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void hash__flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
-}
-
struct mmu_gather;
extern void hash__tlb_flush(struct mmu_gather *tlb);
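The preempt_disable()/preempt_enable() pair added above is needed because apply_to_page_range() opens a lazy-MMU section around its PTE callback and may run with preemption enabled when operating on kernel page tables, so the per-CPU batch must stay pinned for the duration. A rough kernel-context sketch of that calling shape, with an illustrative name (apply_over_range_sketch is not the real mm/ code):

    #include <linux/mm.h>
    #include <linux/pgtable.h>

    /* Rough sketch only: the shape of the caller that motivates the change above. */
    static void apply_over_range_sketch(unsigned long addr, unsigned long end,
                                        pte_t *ptep, pte_t val)
    {
            arch_enter_lazy_mmu_mode();     /* hash: opens the batch, now disables preemption */
            for (; addr < end; addr += PAGE_SIZE, ptep++)
                    set_pte_at(&init_mm, addr, ptep, val);
            arch_leave_lazy_mmu_mode();     /* flushes any pending batch, re-enables preemption */
    }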
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 67655cd60545..dd39313242b4 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -47,8 +47,7 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
if (radix_enabled())
- return radix__flush_pmd_tlb_range(vma, start, end);
- return hash__flush_tlb_range(vma, start, end);
+ radix__flush_pmd_tlb_range(vma, start, end);
}
#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
@@ -57,81 +56,65 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long end)
{
if (radix_enabled())
- return radix__flush_hugetlb_tlb_range(vma, start, end);
- return hash__flush_tlb_range(vma, start, end);
+ radix__flush_hugetlb_tlb_range(vma, start, end);
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
if (radix_enabled())
- return radix__flush_tlb_range(vma, start, end);
- return hash__flush_tlb_range(vma, start, end);
+ radix__flush_tlb_range(vma, start, end);
}
static inline void flush_tlb_kernel_range(unsigned long start,
unsigned long end)
{
if (radix_enabled())
- return radix__flush_tlb_kernel_range(start, end);
- return hash__flush_tlb_kernel_range(start, end);
+ radix__flush_tlb_kernel_range(start, end);
}
static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
if (radix_enabled())
- return radix__local_flush_tlb_mm(mm);
- return hash__local_flush_tlb_mm(mm);
+ radix__local_flush_tlb_mm(mm);
}
static inline void local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
if (radix_enabled())
- return radix__local_flush_tlb_page(vma, vmaddr);
- return hash__local_flush_tlb_page(vma, vmaddr);
+ radix__local_flush_tlb_page(vma, vmaddr);
}
-static inline void local_flush_all_mm(struct mm_struct *mm)
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
{
if (radix_enabled())
- return radix__local_flush_all_mm(mm);
- return hash__local_flush_all_mm(mm);
+ radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
}
static inline void tlb_flush(struct mmu_gather *tlb)
{
if (radix_enabled())
- return radix__tlb_flush(tlb);
- return hash__tlb_flush(tlb);
+ radix__tlb_flush(tlb);
}
#ifdef CONFIG_SMP
static inline void flush_tlb_mm(struct mm_struct *mm)
{
if (radix_enabled())
- return radix__flush_tlb_mm(mm);
- return hash__flush_tlb_mm(mm);
+ radix__flush_tlb_mm(mm);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
{
if (radix_enabled())
- return radix__flush_tlb_page(vma, vmaddr);
- return hash__flush_tlb_page(vma, vmaddr);
-}
-
-static inline void flush_all_mm(struct mm_struct *mm)
-{
- if (radix_enabled())
- return radix__flush_all_mm(mm);
- return hash__flush_all_mm(mm);
+ radix__flush_tlb_page(vma, vmaddr);
}
#else
#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
-#define flush_all_mm(mm) local_flush_all_mm(mm)
#endif /* CONFIG_SMP */
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index 05f246c0e36e..d0ea0571e79a 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
* the previous value stored there.
*/
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
XCHG_GEN(u8, _local, "memory");
XCHG_GEN(u8, _relaxed, "cc");
XCHG_GEN(u16, _local, "memory");
XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lbarx %0,0,%2 # __xchg_u8_local\n"
+" stbcx. %3,0,%2 \n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lbarx %0,0,%2 # __xchg_u8_relaxed\n"
+" stbcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2 # __xchg_u16_local\n"
+" sthcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2 # __xchg_u16_relaxed\n"
+" sthcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
(unsigned long)_x_, sizeof(*(ptr))); \
})
+
/*
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
CMPXCHG_GEN(u8, _local, , , "memory");
CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,168 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
CMPXCHG_GEN(u16, _local, , , "memory");
CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lbarx %0,0,%2 # __cmpxchg_u8\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b"
+ PPC_ATOMIC_EXIT_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_local\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lharx %0,0,%2 # __cmpxchg_u16\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_local\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
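With the native byte and halfword sequences above in place, the size-dispatching cmpxchg()/xchg() wrappers resolve u8/u16 operands to lbarx/stbcx. and lharx/sthcx. loops on CPUs that select PPC_HAS_LBARX_LHARX, instead of the masked 32-bit fallback. A hedged kernel-context usage sketch (claim_slot/swap_state and their semantics are invented for illustration, not part of the patch):

    #include <linux/atomic.h>
    #include <linux/types.h>

    /* Illustrative helpers only. */
    static u8 claim_slot(u8 *slot)
    {
            /* Returns the previous value; 0 means we claimed the slot. */
            return cmpxchg(slot, 0, 1);
    }

    static u16 swap_state(u16 *state, u16 new)
    {
            return xchg(state, new);        /* fully ordered variant */
    }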
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 1c6316ec4b74..3f881548fb61 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,8 +22,6 @@
#define BRANCH_SET_LINK 0x1
#define BRANCH_ABSOLUTE 0x2
-DECLARE_STATIC_KEY_FALSE(init_mem_is_free);
-
/*
* Powerpc branch instruction is :
*
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 431ae2343022..4961fb38e438 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -21,23 +21,8 @@
#include <asm/param.h>
#include <asm/firmware.h>
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
-
#ifdef __KERNEL__
-/*
- * Convert cputime <-> microseconds
- */
-extern u64 __cputime_usec_factor;
-
-static inline unsigned long cputime_to_usecs(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_usec_factor);
-}
-
-#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime)
+#define cputime_to_nsecs(cputime) tb_to_ns(cputime)
/*
* PPC64 uses PACA which is task independent for storing accounting data while
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 86a14736c76c..51c744608f37 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
#endif
void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void suspend_breakpoints(void);
+void restore_breakpoints(void);
bool ppc_breakpoint_available(void);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 3cee7115441b..441c5f08258b 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -10,6 +10,13 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+/* Ignore unused weak functions which will have larger offsets */
+#ifdef CONFIG_MPROFILE_KERNEL
+#define FTRACE_MCOUNT_MAX_OFFSET 12
+#elif defined(CONFIG_PPC32)
+#define FTRACE_MCOUNT_MAX_OFFSET 8
+#endif
+
#ifndef __ASSEMBLY__
extern void _mcount(void);
@@ -64,17 +71,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
* those.
*/
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
-#ifdef CONFIG_PPC64_ELF_ABI_V1
-static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
-{
- /* We need to skip past the initial dot, and the __se_sys alias */
- return !strcmp(sym + 1, name) ||
- (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) ||
- (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) ||
- (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) ||
- (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4));
-}
-#else
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
return !strcmp(sym, name) ||
@@ -83,7 +79,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
(!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
(!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
}
-#endif /* CONFIG_PPC64_ELF_ABI_V1 */
#endif /* CONFIG_FTRACE_SYSCALLS */
#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER)
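With the ELFv1-only matcher removed, the remaining arch_syscall_match_sym_name() applies the same prefix rules without the leading dot. A standalone userspace illustration of the prefix rule kept above (this small program is not kernel code):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* "ppc64_personality" should match "sys_personality": compare everything
     * after the arch prefix against the name with its "sys_" prefix skipped. */
    static bool prefix_match(const char *sym, const char *name)
    {
            return !strcmp(sym, name) ||
                   (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
                   (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
    }

    int main(void)
    {
            printf("%d %d\n", prefix_match("ppc64_personality", "sys_personality"),
                              prefix_match("ppc32_select", "sys_select")); /* 1 1 */
            return 0;
    }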
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8abae463f6c1..95fd7f9485d5 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -79,7 +79,7 @@
#define H_NOT_ENOUGH_RESOURCES -44
#define H_R_STATE -45
#define H_RESCINDED -46
-#define H_P1 -54
+#define H_ABORTED -54
#define H_P2 -55
#define H_P3 -56
#define H_P4 -57
@@ -100,7 +100,6 @@
#define H_COP_HW -74
#define H_STATE -75
#define H_IN_USE -77
-#define H_ABORTED -78
#define H_UNSUPPORTED_FLAG_START -256
#define H_UNSUPPORTED_FLAG_END -511
#define H_MULTI_THREADS_ACTIVE -9005
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..6d8492b6e2b8 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index 1a6c1ce17735..47d46712928a 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -11,64 +11,6 @@
*/
#include <asm/hw_irq.h>
-#else
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_IRQSOFF_TRACER
-/*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
-#define TRACE_WITH_FRAME_BUFFER(func) \
- mflr r0; \
- stdu r1, -STACK_FRAME_OVERHEAD(r1); \
- std r0, 16(r1); \
- stdu r1, -STACK_FRAME_OVERHEAD(r1); \
- bl func; \
- ld r1, 0(r1); \
- ld r1, 0(r1);
-#else
-#define TRACE_WITH_FRAME_BUFFER(func) \
- bl func;
-#endif
-
-/*
- * These are calls to C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on)
-#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
-
-/*
- * This is used by assembly code to soft-disable interrupts first and
- * reconcile irq state.
- *
- * NB: This may call C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACAIRQSOFTMASK(r13); \
- lbz __rB,PACAIRQHAPPENED(r13); \
- andi. __rA,__rA,IRQS_DISABLED; \
- li __rA,IRQS_DISABLED; \
- ori __rB,__rB,PACA_IRQ_HARD_DIS; \
- stb __rB,PACAIRQHAPPENED(r13); \
- bne 44f; \
- stb __rA,PACAIRQSOFTMASK(r13); \
- TRACE_DISABLE_INTS; \
-44:
-
-#else
-#define TRACE_ENABLE_INTS
-#define TRACE_DISABLE_INTS
-
-#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACAIRQHAPPENED(r13); \
- li __rB,IRQS_DISABLED; \
- ori __rA,__rA,PACA_IRQ_HARD_DIS; \
- stb __rB,PACAIRQSOFTMASK(r13); \
- stb __rA,PACAIRQHAPPENED(r13)
-#endif
#endif
#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index c8882d9b86c2..a36797938620 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -105,7 +105,7 @@ struct kvmppc_host_state {
void __iomem *xive_tima_virt;
u32 saved_xirr;
u64 dabr;
- u64 host_mmcr[10]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */
+ u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
u32 host_pmc[8];
u64 host_purr;
u64 host_spurr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bfacf12784dd..eae9619b6190 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -1014,6 +1014,18 @@ static inline void kvmppc_fix_ee_before_entry(void)
#endif
}
+static inline void kvmppc_fix_ee_after_exit(void)
+{
+#ifdef CONFIG_PPC64
+ /* Only need to enable IRQs by hard enabling them after this */
+ local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+#endif
+
+ trace_hardirqs_off();
+}
+
+
static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
{
ulong ea;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index c1ea270bb848..57f5017111f4 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -151,8 +151,8 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
* nMMU and/or PSL need to be cleaned up.
*
* Both the 'copros' and 'active_cpus' counts are looked at in
- * flush_all_mm() to determine the scope (local/global) of the
- * TLBIs, so we need to flush first before decrementing
+ * radix__flush_all_mm() to determine the scope (local/global)
+ * of the TLBIs, so we need to flush first before decrementing
* 'copros'. If this API is used by several callers for the
* same context, it can lead to over-flushing. It's hopefully
* not common enough to be a problem.
@@ -164,7 +164,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
* in-between.
*/
if (radix_enabled()) {
- flush_all_mm(mm);
+ radix__flush_all_mm(mm);
c = atomic_dec_if_positive(&mm->context.copros);
/* Detect imbalance between add and remove */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0d40b33184eb..0e861e59b769 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -256,8 +256,14 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
num = number_of_cells_per_pte(pmd, new, huge);
- for (i = 0; i < num; i++, entry++, new += SZ_4K)
- *entry = new;
+ for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) {
+ *entry++ = new;
+ if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) {
+ *entry++ = new;
+ *entry++ = new;
+ *entry++ = new;
+ }
+ }
return old;
}
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index d9067dfc531c..69c3a050a3d8 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -183,7 +183,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
* cases, and 32-bit non-hash with 32-bit PTEs.
*/
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
- ptep->pte = ptep->pte1 = ptep->pte2 = ptep->pte3 = pte_val(pte);
+ ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte);
#else
*ptep = pte;
#endif
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
index bdaf34ad41ea..9a2cf83ea4f1 100644
--- a/arch/powerpc/include/asm/nohash/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon
asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
}
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
start &= PAGE_MASK;
@@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void local_flush_tlb_mm(struct mm_struct *mm);
extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize);
extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
int tsize, int ind);
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 753a2757bcd4..d2f44612f4b0 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -74,6 +74,25 @@
#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
#define REST_GPR(n, base) REST_GPRS(n, n, base)
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \
+ ZEROIZE_GPRS(5, 12); \
+ ZEROIZE_NVGPRS()
+#define SANITIZE_GPR(n) ZEROIZE_GPR(n)
+#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_SYSCALL_GPRS()
+#define SANITIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
#define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
#define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 631802999d59..e96c9b8c2a60 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -374,9 +374,18 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#endif
-/* Check that a certain kernel stack pointer is valid in task_struct p */
-int validate_sp(unsigned long sp, struct task_struct *p,
- unsigned long nbytes);
+/*
+ * Check that a certain kernel stack pointer is a valid (minimum sized)
+ * stack frame in task_struct p.
+ */
+int validate_sp(unsigned long sp, struct task_struct *p);
+
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes);
/*
* Prefetch macros.
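The comments above document the split: validate_sp() keeps the common minimum-frame check, while validate_sp_size() is for callers that read an object extending beyond the minimum frame. A hedged sketch of the intended calling pattern (walk_one_frame() is a made-up caller, not kernel code):

    /* Hypothetical caller illustrating the split described above. */
    static void walk_one_frame(struct task_struct *p, unsigned long sp)
    {
            if (!validate_sp(sp, p))
                    return;         /* not even a minimum-sized stack frame */

            if (validate_sp_size(sp, p, STACK_SWITCH_FRAME_SIZE)) {
                    /* safe to read a full register frame stored at sp */
            }
    }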
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 2e82820fbd64..c0107d8ddd8c 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -85,6 +85,7 @@ struct of_drc_info {
extern int of_read_drc_info_cell(struct property **prop,
const __be32 **curval, struct of_drc_info *data);
+extern unsigned int boot_cpu_node_count;
/*
* There are two methods for telling firmware what our capabilities are.
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 8a0d8fb35328..d503dbd7856c 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -425,10 +425,6 @@ static inline void *ps3_system_bus_get_drvdata(
return dev_get_drvdata(&dev->core);
}
-/* These two need global scope for get_arch_dma_ops(). */
-
-extern struct bus_type ps3_system_bus_type;
-
/* system manager */
struct ps3_sys_manager_ops {
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 714a35f0d425..73c22c579a79 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -60,29 +60,4 @@ static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
return pa;
}
-/*
- * This is what we should always use. Any other lockless page table lookup needs
- * careful audit against THP split.
- */
-static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *hshift)
-{
- pte_t *pte;
-
- VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
- VM_WARN(pgdir != current->mm->pgd,
- "%s lock less page table lookup called on wrong mm\n", __func__);
- pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
-
-#if defined(CONFIG_DEBUG_VM) && \
- !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
- /*
- * We should not find huge page if these configs are not enabled.
- */
- if (hshift)
- WARN_ON(*hshift);
-#endif
- return pte;
-}
-
#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 2efec6d87049..0eb90a013346 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -97,8 +97,6 @@ struct pt_regs
#endif
-#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
-
// Always displays as "REGS" in memory dumps
#ifdef CONFIG_CPU_BIG_ENDIAN
#define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753)
@@ -120,16 +118,27 @@ struct pt_regs
#define USER_REDZONE_SIZE 512
#define KERNEL_REDZONE_SIZE 288
-#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */
#define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */
-#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \
- STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
-#define STACK_FRAME_MARKER 12
#ifdef CONFIG_PPC64_ELF_ABI_V2
#define STACK_FRAME_MIN_SIZE 32
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_MARKER STACK_FRAME_MIN_SIZE
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_SWITCH_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16)
#else
-#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD
+/*
+ * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save
+ * area. This parameter area is not used by calls to C from interrupt entry,
+ * so the second from last one of those is used for the frame marker.
+ */
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 16)
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE
#endif
/* Size of dummy stack frame allocated when calling signal handler. */
@@ -140,17 +149,22 @@ struct pt_regs
#define USER_REDZONE_SIZE 0
#define KERNEL_REDZONE_SIZE 0
-#define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */
+#define STACK_FRAME_MIN_SIZE 16
#define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */
-#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_FRAME_MARKER 2
-#define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 8)
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE
/* Size of stack frame allocated when calling signal handler. */
#define __SIGNAL_FRAMESIZE 64
#endif /* __powerpc64__ */
+#define STACK_INT_FRAME_SIZE (KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE)
+#define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long))
+
#ifndef __ASSEMBLY__
#include <asm/paca.h>
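To make the new layout concrete, here is a worked example of the ELFv2 constants above, using an assumed, purely illustrative sizeof(struct pt_regs) of 384 bytes (the real value depends on configuration):

    /* Worked example only; 384 is an invented pt_regs size for illustration. */
    #include <stdio.h>

    #define PT_REGS_SIZE_EXAMPLE    384
    #define FRAME_MIN               32      /* ELFv2 STACK_FRAME_MIN_SIZE */

    int main(void)
    {
            unsigned long regs   = FRAME_MIN + 16;                  /* STACK_INT_FRAME_REGS = 48 */
            unsigned long marker = FRAME_MIN;                       /* STACK_INT_FRAME_MARKER = 32 */
            unsigned long size   = PT_REGS_SIZE_EXAMPLE + FRAME_MIN + 16; /* STACK_USER_INT_FRAME_SIZE */

            printf("regs at +%lu, marker at +%lu, frame size %lu\n", regs, marker, size);
            return 0;
    }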
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b676c4fb90fd..28a53fb69b38 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -2,83 +2,173 @@
#ifndef _ASM_POWERPC_QSPINLOCK_H
#define _ASM_POWERPC_QSPINLOCK_H
-#include <asm-generic/qspinlock_types.h>
+#include <linux/compiler.h>
+#include <asm/qspinlock_types.h>
#include <asm/paravirt.h>
-#define _Q_PENDING_LOOPS (1 << 9) /* not tuned */
+#ifdef CONFIG_PPC64
+/*
+ * Use the EH=1 hint for accesses that result in the lock being acquired.
+ * The hardware is supposed to optimise this pattern by holding the lock
+ * cacheline longer, and releasing when a store to the same memory (the
+ * unlock) is performed.
+ */
+#define _Q_SPIN_EH_HINT 1
+#else
+#define _Q_SPIN_EH_HINT 0
+#endif
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * makes locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
-static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
-{
- if (!is_shared_processor())
- native_queued_spin_lock_slowpath(lock, val);
- else
- __pv_queued_spin_lock_slowpath(lock, val);
-}
+/*
+ * Put a speculation barrier after testing the lock/node and finding it
+ * busy. Try to prevent pointless speculation in slow paths.
+ *
+ * Slows down the lockstorm microbenchmark with no stealing, where locking
+ * is purely FIFO through the queue. May have more benefit in real workload
+ * where speculating into the wrong place could have a greater cost.
+ */
+#define _Q_SPIN_SPEC_BARRIER 0
-#define queued_spin_unlock queued_spin_unlock
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- if (!is_shared_processor())
- smp_store_release(&lock->locked, 0);
- else
- __pv_queued_spin_unlock(lock);
-}
+#ifdef CONFIG_PPC64
+/*
+ * Execute a miso instruction after passing the MCS lock ownership to the
+ * queue head. Miso is intended to make stores visible to other CPUs sooner.
+ *
+ * This seems to make the lockstorm microbenchmark nospin test go slightly
+ * faster on POWER10, but disable for now.
+ */
+#define _Q_SPIN_MISO 0
+#else
+#define _Q_SPIN_MISO 0
+#endif
+#ifdef CONFIG_PPC64
+/*
+ * This executes miso after an unlock of the lock word, having ownership
+ * pass to the next CPU sooner. This will slow the uncontended path to some
+ * degree. No evidence it helps yet.
+ */
+#define _Q_SPIN_MISO_UNLOCK 0
#else
-extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#define _Q_SPIN_MISO_UNLOCK 0
#endif
-static __always_inline void queued_spin_lock(struct qspinlock *lock)
+/*
+ * Seems to slow down lockstorm microbenchmark, suspect queue node just
+ * has to become shared again right afterwards when its waiter spins on
+ * the lock field.
+ */
+#define _Q_SPIN_PREFETCH_NEXT 0
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
- u32 val = 0;
+ return READ_ONCE(lock->val);
+}
- if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
- return;
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
+{
+ return !lock.val;
+}
- queued_spin_lock_slowpath(lock, val);
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
+{
+ return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
}
-#define queued_spin_lock queued_spin_lock
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define SPIN_THRESHOLD (1<<15) /* not tuned */
+static __always_inline u32 queued_spin_encode_locked_val(void)
+{
+ /* XXX: make this use lock value in paca like simple spinlocks? */
+ return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
+}
-static __always_inline void pv_wait(u8 *ptr, u8 val)
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
{
- if (*ptr != val)
- return;
- yield_to_any();
- /*
- * We could pass in a CPU here if waiting in the queue and yield to
- * the previous CPU in the queue.
- */
+ u32 new = queued_spin_encode_locked_val();
+ u32 prev;
+
+ /* Trylock succeeds only when unlocked and no queued nodes */
+ asm volatile(
+"1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n"
+" cmpwi 0,%0,0 \n"
+" bne- 2f \n"
+" stwcx. %2,0,%1 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (new),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return likely(prev == 0);
}
-static __always_inline void pv_kick(int cpu)
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
{
- prod_cpu(cpu);
+ u32 new = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ /* Trylock may get ahead of queued nodes if it finds unlocked */
+ asm volatile(
+"1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n"
+" andc. %1,%0,%4 \n"
+" bne- 2f \n"
+" and %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return likely(!(prev & ~_Q_TAIL_CPU_MASK));
}
-extern void __pv_init_lock_hash(void);
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+ if (!_Q_SPIN_TRY_LOCK_STEAL)
+ return __queued_spin_trylock_nosteal(lock);
+ else
+ return __queued_spin_trylock_steal(lock);
+}
-static inline void pv_spinlocks_init(void)
+void queued_spin_lock_slowpath(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
- __pv_init_lock_hash();
+ if (!queued_spin_trylock(lock))
+ queued_spin_lock_slowpath(lock);
}
-#endif
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ smp_store_release(&lock->locked, 0);
+ if (_Q_SPIN_MISO_UNLOCK)
+ asm volatile("miso" ::: "memory");
+}
-/*
- * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait,
- * which was found to have performance problems if implemented with
- * the preferred spin_begin()/spin_end() SMT priority pattern. Use the
- * generic version instead.
- */
+#define arch_spin_is_locked(l) queued_spin_is_locked(l)
+#define arch_spin_is_contended(l) queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l)
+#define arch_spin_lock(l) queued_spin_lock(l)
+#define arch_spin_trylock(l) queued_spin_trylock(l)
+#define arch_spin_unlock(l) queued_spin_unlock(l)
-#include <asm-generic/qspinlock.h>
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }
+#endif
#endif /* _ASM_POWERPC_QSPINLOCK_H */
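The two trylock variants above differ only in which lock-word values they accept: the nosteal form requires the word to be completely zero, while the stealing form ignores any queued tail. A standalone illustration of those acceptance conditions on a copied lock word (userspace C, not kernel code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define TAIL_CPU_MASK   0xfffe0000u     /* _Q_TAIL_CPU_MASK from qspinlock_types.h */

    /* Mirrors the branch conditions in the inline asm above. */
    static bool nosteal_would_succeed(uint32_t val)
    {
            return val == 0;                        /* unlocked and no queued nodes */
    }

    static bool steal_would_succeed(uint32_t val)
    {
            return (val & ~TAIL_CPU_MASK) == 0;     /* queued waiters are ignored */
    }

    int main(void)
    {
            uint32_t queued_only = 6u << 17;        /* a CPU is queued, lock otherwise free */
            printf("steal: %d, nosteal: %d\n",
                   steal_would_succeed(queued_only),
                   nosteal_would_succeed(queued_only));      /* 1, 0 */
            return 0;
    }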
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
deleted file mode 100644
index 6b60e7736a47..000000000000
--- a/arch/powerpc/include/asm/qspinlock_paravirt.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-
-EXPORT_SYMBOL(__pv_queued_spin_unlock);
-
-#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
new file mode 100644
index 000000000000..4766a7aa03cb
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H
+#define _ASM_POWERPC_QSPINLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef struct qspinlock {
+ union {
+ u32 val;
+
+#ifdef __LITTLE_ENDIAN
+ struct {
+ u16 locked;
+ u8 reserved[2];
+ };
+#else
+ struct {
+ u8 reserved[2];
+ u16 locked;
+ };
+#endif
+ };
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } }
+
+/*
+ * Bitfields in the lock word:
+ *
+ * 0: locked bit
+ * 1-14: lock holder cpu
+ * 15: lock owner or queuer vcpus observed to be preempted bit
+ * 16: must queue bit
+ * 17-31: tail cpu (+1)
+ */
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+/* 0x00000001 */
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 1
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+
+/* 0x00007ffe */
+#define _Q_OWNER_CPU_OFFSET 1
+#define _Q_OWNER_CPU_BITS 14
+#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU)
+
+#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+/* 0x00008000 */
+#define _Q_SLEEPY_OFFSET 15
+#define _Q_SLEEPY_BITS 1
+#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET)
+
+/* 0x00010000 */
+#define _Q_MUST_Q_OFFSET 16
+#define _Q_MUST_Q_BITS 1
+#define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET)
+
+/* 0xfffe0000 */
+#define _Q_TAIL_CPU_OFFSET 17
+#define _Q_TAIL_CPU_BITS 15
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
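As a worked example of the layout above: with CPU 3 holding the lock and CPU 5 queued as the tail, the owner field holds 3 and the tail field holds 5 + 1, giving a lock word of 0x000c0007. A small standalone encoder mirroring those masks (userspace C, not the kernel's encode helpers):

    #include <stdint.h>
    #include <stdio.h>

    #define LOCKED_VAL              (1u << 0)
    #define OWNER_CPU_OFFSET        1
    #define TAIL_CPU_OFFSET         17

    int main(void)
    {
            uint32_t val = LOCKED_VAL | (3u << OWNER_CPU_OFFSET);   /* CPU 3 owns the lock */
            val |= (5u + 1) << TAIL_CPU_OFFSET;                     /* CPU 5 queued as tail (+1 encoding) */
            printf("lock word: 0x%08x\n", val);                     /* 0x000c0007 */
            return 0;
    }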
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 56319aea646e..479a95cb2770 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -33,21 +33,6 @@
#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */
#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */
-/*
- * In general to call RTAS use rtas_token("string") to lookup
- * an RTAS token for the given string (e.g. "event-scan").
- * To actually perform the call use
- * ret = rtas_call(token, n_in, n_out, ...)
- * Where n_in is the number of input parameters and
- * n_out is the number of output parameters
- *
- * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE
- * will be returned as a token. rtas_call() does look for this
- * token and error out gracefully so rtas_call(rtas_token("str"), ...)
- * may be safely used for one-shot calls to RTAS.
- *
- */
-
/* RTAS event classes */
#define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */
#define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index bd75872a6334..7dafca8e3f02 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -13,7 +13,7 @@
/* See include/linux/spinlock.h */
#define smp_mb__after_spinlock() smp_mb()
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
static inline void pv_spinlocks_init(void) { }
#endif
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index d5f8a74ed2e8..40b01446cf75 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
#endif
#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
-#include <asm-generic/qspinlock_types.h>
+#include <asm/qspinlock_types.h>
#include <asm-generic/qrwlock_types.h>
#else
#include <asm/simple_spinlock_types.h>
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index a1142496cd58..6d51b007b59e 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -104,6 +104,13 @@ long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
unsigned long len1, unsigned long len2);
long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
size_t len, int advice);
+long sys_ppc_sync_file_range2(int fd, unsigned int flags,
+ unsigned int offset1,
+ unsigned int offset2,
+ unsigned int nbytes1,
+ unsigned int nbytes2);
+long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+ u32 len1, u32 len2);
#endif
#ifdef CONFIG_COMPAT
long compat_sys_mmap2(unsigned long addr, size_t len,
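A brief aside (not from this patch): these new prototypes take 64-bit offsets and lengths split into two 32-bit halves, which the handler has to reassemble. A hedged sketch of the usual helper, assuming the high word is passed first:

#include <stdint.h>

/* Assumed convention: high word in the first register, low word in the second. */
static inline uint64_t merge_64(uint32_t high, uint32_t low)
{
	return ((uint64_t)high << 32) | low;
}

/* e.g. a handler body might compute: offset = merge_64(offset1, offset2) */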
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ce2a4aa3985..d24a59a98c0c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -72,7 +72,7 @@
#endif
#define STACK_PT_REGS_OFFSET(sym, val) \
- DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
+ DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val))
int main(void)
{
@@ -167,9 +167,8 @@ int main(void)
OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
- /* Local pt_regs on stack for Transactional Memory funcs. */
- DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
- sizeof(struct pt_regs) + 16);
+ /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */
+ DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16);
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
@@ -261,7 +260,7 @@ int main(void)
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
- DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
+ DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE);
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -418,21 +417,18 @@ int main(void)
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets);
OFFSET(KVM_SDR1, kvm, arch.sdr1);
OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
- OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
- OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
OFFSET(VCPU_CPU, kvm_vcpu, cpu);
OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
@@ -449,16 +445,12 @@ int main(void)
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
- OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
- OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
- OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
- OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
@@ -486,8 +478,6 @@ int main(void)
OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr);
OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop);
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
- OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
- OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
@@ -582,8 +572,6 @@ int main(void)
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
- HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
- HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
@@ -594,9 +582,6 @@ int main(void)
HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
- HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]);
- HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]);
- HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]);
HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
@@ -672,17 +657,6 @@ int main(void)
OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
#endif
-#ifdef CONFIG_KVM_XICS
- DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
- arch.xive_saved_state));
- DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
- arch.xive_cam_word));
- DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
- DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
- DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
- DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
-#endif
-
#ifdef CONFIG_KVM_EXIT_TIMING
OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5e0763be1549..5604c9a1ac22 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -117,7 +117,7 @@ transfer_to_syscall:
addi r12,r12,STACK_FRAME_REGS_MARKER@l
stw r9,_MSR(r1)
li r2, INTERRUPT_SYSCALL
- stw r12,8(r1)
+ stw r12,STACK_INT_FRAME_MARKER(r1)
stw r2,_TRAP(r1)
SAVE_GPR(0, r1)
SAVE_GPRS(3, 8, r1)
@@ -126,12 +126,12 @@ transfer_to_syscall:
kuep_lock
/* Calling convention has r3 = regs, r4 = orig r0 */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
mr r4,r0
bl system_call_exception
ret_from_syscall:
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
li r5,0
bl syscall_exit_prepare
#ifdef CONFIG_PPC_47x
@@ -218,9 +218,9 @@ ret_from_kernel_thread:
* in arch/ppc/kernel/process.c
*/
_GLOBAL(_switch)
- stwu r1,-INT_FRAME_SIZE(r1)
+ stwu r1,-SWITCH_FRAME_SIZE(r1)
mflr r0
- stw r0,INT_FRAME_SIZE+4(r1)
+ stw r0,SWITCH_FRAME_SIZE+4(r1)
/* r3-r12 are caller saved -- Cort */
SAVE_NVGPRS(r1)
stw r0,_NIP(r1) /* Return to switch caller */
@@ -251,7 +251,7 @@ _GLOBAL(_switch)
lwz r4,_NIP(r1) /* Return to _switch caller in new task */
mtlr r4
- addi r1,r1,INT_FRAME_SIZE
+ addi r1,r1,SWITCH_FRAME_SIZE
blr
.globl fast_exception_return
@@ -296,7 +296,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return)
.globl interrupt_return
interrupt_return:
lwz r4,_MSR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
andi. r0,r4,MSR_PR
beq .Lkernel_interrupt_return
bl interrupt_exit_user_prepare
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 930e36099015..3f86091e68b3 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,7 +358,6 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13); \
std r15,PACA_EXMC+EX_R15(r13)
-
/* Core exception code for all exceptions except TLB misses. */
#define EXCEPTION_COMMON_LVL(n, scratch, excf) \
exc_##n##_common: \
@@ -391,10 +390,11 @@ exc_##n##_common: \
std r10,_CCR(r1); /* store orig CR in stackframe */ \
std r9,GPR1(r1); /* store stack frame back link */ \
std r11,SOFTE(r1); /* and save it to stackframe */ \
- std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \
std r3,_TRAP(r1); /* set trap number */ \
std r0,RESULT(r1); /* clear regs->result */ \
- SAVE_NVGPRS(r1);
+ SAVE_NVGPRS(r1); \
+ SANITIZE_NVGPRS(); /* minimise speculation influence */
#define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -455,7 +455,7 @@ exc_##n##_bad_stack: \
EXCEPTION_COMMON(trapnum) \
ack(r8); \
CHECK_NAPPING(); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+ addi r3,r1,STACK_INT_FRAME_REGS; \
bl hdlr; \
b interrupt_return
@@ -504,7 +504,7 @@ __end_interrupts:
EXCEPTION_COMMON_CRIT(0x100)
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_nmi_exception
b ret_from_crit_except
@@ -515,7 +515,7 @@ __end_interrupts:
EXCEPTION_COMMON_MC(0x000)
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl machine_check_exception
b ret_from_mc_except
@@ -570,7 +570,7 @@ __end_interrupts:
std r14,_ESR(r1)
ld r14,PACA_EXGEN+EX_R14(r13)
EXCEPTION_COMMON(0x700)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl program_check_exception
REST_NVGPRS(r1)
b interrupt_return
@@ -586,7 +586,7 @@ __end_interrupts:
beq- 1f
bl load_up_fpu
b fast_interrupt_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_fp_unavailable_exception
b interrupt_return
@@ -606,7 +606,7 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl altivec_unavailable_exception
b interrupt_return
@@ -616,7 +616,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
bl altivec_assist_exception
@@ -643,7 +643,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
EXCEPTION_COMMON_CRIT(0x9f0)
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_BOOKE_WDT
bl WatchdogException
#else
@@ -664,7 +664,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0xf20)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return
@@ -731,7 +731,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
EXCEPTION_COMMON_CRIT(0xd00)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -802,7 +802,7 @@ kernel_dbg_exc:
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
EXCEPTION_COMMON_DBG(0xd08)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -812,7 +812,14 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260)
CHECK_NAPPING()
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
+ /*
+ * XXX: When performance_monitor_exception is taken as a soft-NMI
+ * (Linux irqs disabled), using interrupt_return may be risky and
+ * could cause bugs on return or elsewhere. That case should just
+ * restore registers and return. There is a workaround for one known
+ * problem in interrupt_exit_kernel_prepare().
+ */
bl performance_monitor_exception
b interrupt_return
@@ -827,7 +834,7 @@ kernel_dbg_exc:
EXCEPTION_COMMON_CRIT(0x2a0)
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_nmi_exception
b ret_from_crit_except
@@ -839,7 +846,7 @@ kernel_dbg_exc:
GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x2c0)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return
@@ -850,7 +857,7 @@ kernel_dbg_exc:
EXCEPTION_COMMON_CRIT(0x2e0)
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_nmi_exception
b ret_from_crit_except
@@ -859,7 +866,7 @@ kernel_dbg_exc:
NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x310)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return
@@ -868,7 +875,7 @@ kernel_dbg_exc:
NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x320)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return
@@ -877,7 +884,7 @@ kernel_dbg_exc:
NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return
@@ -972,7 +979,7 @@ masked_interrupt_book3e_0x2c0:
* original values stashed away in the PACA
*/
storage_fault_common:
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_page_fault
b interrupt_return
@@ -981,7 +988,7 @@ storage_fault_common:
* continues here.
*/
alignment_more:
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl alignment_exception
REST_NVGPRS(r1)
b interrupt_return
@@ -1062,7 +1069,7 @@ bad_stack_book3e:
ZEROIZE_GPR(12)
std r12,0(r11)
LOAD_PACA_TOC()
-1: addi r3,r1,STACK_FRAME_OVERHEAD
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_bad_stack
b 1b
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 77201ad9f329..6441a1ba57ac 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -112,6 +112,7 @@ name:
#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
#define __ISTACK(name) .L_ISTACK_ ## name
#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */
#define INT_DEFINE_BEGIN(n) \
.macro int_define_ ## n name
@@ -177,6 +178,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+ .ifndef IMSR_R12
+ IMSR_R12=0
+ .endif
.endm
/*
@@ -503,6 +507,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1) /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe */
std r10,GPR1(r1) /* save r1 in stackframe */
+ SANITIZE_GPR(0)
/* Mark our [H]SRRs valid for return */
li r10,1
@@ -545,8 +550,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+ .if !IMSR_R12
+ SANITIZE_GPRS(9, 12)
+ .else
+ SANITIZE_GPRS(9, 11)
+ .endif
SAVE_NVGPRS(r1)
+ SANITIZE_NVGPRS()
.if IDAR
.if IISIDE
@@ -578,8 +589,8 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
- std r2,GPR2(r1) /* save r2 in stackframe */
- SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */
+ SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */
+ SANITIZE_GPRS(2, 8)
mflr r9 /* Get LR, later save to stack */
LOAD_PACA_TOC() /* get kernel TOC into r2 */
std r9,_LINK(r1)
@@ -592,7 +603,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
li r10,0
LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
std r10,RESULT(r1) /* clear regs->result */
- std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+ std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */
.endm
/*
@@ -697,6 +708,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr r9
ld r9,_CCR(r1)
mtcr r9
+ SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1062,7 +1074,7 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY system_reset
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl system_reset_exception
/* Clear MSR_RI before setting SRR0 and SRR1. */
@@ -1209,7 +1221,7 @@ EXC_COMMON_BEGIN(machine_check_early_common)
BEGIN_FTR_SECTION
bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_FTR_SECTION
bl machine_check_early_boot
END_FTR_SECTION(0, 1) // nop out after boot
@@ -1299,7 +1311,7 @@ EXC_COMMON_BEGIN(machine_check_common)
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
GEN_COMMON machine_check
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl machine_check_exception_async
b interrupt_return_srr
@@ -1365,14 +1377,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* This is the NMI version of the handler because we are called from
* the early handler which is a true NMI.
*/
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl machine_check_exception
/*
* We will not reach here. Even if we did, there is no way out.
* Call unrecoverable_exception and die.
*/
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unrecoverable_exception
b .
@@ -1423,7 +1435,7 @@ EXC_VIRT_END(data_access, 0x4300, 0x80)
EXC_COMMON_BEGIN(data_access_common)
GEN_COMMON data_access
ld r4,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
andis. r0,r4,DSISR_DABRMATCH@h
bne- 1f
#ifdef CONFIG_PPC_64S_HASH_MMU
@@ -1442,7 +1454,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
* do_break() may have changed the NV GPRS while handling a breakpoint.
* If so, we need to restore them with their updated values.
*/
- REST_NVGPRS(r1)
+ HANDLER_RESTORE_NVGPRS()
b interrupt_return_srr
@@ -1480,7 +1492,7 @@ EXC_COMMON_BEGIN(data_access_slb_common)
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_slb_fault
cmpdi r3,0
bne- 1f
@@ -1494,7 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
li r3,-EFAULT
#endif
std r3,RESULT(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1526,7 +1538,7 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
@@ -1568,7 +1580,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_slb_fault
cmpdi r3,0
bne- 1f
@@ -1582,7 +1594,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
li r3,-EFAULT
#endif
std r3,RESULT(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1636,7 +1648,7 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_IRQ
BEGIN_FTR_SECTION
b interrupt_return_hsrr
@@ -1666,9 +1678,9 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
EXC_VIRT_END(alignment, 0x4600, 0x100)
EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl alignment_exception
- REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1732,9 +1744,9 @@ EXC_COMMON_BEGIN(program_check_common)
__GEN_COMMON_BODY program_check
.Ldo_program_check:
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl program_check_exception
- REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1752,6 +1764,7 @@ INT_DEFINE_BEGIN(fp_unavailable)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ IMSR_R12=1
INT_DEFINE_END(fp_unavailable)
EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
@@ -1763,7 +1776,7 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_fp_unavailable_exception
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
@@ -1781,7 +1794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl fp_unavailable_tm
b interrupt_return_srr
#endif
@@ -1825,7 +1838,7 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl timer_interrupt
b interrupt_return_srr
@@ -1910,7 +1923,7 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
@@ -2077,7 +2090,7 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
EXC_VIRT_END(single_step, 0x4d00, 0x100)
EXC_COMMON_BEGIN(single_step_common)
GEN_COMMON single_step
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl single_step_exception
b interrupt_return_srr
@@ -2111,7 +2124,7 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_MMU_FTR_SECTION
bl do_bad_page_fault_segv
MMU_FTR_SECTION_ELSE
@@ -2140,7 +2153,7 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_COMMON h_instr_storage
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return_hsrr
@@ -2163,9 +2176,9 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl emulation_assist_interrupt
- REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_hsrr
@@ -2223,7 +2236,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
__GEN_COMMON_BODY hmi_exception_early
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl hmi_exception_realmode
cmpdi cr0,r3,0
bne 1f
@@ -2241,7 +2254,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl handle_hmi_exception
b interrupt_return_hsrr
@@ -2275,7 +2288,7 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
bl doorbell_exception
#else
@@ -2311,7 +2324,7 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_IRQ
b interrupt_return_hsrr
@@ -2357,10 +2370,22 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl performance_monitor_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ lbz r4,PACAIRQSOFTMASK(r13)
+ cmpdi r4,IRQS_ENABLED
+ bne 1f
+ bl performance_monitor_exception_async
b interrupt_return_srr
+1:
+ bl performance_monitor_exception_nmi
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ kuap_kernel_restore r9, r10
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
@@ -2373,6 +2398,7 @@ INT_DEFINE_BEGIN(altivec_unavailable)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ IMSR_R12=1
INT_DEFINE_END(altivec_unavailable)
EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
@@ -2399,14 +2425,14 @@ BEGIN_FTR_SECTION
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl altivec_unavailable_tm
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl altivec_unavailable_exception
b interrupt_return_srr
@@ -2422,6 +2448,7 @@ INT_DEFINE_BEGIN(vsx_unavailable)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ IMSR_R12=1
INT_DEFINE_END(vsx_unavailable)
EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
@@ -2447,14 +2474,14 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl vsx_unavailable_tm
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl vsx_unavailable_exception
b interrupt_return_srr
@@ -2481,9 +2508,9 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_COMMON facility_unavailable
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl facility_unavailable_exception
- REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -2509,9 +2536,10 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
EXC_COMMON_BEGIN(h_facility_unavailable_common)
GEN_COMMON h_facility_unavailable
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl facility_unavailable_exception
- REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
+ /* XXX Shouldn't be necessary in practice */
+ HANDLER_RESTORE_NVGPRS()
b interrupt_return_hsrr
@@ -2539,7 +2567,7 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
EXC_COMMON_BEGIN(cbe_system_error_common)
GEN_COMMON cbe_system_error
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl cbe_system_error_exception
b interrupt_return_hsrr
@@ -2570,7 +2598,7 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
EXC_COMMON_BEGIN(instruction_breakpoint_common)
GEN_COMMON instruction_breakpoint
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl instruction_breakpoint_exception
b interrupt_return_srr
@@ -2692,7 +2720,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
EXC_COMMON_BEGIN(denorm_exception_common)
GEN_COMMON denorm_exception
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
b interrupt_return_hsrr
@@ -2709,7 +2737,7 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
EXC_COMMON_BEGIN(cbe_maintenance_common)
GEN_COMMON cbe_maintenance
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl cbe_maintenance_exception
b interrupt_return_hsrr
@@ -2734,10 +2762,10 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
EXC_COMMON_BEGIN(altivec_assist_common)
GEN_COMMON altivec_assist
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
bl altivec_assist_exception
- REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
#else
bl unknown_exception
#endif
@@ -2756,7 +2784,7 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
EXC_COMMON_BEGIN(cbe_thermal_common)
GEN_COMMON cbe_thermal
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl cbe_thermal_exception
b interrupt_return_hsrr
@@ -2789,7 +2817,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
subi r1,r1,INT_FRAME_SIZE
__GEN_COMMON_BODY soft_nmi
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl soft_nmi_interrupt
/* Clear MSR_RI before setting SRR0 and SRR1. */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index c3286260a7d1..f8e2911478a7 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -112,7 +112,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
stw r0,GPR0(r1)
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addi r10,r10,STACK_FRAME_REGS_MARKER@l
- stw r10,8(r1)
+ stw r10,STACK_INT_FRAME_MARKER(r1)
li r10, \trapno
stw r10,_TRAP(r1)
SAVE_GPRS(3, 8, r1)
@@ -127,7 +127,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
mfspr r10,SPRN_XER
addi r2, r2, -THREAD
stw r10,_XER(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
.endm
.macro prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 9110fe9d6747..3f68a1624646 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -604,7 +604,7 @@ start_here:
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
bl early_init /* We have to do this with MMU on */
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index f15cb9fdb692..63a85c16fef4 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -109,7 +109,7 @@ _GLOBAL(_start);
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
bl early_init
@@ -1012,7 +1012,7 @@ _GLOBAL(start_secondary_47x)
*/
lis r1,temp_boot_stack@h
ori r1,r1,temp_boot_stack@l
- addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ addi r1,r1,1024-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
bl mmu_init_secondary
@@ -1025,7 +1025,7 @@ _GLOBAL(start_secondary_47x)
lwz r1,TASK_STACK(r2)
/* Current stack pointer */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 874efd25cc45..7558ba4eb864 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -425,7 +425,7 @@ generic_secondary_common_init:
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ subi r1,r1,STACK_FRAME_MIN_SIZE
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
@@ -782,7 +782,7 @@ _GLOBAL(pmac_secondary_start)
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ subi r1,r1,STACK_FRAME_MIN_SIZE
b __secondary_start
@@ -961,7 +961,7 @@ start_here_multiplatform:
LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
add r1,r3,r1
li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
+ stdu r0,-STACK_FRAME_MIN_SIZE(r1)
/*
* Do very early kernel initializations, including initial hash table
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 6be3cc36b716..d438ca74e96c 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -231,7 +231,7 @@ set_ivor:
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
#ifdef CONFIG_SMP
stw r24, TASK_CPU(r2)
@@ -975,10 +975,10 @@ _GLOBAL(__giveup_spe)
li r4,THREAD_ACC
evstddx evr6, r4, r3 /* save off accumulator */
beq 1f
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lwz r4,_MSR-STACK_INT_FRAME_REGS(r5)
lis r3,MSR_SPE@h
andc r4,r4,r3 /* disable SPE for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ stw r4,_MSR-STACK_INT_FRAME_REGS(r5)
1:
blr
#endif /* CONFIG_SPE */
@@ -1047,7 +1047,7 @@ __secondary_start:
lwz r1,TASK_STACK(r2)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c94ed5a08c93..a79751e05781 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -539,7 +539,7 @@ start_here:
ori r0, r0, STACK_END_MAGIC@l
stw r0, 0(r1)
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
lis r6, swapper_pg_dir@ha
tophys(r6,r6)
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 4af12447dc0b..c51f28b5abc0 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -842,7 +842,7 @@ __secondary_start:
lwz r1,TASK_STACK(r1)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
tophys(r3,r1)
stw r0,0(r3)
@@ -970,7 +970,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
/*
* Do early platform-specific initialization,
* and set up the MMU.
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1cb9d0f7cbf2..37d43c172676 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -84,7 +84,7 @@ END_BTB_FLUSH_SECTION
stw r0,GPR0(r1)
lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
addi r10, r10, STACK_FRAME_REGS_MARKER@l
- stw r10, 8(r1)
+ stw r10, STACK_INT_FRAME_MARKER(r1)
li r10, \trapno
stw r10,_TRAP(r1)
SAVE_GPRS(3, 8, r1)
@@ -99,7 +99,7 @@ END_BTB_FLUSH_SECTION
mfspr r10,SPRN_XER
addi r2, r2, -THREAD
stw r10,_XER(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
.endm
.macro prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 8db1a15d7acb..e1b4e70c8fd0 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -646,7 +646,7 @@ int hw_breakpoint_handler(struct die_args *args)
ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
- unsigned long ea;
+ unsigned long ea = 0;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index f9db0a172401..fc6631a80527 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
/*
- * CT_WARN_ON comes here via program_check_exception,
- * so avoid recursion.
+ * CT_WARN_ON comes here via program_check_exception, so avoid
+ * recursion.
+ *
+ * Skip the assertion on PMIs on 64e to work around a problem caused
+ * by NMI PMIs incorrectly taking this interrupt return path; it is
+ * possible for this to hit after interrupt exit has already switched
+ * context to user. See also the comment in the performance monitor
+ * handler in exceptions-64e.S
*/
- if (TRAP(regs) != INTERRUPT_PROGRAM)
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+ TRAP(regs) != INTERRUPT_PROGRAM &&
+ TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked();
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 978a173eb339..fccc34489add 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -77,11 +77,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
/* Calling convention has r3 = regs, r4 = orig r0 */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
mr r4,r0
- LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
- std r11,-16(r3) /* "regshere" marker */
BEGIN_FTR_SECTION
HMT_MEDIUM
@@ -96,10 +96,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* but this is the best we can do.
*/
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
bl system_call_exception
.Lsyscall_vectored_\name\()_exit:
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
li r5,1 /* scv */
bl syscall_exit_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -124,6 +129,7 @@ BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ SANITIZE_RESTORE_NVGPRS()
cmpdi r3,0
bne .Lsyscall_vectored_\name\()_restore_regs
@@ -159,7 +165,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r4,_LINK(r1)
ld r5,_XER(r1)
- REST_NVGPRS(r1)
+ HANDLER_RESTORE_NVGPRS()
REST_GPR(0, r1)
mtcr r2
mtctr r3
@@ -176,7 +182,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
ld r3,RESULT(r1)
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl syscall_exit_restart
@@ -250,11 +256,11 @@ END_BTB_FLUSH_SECTION
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
/* Calling convention has r3 = regs, r4 = orig r0 */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
mr r4,r0
- LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
- std r11,-16(r3) /* "regshere" marker */
#ifdef CONFIG_PPC_BOOK3S
li r11,1
@@ -275,10 +281,15 @@ END_BTB_FLUSH_SECTION
wrteei 1
#endif
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
bl system_call_exception
.Lsyscall_exit:
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
li r5,0 /* !scv */
bl syscall_exit_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -315,6 +326,7 @@ BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+ SANITIZE_RESTORE_NVGPRS()
cmpdi r3,0
bne .Lsyscall_restore_regs
/* Zero volatile regs that may contain sensitive kernel data */
@@ -342,7 +354,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
.Lsyscall_restore_regs:
ld r3,_CTR(r1)
ld r4,_XER(r1)
- REST_NVGPRS(r1)
+ HANDLER_RESTORE_NVGPRS()
mtctr r3
mtspr SPRN_XER,r4
REST_GPR(0, r1)
@@ -357,7 +369,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
ld r3,RESULT(r1)
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl syscall_exit_restart
@@ -388,7 +400,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
andi. r0,r5,MSR_RI
li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
bne+ .Lfast_kernel_interrupt_return_srr
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unrecoverable_exception
b . /* should not get here */
#else
@@ -406,11 +418,13 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
beq interrupt_return_\srr\()_kernel
interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl interrupt_exit_user_prepare
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi r3,0
bne- .Lrestore_nvgprs_\srr
.Lrestore_nvgprs_\srr\()_cont:
+#endif
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
#ifdef CONFIG_PPC_BOOK3S
.Linterrupt_return_\srr\()_user_rst_start:
@@ -424,6 +438,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
.Lfast_user_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
#ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
lbz r4,PACASRR_VALID(r13)
@@ -493,9 +508,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
b . /* prevent speculative execution */
.Linterrupt_return_\srr\()_user_rst_end:
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
.Lrestore_nvgprs_\srr\():
REST_NVGPRS(r1)
b .Lrestore_nvgprs_\srr\()_cont
+#endif
#ifdef CONFIG_PPC_BOOK3S
interrupt_return_\srr\()_user_restart:
@@ -503,7 +520,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl interrupt_exit_user_restart
@@ -518,7 +535,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr
.balign IFETCH_ALIGN_BYTES
interrupt_return_\srr\()_kernel:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl interrupt_exit_kernel_prepare
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -532,15 +549,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
+ *
+ * The MSR[EE] check catches among other things the short incoherency
+ * in hard_irq_disable() between clearing MSR[EE] and setting
+ * PACA_IRQ_HARD_DIS.
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
- beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
+ bne 1f // HARD_MASK is pending
+ // No HARD_MASK pending, clear possible HARD_DIS set by interrupt
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ stb r11,PACAIRQHAPPENED(r13)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
- /* Must clear MSR_EE from _MSR */
+1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */
@@ -576,6 +602,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
.Lfast_kernel_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
cmpdi cr1,r3,0
#ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
@@ -628,7 +655,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
* Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
* the reliable stack unwinder later on. Clear it.
*/
- std r0,STACK_FRAME_OVERHEAD-16(r1)
+ std r0,STACK_INT_FRAME_MARKER(r1)
REST_GPRS(2, 5, r1)
@@ -675,7 +702,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
GET_PACA(r13)
ld r1,PACA_EXIT_SAVE_R1(r13)
LOAD_PACA_TOC()
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
bl interrupt_exit_kernel_restart
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9ede61a5a469..c5b9ce887483 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -210,7 +210,7 @@ static __always_inline void call_do_softirq(const void *sp)
PPC_LL " %%r1, 0(%%r1) ;"
: // Outputs
: // Inputs
- [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
[callee] "i" (__do_softirq)
: // Clobbers
"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
@@ -264,7 +264,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
: // Outputs
"+r" (r3)
: // Inputs
- [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
[callee] "i" (__do_irq)
: // Clobbers
"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 1a1e9995dae3..ebe4d1645ca1 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -191,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs)
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
- STACK_FRAME_OVERHEAD);
+ STACK_INT_FRAME_REGS);
unsigned long *ptr = gdb_regs;
int reg;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bd7b1a035459..86ca5a61ea9a 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -158,9 +158,7 @@ int arch_prepare_kprobe(struct kprobe *p)
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
- preempt_disable();
prev = get_kprobe(p->addr - 1);
- preempt_enable_no_resched();
/*
* When prev is a ftrace-based kprobe, we don't have an insn, and it
@@ -371,7 +369,7 @@ int kprobe_handler(struct pt_regs *regs)
if (ret > 0) {
restore_previous_kprobe(kcb);
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
}
@@ -384,7 +382,7 @@ int kprobe_handler(struct pt_regs *regs)
if (p->pre_handler && p->pre_handler(p, regs)) {
/* handler changed execution path, so skip ss setup */
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
@@ -397,7 +395,7 @@ int kprobe_handler(struct pt_regs *regs)
kcb->kprobe_status = KPROBE_HIT_SSDONE;
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
}
@@ -406,7 +404,7 @@ int kprobe_handler(struct pt_regs *regs)
return 1;
no_kprobe:
- preempt_enable_no_resched();
+ preempt_enable();
return ret;
}
NOKPROBE_SYMBOL(kprobe_handler);
@@ -492,7 +490,7 @@ int kprobe_post_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
+ preempt_enable();
/*
* if somebody else is singlestepping across a probe point, msr
@@ -531,7 +529,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e5127b19fec2..daf8f87d2372 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -382,7 +382,7 @@ EXPORT_SYMBOL(__bswapdi2)
_GLOBAL(start_secondary_resume)
/* Reset stack */
rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
bl start_secondary
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c61a7ba446a8..c39c07a4c06e 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -386,7 +386,7 @@ _GLOBAL(kexec_sequence)
std r0,16(r1)
/* switch stacks to newstack -- &kexec_stack.stack */
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3)
mr r1,r3
li r0,0
@@ -403,7 +403,7 @@ _GLOBAL(kexec_sequence)
std r26,-48(r1)
std r25,-56(r1)
- stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
+ stdu r1,-STACK_FRAME_MIN_SIZE-64(r1)
/* save args into preserved regs */
mr r31,r3 /* newstack (both) */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 7e45dc98df8a..ff045644f13f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -31,6 +31,16 @@
this, and makes other things simpler. Anton?
--RR. */
+bool module_elf_check_arch(Elf_Ehdr *hdr)
+{
+ unsigned long abi_level = hdr->e_flags & 0x3;
+
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ return abi_level == 2;
+ else
+ return abi_level < 2;
+}
+
#ifdef CONFIG_PPC64_ELF_ABI_V2
static func_desc_t func_desc(unsigned long addr)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 3b1c2236cbee..004fae2044a3 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op,
__this_cpu_write(current_kprobe, NULL);
}
- preempt_enable_no_resched();
+ preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index cd4e7bc32609..35932f45fb4e 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -85,7 +85,7 @@ optprobe_template_op_address:
TEMPLATE_FOR_IMM_LOAD_INSNS
/* 2. pt_regs pointer in r4 */
- addi r4,r1,STACK_FRAME_OVERHEAD
+ addi r4,r1,STACK_INT_FRAME_REGS
.global optprobe_template_call_handler
optprobe_template_call_handler:
@@ -96,7 +96,7 @@ optprobe_template_call_handler:
* Parameters for instruction emulation:
* 1. Pass SP in register r3.
*/
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
.global optprobe_template_insn
optprobe_template_insn:
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 2d4d21bb46a9..49813f982468 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -21,60 +21,33 @@
* different ABIs, though).
*/
_GLOBAL(ppc_save_regs)
- PPC_STL r0,0*SZL(r3)
+ /* This allows stack frame accessor macros and offsets to be used */
+ subi r3,r3,STACK_INT_FRAME_REGS
+ PPC_STL r0,GPR0(r3)
#ifdef CONFIG_PPC32
- stmw r2, 2*SZL(r3)
+ stmw r2,GPR2(r3)
#else
- PPC_STL r2,2*SZL(r3)
- PPC_STL r3,3*SZL(r3)
- PPC_STL r4,4*SZL(r3)
- PPC_STL r5,5*SZL(r3)
- PPC_STL r6,6*SZL(r3)
- PPC_STL r7,7*SZL(r3)
- PPC_STL r8,8*SZL(r3)
- PPC_STL r9,9*SZL(r3)
- PPC_STL r10,10*SZL(r3)
- PPC_STL r11,11*SZL(r3)
- PPC_STL r12,12*SZL(r3)
- PPC_STL r13,13*SZL(r3)
- PPC_STL r14,14*SZL(r3)
- PPC_STL r15,15*SZL(r3)
- PPC_STL r16,16*SZL(r3)
- PPC_STL r17,17*SZL(r3)
- PPC_STL r18,18*SZL(r3)
- PPC_STL r19,19*SZL(r3)
- PPC_STL r20,20*SZL(r3)
- PPC_STL r21,21*SZL(r3)
- PPC_STL r22,22*SZL(r3)
- PPC_STL r23,23*SZL(r3)
- PPC_STL r24,24*SZL(r3)
- PPC_STL r25,25*SZL(r3)
- PPC_STL r26,26*SZL(r3)
- PPC_STL r27,27*SZL(r3)
- PPC_STL r28,28*SZL(r3)
- PPC_STL r29,29*SZL(r3)
- PPC_STL r30,30*SZL(r3)
- PPC_STL r31,31*SZL(r3)
+ SAVE_GPRS(2, 31, r3)
lbz r0,PACAIRQSOFTMASK(r13)
- PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,SOFTE(r3)
#endif
/* go up one stack frame for SP */
PPC_LL r4,0(r1)
- PPC_STL r4,1*SZL(r3)
+ PPC_STL r4,GPR1(r3)
/* get caller's LR */
PPC_LL r0,LRSAVE(r4)
- PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK(r3)
mflr r0
- PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_NIP(r3)
mfmsr r0
- PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_MSR(r3)
mfctr r0
- PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CTR(r3)
mfxer r0
- PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_XER(r3)
mfcr r0
- PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CCR(r3)
li r0,0
- PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_TRAP(r3)
+ PPC_STL r0,ORIG_GPR3(r3)
blr
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 67da147fe34d..edb46d0806ef 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
return 0;
}
-void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
{
- memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
-
if (dawr_enabled())
// Power8 or later
set_dawr(nr, brk);
@@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
WARN_ON_ONCE(1);
}
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+ memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
+ set_hw_breakpoint(nr, brk);
+}
+
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
@@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void)
}
EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
+{
+ struct arch_hw_breakpoint brk = {0};
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, &brk);
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]
+ */
+void restore_breakpoints(void)
+{
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
+}
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline bool tm_enabled(struct task_struct *tsk)
@@ -1359,7 +1391,7 @@ static void show_instructions(struct pt_regs *regs)
unsigned long nip = regs->nip;
unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- printk("Instruction dump:");
+ printk("Code: ");
/*
* If we were executing with the MMU off for instructions, adjust pc
@@ -1373,9 +1405,6 @@ static void show_instructions(struct pt_regs *regs)
for (i = 0; i < NR_INSN_TO_PRINT; i++) {
int instr;
- if (!(i % 8))
- pr_cont("\n");
-
if (!__kernel_text_address(pc) ||
get_kernel_nofault(instr, (const void *)pc)) {
pr_cont("XXXXXXXX ");
@@ -1726,13 +1755,17 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
klp_init_thread_info(p);
+ /* Create initial stack frame. */
+ sp -= STACK_USER_INT_FRAME_SIZE;
+ *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
/* Copy registers */
- sp -= sizeof(struct pt_regs);
- childregs = (struct pt_regs *) sp;
+ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
if (unlikely(args->fn)) {
/* kernel thread */
+ ((unsigned long *)sp)[0] = 0;
memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gpr[1] = sp + sizeof(struct pt_regs);
+ childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE;
/* function */
if (args->fn)
childregs->gpr[14] = ppc_function_entry((void *)args->fn);
@@ -1750,6 +1783,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
*childregs = *regs;
if (usp)
childregs->gpr[1] = usp;
+ ((unsigned long *)sp)[0] = childregs->gpr[1];
p->thread.regs = childregs;
/* 64s sets this in ret_from_fork */
if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
@@ -1767,7 +1801,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
f = ret_from_fork;
}
childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
- sp -= STACK_FRAME_OVERHEAD;
/*
* The way this works is that at some point in the future
@@ -1777,11 +1810,12 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
- ((unsigned long *)sp)[0] = 0;
- sp -= sizeof(struct pt_regs);
- kregs = (struct pt_regs *) sp;
- sp -= STACK_FRAME_OVERHEAD;
+ ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
+ sp -= STACK_SWITCH_FRAME_SIZE;
+ ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
+ kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
p->thread.ksp = sp;
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
for (i = 0; i < nr_wp_slots(); i++)
p->thread.ptrace_bps[i] = NULL;
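For orientation only (not part of the patch), a small host-side sketch of how the two frames built in copy_thread() above nest. The offset values are placeholders; only the ordering and the pointer arithmetic mirror the hunk:

#include <stdint.h>
#include <stdio.h>

/* Placeholder offsets for illustration; the real values come from asm-offsets. */
#define STACK_INT_FRAME_MARKER     24
#define STACK_INT_FRAME_REGS       32
#define STACK_USER_INT_FRAME_SIZE  (STACK_INT_FRAME_REGS + 400)
#define STACK_SWITCH_FRAME_REGS    48
#define STACK_SWITCH_FRAME_SIZE    (STACK_SWITCH_FRAME_REGS + 400)

int main(void)
{
	uintptr_t sp = 0x10000;                        /* pretend top of kernel stack */

	sp -= STACK_USER_INT_FRAME_SIZE;               /* interrupt frame holding user regs */
	uintptr_t childregs = sp + STACK_INT_FRAME_REGS;
	uintptr_t marker    = sp + STACK_INT_FRAME_MARKER;

	sp -= STACK_SWITCH_FRAME_SIZE;                 /* switch frame consumed by _switch() */
	uintptr_t kregs = sp + STACK_SWITCH_FRAME_REGS;
	uintptr_t back_chain = sp + STACK_SWITCH_FRAME_SIZE; /* what ((unsigned long *)sp)[0] points at */

	printf("ksp=%#lx kregs=%#lx back_chain=%#lx childregs=%#lx marker=%#lx\n",
	       (unsigned long)sp, (unsigned long)kregs, (unsigned long)back_chain,
	       (unsigned long)childregs, (unsigned long)marker);
	return 0;
}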
@@ -2123,9 +2157,12 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
return 0;
}
-
-int validate_sp(unsigned long sp, struct task_struct *p,
- unsigned long nbytes)
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
@@ -2141,7 +2178,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
return valid_emergency_stack(sp, p, nbytes);
}
-EXPORT_SYMBOL(validate_sp);
+int validate_sp(unsigned long sp, struct task_struct *p)
+{
+ return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
+}
static unsigned long ___get_wchan(struct task_struct *p)
{
@@ -2149,13 +2189,12 @@ static unsigned long ___get_wchan(struct task_struct *p)
int count = 0;
sp = p->thread.ksp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p))
return 0;
do {
sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
- task_is_running(p))
+ if (!validate_sp(sp, p) || task_is_running(p))
return 0;
if (count > 0) {
ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
@@ -2209,7 +2248,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
lr = 0;
printk("%sCall Trace:\n", loglvl);
do {
- if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, tsk))
break;
stack = (unsigned long *) sp;
@@ -2230,12 +2269,16 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
/*
* See if this is an exception frame.
- * We look for the "regshere" marker in the current frame.
+ * We look for the "regs" marker in the current frame.
+ *
+ * STACK_SWITCH_FRAME_SIZE is the smallest frame that
+ * could hold a pt_regs; if that does not fit, the frame
+ * can't have regs.
*/
- if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
- && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
+ && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
- (sp + STACK_FRAME_OVERHEAD);
+ (sp + STACK_INT_FRAME_REGS);
lr = regs->link;
printk("%s--- interrupt: %lx at %pS\n",
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1eed87d954ba..4f1c920aa13e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -72,6 +72,7 @@ int __initdata iommu_is_off;
int __initdata iommu_force_on;
unsigned long tce_alloc_start, tce_alloc_end;
u64 ppc64_rma_size;
+unsigned int boot_cpu_node_count __ro_after_init;
#endif
static phys_addr_t first_memblock_size;
static int __initdata boot_cpu_count;
@@ -335,6 +336,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
+ if (IS_ENABLED(CONFIG_PPC64))
+ boot_cpu_node_count++;
+
/* Get physical cpuid */
intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e847f9b1c5b9..deded51a7978 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -7,43 +7,35 @@
* Copyright (C) 2001 IBM.
*/
-#include <linux/stdarg.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/init.h>
+#define pr_fmt(fmt) "rtas: " fmt
+
#include <linux/capability.h>
#include <linux/delay.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/memblock.h>
-#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/reboot.h>
+#include <linux/sched.h>
#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stdarg.h>
#include <linux/syscalls.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/delay.h>
+#include <asm/firmware.h>
#include <asm/interrupt.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
#include <asm/machdep.h>
-#include <asm/firmware.h>
+#include <asm/mmu.h>
#include <asm/page.h>
-#include <asm/param.h>
-#include <asm/delay.h>
-#include <linux/uaccess.h>
-#include <asm/udbg.h>
-#include <asm/syscalls.h>
-#include <asm/smp.h>
-#include <linux/atomic.h>
+#include <asm/rtas.h>
#include <asm/time.h>
-#include <asm/mmu.h>
-#include <asm/topology.h>
+#include <asm/udbg.h>
/* This is here deliberately so it's only used in this file */
void enter_rtas(unsigned long);
@@ -353,6 +345,9 @@ int rtas_service_present(const char *service)
EXPORT_SYMBOL(rtas_service_present);
#ifdef CONFIG_RTAS_ERROR_LOGGING
+
+static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
+
/*
* Return the firmware-specified size of the error log buffer
* for all rtas calls that require an error buffer argument.
@@ -360,21 +355,30 @@ EXPORT_SYMBOL(rtas_service_present);
*/
int rtas_get_error_log_max(void)
{
- static int rtas_error_log_max;
- if (rtas_error_log_max)
- return rtas_error_log_max;
-
- rtas_error_log_max = rtas_token ("rtas-error-log-max");
- if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
- (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
- printk (KERN_WARNING "RTAS: bad log buffer size %d\n",
- rtas_error_log_max);
- rtas_error_log_max = RTAS_ERROR_LOG_MAX;
- }
return rtas_error_log_max;
}
EXPORT_SYMBOL(rtas_get_error_log_max);
+static void __init init_error_log_max(void)
+{
+ static const char propname[] __initconst = "rtas-error-log-max";
+ u32 max;
+
+ if (of_property_read_u32(rtas.dev, propname, &max)) {
+ pr_warn("%s not found, using default of %u\n",
+ propname, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ if (max > RTAS_ERROR_LOG_MAX) {
+ pr_warn("%s = %u, clamping max error log size to %u\n",
+ propname, max, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ rtas_error_log_max = max;
+}
+
static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
static int rtas_last_error_token;
@@ -432,6 +436,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
#else /* CONFIG_RTAS_ERROR_LOGGING */
#define __fetch_rtas_last_error(x) NULL
#define get_errorlog_buffer() NULL
+static void __init init_error_log_max(void) {}
#endif
@@ -467,6 +472,64 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
static int ibm_open_errinjct_token;
static int ibm_errinjct_token;
+/**
+ * rtas_call() - Invoke an RTAS firmware function.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @outputs: Array of @nret output words.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_token().
+ *
+ * The @nargs and @nret arguments must match the number of input and
+ * output parameters specified for the RTAS function.
+ *
+ * rtas_call() returns RTAS status codes, not conventional Linux errno
+ * values. Callers must translate any failure to an appropriate errno
+ * in syscall context. Most callers of RTAS functions that can return
+ * -2 or 990x should use rtas_busy_delay() to correctly handle those
+ * statuses before calling again.
+ *
+ * The return value descriptions are adapted from 7.2.8 [RTAS] Return
+ * Codes of the PAPR and CHRP specifications.
+ *
+ * Context: Process context preferably, interrupt context if
+ * necessary. Acquires an internal spinlock and may perform
+ * GFP_ATOMIC slab allocation in the error path. Unsafe for NMI
+ * context.
+ * Return:
+ * * 0 - RTAS function call succeeded.
+ * * -1 - RTAS function encountered a hardware or
+ * platform error, or the token is invalid,
+ * or the function is restricted by kernel policy.
+ * * -2 - Specs say "A necessary hardware device was busy,
+ * and the requested function could not be
+ * performed. The operation should be retried at
+ * a later time." This is misleading, at least with
+ * respect to current RTAS implementations. What it
+ * usually means in practice is that the function
+ * could not be completed while meeting RTAS's
+ * deadline for returning control to the OS (250us
+ * for PAPR/PowerVM, typically), but the call may be
+ * immediately reattempted to resume work on it.
+ * * -3 - Parameter error.
+ * * -7 - Unexpected state change.
+ * * 9000...9899 - Vendor-specific success codes.
+ * * 9900...9905 - Advisory extended delay. Caller should try
+ * again after ~10^x ms has elapsed, where x is
+ * the last digit of the status [0-5]. Again going
+ * beyond the PAPR text, 990x on PowerVM indicates
+ * contention for RTAS-internal resources. Other
+ * RTAS call sequences in progress should be
+ * allowed to complete before reattempting the
+ * call.
+ * * -9000 - Multi-level isolation error.
+ * * -9999...-9004 - Vendor-specific error codes.
+ * * Additional negative values - Function-specific error.
+ * * Additional positive values - Function-specific success.
+ */
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
va_list list;
@@ -657,8 +720,7 @@ static int rtas_error_rc(int rtas_rc)
rc = -ENODEV;
break;
default:
- printk(KERN_ERR "%s: unexpected RTAS error %d\n",
- __func__, rtas_rc);
+ pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
rc = -ERANGE;
break;
}
@@ -862,8 +924,8 @@ void __noreturn rtas_restart(char *cmd)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_RESTART);
- printk("RTAS system-reboot returned %d\n",
- rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
+ pr_emerg("system-reboot returned %d\n",
+ rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
for (;;);
}
@@ -872,8 +934,8 @@ void rtas_power_off(void)
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_POWER_OFF);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
for (;;);
}
@@ -882,13 +944,14 @@ void __noreturn rtas_halt(void)
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_HALT);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
for (;;);
}
/* Must be in the RMO region, so we place it here */
static char rtas_os_term_buf[2048];
+static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE;
void rtas_os_term(char *str)
{
@@ -900,19 +963,23 @@ void rtas_os_term(char *str)
* this property may terminate the partition which we want to avoid
* since it interferes with panic_timeout.
*/
- if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
- RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
+ if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE)
return;
snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
+ /*
+ * Keep calling as long as RTAS returns a "try again" status,
+ * but don't use rtas_busy_delay(), which potentially
+ * schedules.
+ */
do {
- status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
+ status = rtas_call(ibm_os_term_token, 1, 1, NULL,
__pa(rtas_os_term_buf));
- } while (rtas_busy_delay(status));
+ } while (rtas_busy_delay_time(status));
if (status != 0)
- printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
+ pr_emerg("ibm,os-term call failed %d\n", status);
}
/**
@@ -983,8 +1050,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log
return NULL;
}
-#ifdef CONFIG_PPC_RTAS_FILTER
-
/*
* The sys_rtas syscall, as originally designed, allows root to pass
* arbitrary physical addresses to RTAS calls. A number of RTAS calls
@@ -1133,20 +1198,6 @@ static void __init rtas_syscall_filter_init(void)
rtas_filters[i].token = rtas_token(rtas_filters[i].name);
}
-#else
-
-static bool block_rtas_call(int token, int nargs,
- struct rtas_args *args)
-{
- return false;
-}
-
-static void __init rtas_syscall_filter_init(void)
-{
-}
-
-#endif /* CONFIG_PPC_RTAS_FILTER */
-
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
@@ -1277,6 +1328,15 @@ void __init rtas_initialize(void)
no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
rtas.entry = no_entry ? rtas.base : entry;
+ init_error_log_max();
+
+ /*
+ * Discover these now to avoid device tree lookups in the
+ * panic path.
+ */
+ if (of_property_read_bool(rtas.dev, "ibm,extended-os-term"))
+ ibm_os_term_token = rtas_token("ibm,os-term");
+
/* If RTAS was found, allocate the RMO buffer for it and look for
* the stop-self token if any
*/
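The kernel-doc added above spells out the rtas_call() retry contract; a minimal caller-side sketch of that pattern follows (illustrative only, not part of this patch — the "set-time-of-day" token and the errno mapping are placeholders):

static int example_rtas_set_time(int year, int month, int day,
				 int hour, int minute, int second)
{
	int token = rtas_token("set-time-of-day");	/* illustrative RTAS function */
	int status;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

	/* Retry while RTAS reports busy (-2) or extended delay (990x). */
	do {
		status = rtas_call(token, 7, 1, NULL, year, month, day,
				   hour, minute, second, 0);
	} while (rtas_busy_delay(status));

	/* rtas_call() returns RTAS status codes, not errnos; map as needed. */
	return status ? -EIO : 0;
}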
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5270b450bbde..cc56ac6ba4b0 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -9,6 +9,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
@@ -499,6 +500,8 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
static int __init rtas_event_scan_init(void)
{
+ int err;
+
if (!machine_is(pseries) && !machine_is(chrp))
return 0;
@@ -509,8 +512,8 @@ static int __init rtas_event_scan_init(void)
return -ENODEV;
}
- rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
- if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
+ err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate);
+ if (err) {
printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
return -ENODEV;
}
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0da6e59161cd..6b90f10a6c81 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1249,7 +1249,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
- THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ THREAD_SIZE - STACK_FRAME_MIN_SIZE;
#endif
task_thread_info(idle)->cpu = cpu;
secondary_current = current_set[cpu] = idle;
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index a2443d61728e..5de8597eaab8 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -43,7 +43,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry,
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
- if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, task))
return;
newsp = stack[0];
@@ -77,7 +77,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
/*
* For user tasks, this is the SP value loaded on
* kernel entry, see "PACAKSAVE(r13)" in _switch() and
- * system_call_common()/EXCEPTION_PROLOG_COMMON().
+ * system_call_common().
*
* Likewise for non-swapper kernel threads,
* this also happens to be the top of the stack
@@ -88,13 +88,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
* an unreliable stack trace until it's been
* _switch()'ed to for the first time.
*/
- stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ stack_end -= STACK_USER_INT_FRAME_SIZE;
} else {
/*
* idle tasks have a custom stack layout,
* c.f. cpu_idle_thread_init().
*/
- stack_end -= STACK_FRAME_OVERHEAD;
+ stack_end -= STACK_FRAME_MIN_SIZE;
}
if (task == current)
@@ -136,7 +136,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
/* Mark stacktraces with exception frames as unreliable. */
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
- stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
return -EINVAL;
}
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 1ab4a4d95aba..d451a8229223 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -112,7 +112,7 @@ PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
advice);
}
-COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
+PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
int, fd, unsigned int, flags,
unsigned int, offset1, unsigned int, offset2,
unsigned int, nbytes1, unsigned int, nbytes2)
@@ -122,3 +122,14 @@ COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
+
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE6(ppc_fallocate,
+ int, fd, int, mode,
+ u32, offset1, u32, offset2, u32, len1, u32, len2)
+{
+ return ksys_fallocate(fd, mode,
+ merge_64(offset1, offset2),
+ merge_64(len1, len2));
+}
+#endif
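The sys_ppc_* wrappers above rely on merge_64() to rebuild each 64-bit argument from the two 32-bit register halves passed by the compat ABI; a minimal sketch of that recombination (the helper's real definition lives elsewhere in the tree, so this exact form is an assumption for illustration):

static inline u64 example_merge_64(u32 high, u32 low)
{
	/* High word arrives in the first register, low word in the second. */
	return ((u64)high << 32) | low;
}

/* e.g. offset1 = 0x00000001, offset2 = 0x80000000 -> 0x0000000180000000 */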
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index e9e0df4f9a61..a0be127475b1 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -394,8 +394,11 @@
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
-308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2
-309 nospu fallocate sys_fallocate compat_sys_fallocate
+308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
+308 64 sync_file_range2 sys_sync_file_range2
+308 spu sync_file_range2 sys_sync_file_range2
+309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
+309 64 fallocate sys_fallocate
310 nospu subpage_prot sys_subpage_prot
311 32 timerfd_settime sys_timerfd_settime32
311 64 timerfd_settime sys_timerfd_settime
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a2ab397065c6..d68de3618741 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
-EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
+EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
@@ -151,21 +151,6 @@ bool tb_invalid;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
- * Factor for converting from cputime_t (timebase ticks) to
- * microseconds. This is stored as 0.64 fixed-point binary fraction.
- */
-u64 __cputime_usec_factor;
-EXPORT_SYMBOL(__cputime_usec_factor);
-
-static void calc_cputime_factors(void)
-{
- struct div_result res;
-
- div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
- __cputime_usec_factor = res.result_low;
-}
-
-/*
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
@@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk)
acct->hardirq_time = 0;
acct->softirq_time = 0;
}
-
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#define calc_cputime_factors()
-#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
void __delay(unsigned long loops)
{
@@ -914,7 +896,6 @@ void __init time_init(void)
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
- calc_cputime_factors();
/*
* Compute scale factor for sched_clock.
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 5a0f023a26e9..9feab5e0485b 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
+ /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
@@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim)
* Make r7 look like an exception frame so that we can use the neat
* GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
+ subi r7, r7, STACK_INT_FRAME_REGS
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
@@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint)
stdu r1, -TM_FRAME_SIZE(r1)
/*
- * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
@@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint)
* Make r7 look like an exception frame so that we can use the neat
* GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
+ subi r7, r7, STACK_INT_FRAME_REGS
/* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index d031093bc436..ffb1db386849 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -110,7 +110,7 @@
.endif
/* Load &pt_regs in r6 for call below */
- addi r6, r1, STACK_FRAME_OVERHEAD
+ addi r6, r1, STACK_INT_FRAME_REGS
.endm
.macro ftrace_regs_exit allregs
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 7786e3ac7611..8c3862b4c259 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -142,7 +142,7 @@ SECTIONS
#endif
.data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
- *(.data.rel.ro*)
+ *(.data.rel.ro .data.rel.ro.*)
}
.branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 349a781cea0b..2500c37c628c 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -26,6 +26,7 @@
#include <asm/firmware.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
+#include <asm/prom.h>
struct umem_info {
u64 *buf; /* data buffer for usable-memory property */
@@ -929,6 +930,45 @@ out:
}
/**
+ * cpu_node_size - Compute the size of a CPU node in the FDT.
+ * This is computed only once; the value is cached in a
+ * static variable.
+ * Returns the max size of a CPU node in the FDT.
+ */
+static unsigned int cpu_node_size(void)
+{
+ static unsigned int size;
+ struct device_node *dn;
+ struct property *pp;
+
+ /*
+ * Don't compute it twice; we assume that the per-CPU node size
+ * doesn't change during the system's life.
+ */
+ if (size)
+ return size;
+
+ dn = of_find_node_by_type(NULL, "cpu");
+ if (WARN_ON_ONCE(!dn)) {
+ // Unlikely to happen
+ return 0;
+ }
+
+ /*
+ * We compute the sub node size for a CPU node, assuming it
+ * will be the same for all.
+ */
+ size += strlen(dn->name) + 5;
+ for_each_property_of_node(dn, pp) {
+ size += strlen(pp->name);
+ size += pp->length;
+ }
+
+ of_node_put(dn);
+ return size;
+}
+
+/**
* kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
* setup FDT for kexec/kdump kernel.
* @image: kexec image being loaded.
@@ -937,6 +977,8 @@ out:
*/
unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
{
+ unsigned int cpu_nodes, extra_size;
+ struct device_node *dn;
u64 usm_entries;
if (image->type != KEXEC_TYPE_CRASH)
@@ -949,7 +991,22 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
*/
usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
(2 * (resource_size(&crashk_res) / drmem_lmb_size())));
- return (unsigned int)(usm_entries * sizeof(u64));
+
+ extra_size = (unsigned int)(usm_entries * sizeof(u64));
+
+ /*
+ * Get the number of CPU nodes in the current DT. This allows us to
+ * reserve space for CPU nodes added since boot time.
+ */
+ cpu_nodes = 0;
+ for_each_node_by_type(dn, "cpu") {
+ cpu_nodes++;
+ }
+
+ if (cpu_nodes > boot_cpu_node_count)
+ extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+
+ return extra_size;
}
/**
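To make the size estimate above concrete, a worked example with illustrative numbers (not taken from the patch): with 16 GiB of RAM, a 256 MiB LMB size, a 512 MiB crashkernel reservation, and 8 CPU nodes hot-added since boot at roughly 1 KiB per node:

	usm_entries = (16 GiB / 256 MiB) + 2 * (512 MiB / 256 MiB) = 64 + 4 = 68
	extra_size  = 68 * sizeof(u64) = 544 bytes
	extra_size += 8 * cpu_node_size() ~= 544 + 8 * 1024 = 8736 bytes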
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 61cdd782d3c5..a9f57dad6d91 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
+ depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
help
Support running guest kernels in virtual machines on processors
@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select MMU_NOTIFIER
@@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e9744b41a226..351ff0f89b00 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1202,7 +1202,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
if (rc < 0)
return rc;
- resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+ resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__,
resize->hpt.virt);
return 0;
@@ -1443,7 +1443,7 @@ static void resize_hpt_prepare_work(struct work_struct *work)
*/
mutex_unlock(&kvm->arch.mmu_setup_lock);
- resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+ resize_hpt_debug(resize, "%s(): order = %d\n", __func__,
resize->order);
err = resize_hpt_allocate(resize);
@@ -1887,8 +1887,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
tmp);
if (ret != H_SUCCESS) {
- pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
- "r=%lx\n", ret, i, v, r);
+ pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r);
goto out;
}
if (!mmu_ready && is_vrma_hpte(v)) {
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 40864373ef87..95e738ef9062 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -294,14 +294,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
struct mm_struct *mm = kvm->mm;
- unsigned long npages, size = args->size;
+ unsigned long npages;
int ret;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL;
- npages = kvmppc_tce_pages(size);
+ npages = kvmppc_tce_pages(args->size);
ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
if (ret)
return ret;
@@ -314,7 +314,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->liobn = args->liobn;
stt->page_shift = args->page_shift;
stt->offset = args->offset;
- stt->size = size;
+ stt->size = args->size;
stt->kvm = kvm;
mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 96b65b530156..acf80915f406 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2736,7 +2736,7 @@ kvmppc_bad_host_intr:
std r6, SOFTE(r1)
LOAD_PACA_TOC()
LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER)
- std r3, STACK_FRAME_OVERHEAD-16(r1)
+ std r3, STACK_INT_FRAME_MARKER(r1)
/*
* XXX On POWER7 and POWER8, we just spin here since we don't
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e2f11f9c3f2a..1d67baa5557a 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -1190,8 +1190,7 @@ int kvmppc_uvmem_init(void)
pfn_first = res->start >> PAGE_SHIFT;
pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
- kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
- sizeof(unsigned long), GFP_KERNEL);
+ kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL);
if (!kvmppc_uvmem_bitmap) {
ret = -ENOMEM;
goto out_unmap;
@@ -1215,5 +1214,5 @@ void kvmppc_uvmem_free(void)
memunmap_pages(&kvmppc_uvmem_pgmap);
release_mem_region(kvmppc_uvmem_pgmap.range.start,
range_len(&kvmppc_uvmem_pgmap.range));
- kfree(kvmppc_uvmem_bitmap);
+ bitmap_free(kvmppc_uvmem_bitmap);
}
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 4ca23644f752..f4115819e738 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -539,7 +539,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
if (irq == XICS_IPI || irq == 0) {
/*
* This barrier orders the setting of xc->cppr vs.
- * subsquent test of xc->mfrr done inside
+ * subsequent test of xc->mfrr done inside
* scan_interrupts and push_pending_to_hw
*/
smp_mb();
@@ -563,7 +563,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
/*
* This barrier orders both setting of in_eoi above vs,
* subsequent test of guest_priority, and the setting
- * of xc->cppr vs. subsquent test of xc->mfrr done inside
+ * of xc->cppr vs. subsequent test of xc->mfrr done inside
* scan_interrupts and push_pending_to_hw
*/
smp_mb();
@@ -1785,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
* stale_p (because it has no easy way to address it). Hence we have
* to adjust stale_p before shutting down the interrupt.
*/
-void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
- struct kvmppc_xive_vcpu *xc, int irq)
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -1827,8 +1826,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
if (xc->esc_virq[i]) {
if (kvmppc_xive_has_single_escalation(xc->xive))
- xive_cleanup_single_escalation(vcpu, xc,
- xc->esc_virq[i]);
+ xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
@@ -2392,7 +2390,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
/*
* Now, we select a target if we have one. If we don't we
* leave the interrupt untargetted. It means that an interrupt
- * can become "untargetted" accross migration if it was masked
+ * can become "untargetted" across migration if it was masked
* by set_xive() but there is little we can do about it.
*/
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 1e48f72e8aa5..62bf39f53783 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -299,8 +299,7 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
-void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
- struct kvmppc_xive_vcpu *xc, int irq);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq);
int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5271c33fe79e..4f566bea5e10 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -93,8 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
/* Free the escalation irq */
if (xc->esc_virq[i]) {
if (kvmppc_xive_has_single_escalation(xc->xive))
- xive_cleanup_single_escalation(vcpu, xc,
- xc->esc_virq[i]);
+ xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7b4920e9fd26..0dce93ccaadf 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1015,6 +1015,9 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
u32 last_inst = KVM_INST_FETCH_FAILED;
enum emulation_result emulated = EMULATE_DONE;
+ /* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */
+ kvmppc_fix_ee_after_exit();
+
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 8262c14fc9e6..b5fe6fb53c66 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -424,15 +424,6 @@ _GLOBAL(kvmppc_resume_host)
mtspr SPRN_EPCR, r3
isync
-#ifdef CONFIG_64BIT
- /*
- * We enter with interrupts disabled in hardware, but
- * we need to call RECONCILE_IRQ_STATE to ensure
- * that the software state is kept in sync.
- */
- RECONCILE_IRQ_STATE(r3,r5)
-#endif
-
/* Switch to kernel stack and jump to handler. */
mr r3, r4
mr r5, r14 /* intno */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8560c912186d..4de71cbf6e8e 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o copy_mc_64.o
-ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP) += qspinlock.o
+else
obj64-$(CONFIG_SMP) += locks.o
endif
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ad0cf3108dd0..73ce4b90bb1b 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -4,12 +4,17 @@
*/
#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/cpuhotplug.h>
#include <linux/uaccess.h>
#include <linux/jump_label.h>
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <asm/code-patching.h>
@@ -41,12 +46,59 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
return __patch_instruction(addr, instr, addr);
}
-#ifdef CONFIG_STRICT_KERNEL_RWX
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+struct patch_context {
+ union {
+ struct vm_struct *area;
+ struct mm_struct *mm;
+ };
+ unsigned long addr;
+ pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
static int map_patch_area(void *addr, unsigned long text_poke_addr);
static void unmap_patch_area(unsigned long addr);
+static bool mm_patch_enabled(void)
+{
+ return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *orig_mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+ WARN_ON(!mm_is_thread_local(temp_mm));
+
+ suspend_breakpoints();
+ return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+ struct mm_struct *orig_mm)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(temp_mm, orig_mm, current);
+ restore_breakpoints();
+}
+
static int text_area_cpu_up(unsigned int cpu)
{
struct vm_struct *area;
@@ -68,29 +120,110 @@ static int text_area_cpu_up(unsigned int cpu)
unmap_patch_area(addr);
- this_cpu_write(text_poke_area, area);
+ this_cpu_write(cpu_patching_context.area, area);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
return 0;
}
static int text_area_cpu_down(unsigned int cpu)
{
- free_vm_area(this_cpu_read(text_poke_area));
+ free_vm_area(this_cpu_read(cpu_patching_context.area));
+ this_cpu_write(cpu_patching_context.area, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
+ return 0;
+}
+
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm);
+ free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+ mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+ struct mm_struct *mm;
+ unsigned long addr;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ mm = mm_alloc();
+ if (WARN_ON(!mm))
+ goto fail_no_mm;
+
+ /*
+ * Choose a random page-aligned address from the interval
+ * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+ * The lower address bound is PAGE_SIZE to avoid the zero-page.
+ */
+ addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+ /*
+ * PTE allocation uses GFP_KERNEL which means we need to
+ * pre-allocate the PTE here because we cannot do the
+ * allocation during patching when IRQs are disabled.
+ *
+ * We use get_locked_pte() to avoid open coding; the lock
+ * itself is unnecessary.
+ */
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto fail_no_pte;
+ pte_unmap_unlock(pte, ptl);
+
+ this_cpu_write(cpu_patching_context.mm, mm);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, pte);
+
+ return 0;
+
+fail_no_pte:
+ put_patching_mm(mm, addr);
+fail_no_mm:
+ return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+ put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+ this_cpu_read(cpu_patching_context.addr));
+
+ this_cpu_write(cpu_patching_context.mm, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
+
return 0;
}
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
-/*
- * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
- * we judge it as being preferable to a kernel that will crash later when
- * someone tries to use patch_instruction().
- */
void __init poking_init(void)
{
- BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
- "powerpc/text_poke:online", text_area_cpu_up,
- text_area_cpu_down));
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ return;
+
+ if (mm_patch_enabled())
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke_mm:online",
+ text_area_cpu_up_mm,
+ text_area_cpu_down_mm);
+ else
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down);
+
+ /* cpuhp_setup_state returns >= 0 on success */
+ if (WARN_ON(ret < 0))
+ return;
+
static_branch_enable(&poking_init_done);
}
@@ -147,6 +280,50 @@ static void unmap_patch_area(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+ struct mm_struct *patching_mm;
+ struct mm_struct *orig_mm;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync": : :"memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ /* hwsync performed by __patch_instruction (sync) if successful */
+ if (err)
+ mb(); /* sync */
+
+ /* context synchronisation performed by __patch_instruction (isync or exception) */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ return err;
+}
+
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
@@ -155,10 +332,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
pte_t *pte;
unsigned long pfn = get_patch_pfn(addr);
- text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
- pte = virt_to_kpte(text_poke_addr);
+ pte = __this_cpu_read(cpu_patching_context.pte);
__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
/* See ptesync comment in radix__set_pte_at() */
if (radix_enabled())
@@ -172,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
return err;
}
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
{
int err;
unsigned long flags;
@@ -182,34 +359,19 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!static_branch_likely(&poking_init_done))
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+ !static_branch_likely(&poking_init_done))
return raw_patch_instruction(addr, instr);
local_irq_save(flags);
- err = __do_patch_instruction(addr, instr);
+ if (mm_patch_enabled())
+ err = __do_patch_instruction_mm(addr, instr);
+ else
+ err = __do_patch_instruction(addr, instr);
local_irq_restore(flags);
return err;
}
-#else /* !CONFIG_STRICT_KERNEL_RWX */
-
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
-{
- return raw_patch_instruction(addr, instr);
-}
-
-#endif /* CONFIG_STRICT_KERNEL_RWX */
-
-__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
-
-int patch_instruction(u32 *addr, ppc_inst_t instr)
-{
- /* Make sure we aren't patching a freed init section */
- if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
- return 0;
-
- return do_patch_instruction(addr, instr);
-}
NOKPROBE_SYMBOL(patch_instruction);
int patch_branch(u32 *addr, unsigned long target, int flags)
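The temp-mm rework keeps the external contract of patch_instruction() unchanged; a minimal caller-side sketch (assuming the usual asm/code-patching.h, asm/inst.h and asm/ppc-opcode.h helpers are in scope):

static int example_nop_out(u32 *site)
{
	/* Replace the instruction at 'site' with a no-op. */
	return patch_instruction(site, ppc_inst(PPC_RAW_NOP()));
}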
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 31f40f544de5..80def1c2afcb 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -117,10 +117,64 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+ return system_state < SYSTEM_FREEING_INITMEM ||
+ !init_section_contains(dest, size);
+}
+
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ int j;
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ for (j = 0; j < num; j++)
+ patch_instruction(dest + j, ppc_inst(instrs[j]));
+ }
+ return i;
+}
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+ bool do_fallback, void *fallback)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (do_fallback) {
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ }
+ }
+ return i;
+}
+
static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -144,23 +198,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- // See comment in do_entry_flush_fixups() RE order of patching
- if (types & STF_BARRIER_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
+ i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+ &stf_barrier_fallback);
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
@@ -172,7 +211,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
{
- unsigned int instrs[6], *dest;
+ unsigned int instrs[6];
long *start, *end;
int i;
@@ -206,18 +245,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest + 3, ppc_inst(instrs[3]));
- patch_instruction(dest + 4, ppc_inst(instrs[4]));
- patch_instruction(dest + 5, ppc_inst(instrs[5]));
- }
printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
(types == STF_BARRIER_NONE) ? "no" :
(types == STF_BARRIER_FALLBACK) ? "fallback" :
@@ -274,7 +303,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
void do_uaccess_flush_fixups(enum l1d_flush_type types)
{
- unsigned int instrs[4], *dest;
+ unsigned int instrs[4];
long *start, *end;
int i;
@@ -300,17 +329,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
-
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest + 3, ppc_inst(instrs[3]));
- }
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -325,7 +344,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
static int __do_entry_flush_fixups(void *data)
{
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -375,42 +394,13 @@ static int __do_entry_flush_fixups(void *data)
start = PTRRELOC(&__start___entry_flush_fixup);
end = PTRRELOC(&__stop___entry_flush_fixup);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
+ i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &entry_flush_fallback);
start = PTRRELOC(&__start___scv_entry_flush_fixup);
end = PTRRELOC(&__stop___scv_entry_flush_fixup);
- for (; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- if (types == L1D_FLUSH_FALLBACK) {
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_branch(dest + 1,
- (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
- } else {
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- patch_instruction(dest, ppc_inst(instrs[0]));
- }
- }
-
+ i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &scv_entry_flush_fallback);
printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -438,7 +428,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
static int __do_rfi_flush_fixups(void *data)
{
enum l1d_flush_type types = *(enum l1d_flush_type *)data;
- unsigned int instrs[3], *dest;
+ unsigned int instrs[3];
long *start, *end;
int i;
@@ -462,15 +452,7 @@ static int __do_rfi_flush_fixups(void *data)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
-
- patch_instruction(dest, ppc_inst(instrs[0]));
- patch_instruction(dest + 1, ppc_inst(instrs[1]));
- patch_instruction(dest + 2, ppc_inst(instrs[2]));
- }
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
@@ -512,7 +494,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr, *dest;
+ unsigned int instr;
long *start, *end;
int i;
@@ -526,12 +508,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, ppc_inst(instr));
- }
+ i = do_patch_fixups(start, end, &instr, 1);
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
@@ -553,7 +530,7 @@ void do_barrier_nospec_fixups(bool enable)
#ifdef CONFIG_PPC_E500
void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
{
- unsigned int instr[2], *dest;
+ unsigned int instr[2];
long *start, *end;
int i;
@@ -569,13 +546,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
instr[1] = PPC_RAW_SYNC();
}
- for (i = 0; start < end; start++, i++) {
- dest = (void *)start + *start;
-
- pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction(dest, ppc_inst(instr[0]));
- patch_instruction(dest + 1, ppc_inst(instr[1]));
- }
+ i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
}
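The consolidated do_patch_fixups()/do_patch_entry_fixups() helpers resolve patch sites the same way the open-coded loops did: each fixup table entry stores an offset relative to its own address. A small illustration (addresses are made up):

static unsigned int *example_fixup_dest(long *entry)
{
	/* Each table entry stores an offset relative to its own address. */
	return (void *)entry + *entry;
}

/* e.g. entry at 0xc000000002000000 holding -0x1000000 resolves to
 * 0xc000000001000000, the instruction sequence to patch. */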
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 000000000000..1cf5d3e75250
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,996 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+
+#define MAX_NODES 4
+
+struct qnode {
+ struct qnode *next;
+ struct qspinlock *lock;
+ int cpu;
+ int yield_cpu;
+ u8 locked; /* 1 if lock acquired */
+};
+
+struct qnodes {
+ int count;
+ struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+static int steal_spins __read_mostly = (1 << 5);
+static int remote_steal_spins __read_mostly = (1 << 2);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+static bool maybe_stealers __read_mostly = true;
+#endif
+static int head_spins __read_mostly = (1 << 8);
+
+static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
+static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+static __always_inline bool recently_sleepy(void)
+{
+ /* pv_sleepy_lock is true when this is called */
+ if (pv_sleepy_lock_interval_ns) {
+ u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+ if (seen) {
+ u64 delta = sched_clock() - seen;
+ if (delta < pv_sleepy_lock_interval_ns)
+ return true;
+ this_cpu_write(sleepy_lock_seen_clock, 0);
+ }
+ }
+
+ return false;
+}
+
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return steal_spins * pv_sleepy_lock_factor;
+ else
+ return steal_spins;
+}
+
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return remote_steal_spins * pv_sleepy_lock_factor;
+ else
+ return remote_steal_spins;
+}
+
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return head_spins * pv_sleepy_lock_factor;
+ else
+ return head_spins;
+}
+
+static inline u32 encode_tail_cpu(int cpu)
+{
+ return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(u32 val)
+{
+ return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+static inline int get_owner_cpu(u32 val)
+{
+ return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
+}
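/*
 * Worked illustration (assumed offset value, not part of the patch): with
 * _Q_TAIL_CPU_OFFSET == 17, encode_tail_cpu(3) = (3 + 1) << 17 = 0x80000,
 * and decode_tail_cpu(0x80000) = (0x80000 >> 17) - 1 = 3. The +1/-1 bias
 * keeps an all-zero tail field meaning "no CPU queued".
 */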
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 newval = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ asm volatile(
+"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
+ /* This test is necessary if there could be stealers */
+" andi. %1,%0,%5 \n"
+" bne 3f \n"
+ /* Test whether the lock tail == mytail */
+" and %1,%0,%6 \n"
+" cmpw 0,%1,%3 \n"
+ /* Merge the new locked value */
+" or %1,%1,%4 \n"
+" bne 2f \n"
+ /* If the lock tail matched, then clear it, otherwise leave it. */
+" andc %1,%1,%6 \n"
+"2: stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"3: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r"(tail), "r" (newval),
+ "i" (_Q_LOCKED_VAL),
+ "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+ u32 prev, tmp;
+
+ asm volatile(
+"\t" PPC_RELEASE_BARRIER " \n"
+"1: lwarx %0,0,%2 # publish_tail_cpu \n"
+" andc %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+ : "=&r" (prev), "=&r"(tmp)
+ : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # set_mustq \n"
+" or %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # clear_mustq \n"
+" andc %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+ u32 prev;
+ u32 new = old | _Q_SLEEPY_VAL;
+
+ BUG_ON(!(old & _Q_LOCKED_VAL));
+ BUG_ON(old & _Q_SLEEPY_VAL);
+
+ asm volatile(
+"1: lwarx %0,0,%1 # try_set_sleepy \n"
+" cmpw 0,%0,%2 \n"
+" bne- 2f \n"
+" stwcx. %3,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r"(old), "r" (new)
+ : "cr0", "memory");
+
+ return likely(prev == old);
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+ if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (val & _Q_LOCKED_VAL) {
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+ }
+}
+
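+/* Find the queue node published by the CPU encoded in the tail of @val. */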
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+{
+ int cpu = decode_tail_cpu(val);
+ struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+ int idx;
+
+ /*
+ * After publishing the new tail and finding a previous tail in the
+ * previous val (which is the control dependency), this barrier
+ * orders the release barrier in publish_tail_cpu performed by the
+ * last CPU, with subsequently looking at its qnode structures
+ * after the barrier.
+ */
+ smp_acquire__after_ctrl_dep();
+
+ for (idx = 0; idx < MAX_NODES; idx++) {
+ struct qnode *qnode = &qnodesp->nodes[idx];
+ if (qnode->lock == lock)
+ return qnode;
+ }
+
+ BUG();
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+ int owner;
+ u32 yield_count;
+ bool preempted = false;
+
+ BUG_ON(!(val & _Q_LOCKED_VAL));
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_owner)
+ goto relax;
+
+ owner = get_owner_cpu(val);
+ yield_count = yield_count_of(owner);
+
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ seen_sleepy_owner(lock, val);
+ preempted = true;
+
+ /*
+ * Read the lock word after sampling the yield count. On the other side
+ * there may be a wmb because the yield count update is done by the
+ * hypervisor preemption and the value update by the OS, however this
+ * ordering might reduce the chance of out of order accesses and
+ * improve the heuristic.
+ */
+ smp_rmb();
+
+ if (READ_ONCE(lock->val) == val) {
+ if (mustq)
+ clear_mustq(lock);
+ yield_to_preempted(owner, yield_count);
+ if (mustq)
+ set_mustq(lock);
+ spin_begin();
+
+ /* Don't relax if we yielded. Maybe we should? */
+ return preempted;
+ }
+ spin_begin();
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ return __yield_to_locked_owner(lock, val, paravirt, false);
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ bool mustq = false;
+
+ if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
+ mustq = true;
+
+ return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
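+/*
+ * Pass the preempted lock owner's CPU number on to the next queued
+ * waiter, so it can direct its yields at the owner too.
+ */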
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+ struct qnode *next;
+ int owner;
+
+ if (!paravirt)
+ return;
+ if (!pv_yield_propagate_owner)
+ return;
+
+ owner = get_owner_cpu(val);
+ if (*set_yield_cpu == owner)
+ return;
+
+ next = READ_ONCE(node->next);
+ if (!next)
+ return;
+
+ if (vcpu_is_preempted(owner)) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ } else if (*set_yield_cpu != -1) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ }
+}
+
+/* Called inside spin_begin() */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+{
+ int prev_cpu = decode_tail_cpu(val);
+ u32 yield_count;
+ int yield_cpu;
+ bool preempted = false;
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_propagate_owner)
+ goto yield_prev;
+
+ yield_cpu = READ_ONCE(node->yield_cpu);
+ if (yield_cpu == -1) {
+ /* Propagate back the -1 CPU */
+ if (node->next && node->next->yield_cpu != -1)
+ node->next->yield_cpu = yield_cpu;
+ goto yield_prev;
+ }
+
+ yield_count = yield_count_of(yield_cpu);
+ if ((yield_count & 1) == 0)
+ goto yield_prev; /* owner vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb();
+
+ if (yield_cpu == node->yield_cpu) {
+ if (node->next && node->next->yield_cpu != yield_cpu)
+ node->next->yield_cpu = yield_cpu;
+ yield_to_preempted(yield_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+yield_prev:
+ if (!pv_yield_prev)
+ goto relax;
+
+ yield_count = yield_count_of(prev_cpu);
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb(); /* See __yield_to_locked_owner comment */
+
+ if (!node->locked) {
+ yield_to_preempted(prev_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
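+/* Decide whether the steal spin budget is exhausted and we should queue. */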
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+ if (iters >= get_steal_spins(paravirt, sleepy))
+ return true;
+
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (iters >= get_remote_steal_spins(paravirt, sleepy))) {
+ int cpu = get_owner_cpu(val);
+ if (numa_node_id() != cpu_to_node(cpu))
+ return true;
+ }
+ return false;
+}
+
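+/*
+ * Try to take the lock without queueing, spinning and yielding to the
+ * owner until the steal budget runs out or the must-queue bit is set.
+ */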
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+ bool seen_preempted = false;
+ bool sleepy = false;
+ int iters = 0;
+ u32 val;
+
+ if (!steal_spins) {
+ /* XXX: should spin_on_preempted_owner do anything here? */
+ return false;
+ }
+
+ /* Attempt to steal the lock */
+ spin_begin();
+ do {
+ bool preempted = false;
+
+ val = READ_ONCE(lock->val);
+ if (val & _Q_MUST_Q_VAL)
+ break;
+ spec_barrier();
+
+ if (unlikely(!(val & _Q_LOCKED_VAL))) {
+ spin_end();
+ if (__queued_spin_trylock_steal(lock))
+ return true;
+ spin_begin();
+ } else {
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ }
+
+ if (paravirt && pv_sleepy_lock) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ if (preempted) {
+ seen_preempted = true;
+ sleepy = true;
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ /*
+ * pv_spin_on_preempted_owner doesn't increase iters
+ * while the owner is preempted -- we won't interfere
+ * with it by definition. This could introduce some
+ * latency issue if we continually observe preempted
+ * owners, but hopefully that's a rare corner case of
+ * a badly oversubscribed system.
+ */
+ } else {
+ iters++;
+ }
+ } while (!steal_break(val, iters, paravirt, sleepy));
+
+ spin_end();
+
+ return false;
+}
+
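+/*
+ * Queue on the MCS waitqueue: publish this CPU's qnode as the new tail,
+ * wait to become the queue head, then spin until the lock can be taken.
+ */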
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+ struct qnodes *qnodesp;
+ struct qnode *next, *node;
+ u32 val, old, tail;
+ bool seen_preempted = false;
+ bool sleepy = false;
+ bool mustq = false;
+ int idx;
+ int set_yield_cpu = -1;
+ int iters = 0;
+
+ BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+ qnodesp = this_cpu_ptr(&qnodes);
+ if (unlikely(qnodesp->count >= MAX_NODES)) {
+ spec_barrier();
+ while (!queued_spin_trylock(lock))
+ cpu_relax();
+ return;
+ }
+
+ idx = qnodesp->count++;
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+ node = &qnodesp->nodes[idx];
+ node->next = NULL;
+ node->lock = lock;
+ node->cpu = smp_processor_id();
+ node->yield_cpu = -1;
+ node->locked = 0;
+
+ tail = encode_tail_cpu(node->cpu);
+
+ old = publish_tail_cpu(lock, tail);
+
+ /*
+ * If there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_CPU_MASK) {
+ struct qnode *prev = get_tail_qnode(lock, old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ /* Wait for mcs node lock to be released */
+ spin_begin();
+ while (!node->locked) {
+ spec_barrier();
+
+ if (yield_to_prev(lock, node, old, paravirt))
+ seen_preempted = true;
+ }
+ spec_barrier();
+ spin_end();
+
+ /* Clear out stale propagated yield_cpu */
+ if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+ node->yield_cpu = -1;
+
+ smp_rmb(); /* acquire barrier for the mcs lock */
+
+ /*
+ * Generic qspinlocks have this prefetch here, but it seems
+ * like it could cause additional line transitions because
+ * the waiter will keep loading from it.
+ */
+ if (_Q_SPIN_PREFETCH_NEXT) {
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
+ }
+ }
+
+ /* We're at the head of the waitqueue, wait for the lock. */
+again:
+ spin_begin();
+ for (;;) {
+ bool preempted;
+
+ val = READ_ONCE(lock->val);
+ if (!(val & _Q_LOCKED_VAL))
+ break;
+ spec_barrier();
+
+ if (paravirt && pv_sleepy_lock && maybe_stealers) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+ preempted = yield_head_to_locked_owner(lock, val, paravirt);
+ if (!maybe_stealers)
+ continue;
+
+ if (preempted)
+ seen_preempted = true;
+
+ if (paravirt && preempted) {
+ sleepy = true;
+
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ } else {
+ iters++;
+ }
+
+ if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+ mustq = true;
+ set_mustq(lock);
+ val |= _Q_MUST_Q_VAL;
+ }
+ }
+ spec_barrier();
+ spin_end();
+
+ /* If we're the last queued, must clean up the tail. */
+ old = trylock_clean_tail(lock, tail);
+ if (unlikely(old & _Q_LOCKED_VAL)) {
+ BUG_ON(!maybe_stealers);
+ goto again; /* Can only be true if maybe_stealers. */
+ }
+
+ if ((old & _Q_TAIL_CPU_MASK) == tail)
+ goto release; /* We were the tail, no next. */
+
+ /* There is a next, must wait for node->next != NULL (MCS protocol) */
+ next = READ_ONCE(node->next);
+ if (!next) {
+ spin_begin();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ spin_end();
+ }
+ spec_barrier();
+
+ /*
+ * Unlock the next mcs waiter node. Release barrier is not required
+ * here because the acquirer is only accessing the lock word, and
+ * the acquire barrier we took the lock with orders that update vs
+ * this store to locked. The corresponding barrier is the smp_rmb()
+ * acquire barrier for mcs lock, above.
+ */
+ if (paravirt && pv_prod_head) {
+ int next_cpu = next->cpu;
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ if (vcpu_is_preempted(next_cpu))
+ prod_cpu(next_cpu);
+ } else {
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ }
+
+release:
+ qnodesp->count--; /* release the node */
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+ /*
+ * This looks funny, but it induces the compiler to inline both
+ * sides of the branch rather than share code as when the condition
+ * is passed as the paravirt argument to the functions.
+ */
+ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+ if (try_to_steal_lock(lock, true)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, true);
+ } else {
+ if (try_to_steal_lock(lock, false)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, false);
+ }
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void)
+{
+}
+#endif
+
+#include <linux/debugfs.h>
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+ /* MAYBE_STEAL remains true */
+ steal_spins = val;
+#else
+ static DEFINE_MUTEX(lock);
+
+ /*
+ * The lock slow path has a !maybe_stealers case that can assume
+ * the head of queue will not see concurrent waiters. That waiter
+ * is unsafe in the presence of stealers, so we must keep them away
+ * from one another.
+ */
+
+ mutex_lock(&lock);
+ if (val && !steal_spins) {
+ maybe_stealers = true;
+ /* wait for queue head waiter to go away */
+ synchronize_rcu();
+ steal_spins = val;
+ } else if (!val && steal_spins) {
+ steal_spins = val;
+ /* wait for all possible stealers to go away */
+ synchronize_rcu();
+ maybe_stealers = false;
+ } else {
+ steal_spins = val;
+ }
+ mutex_unlock(&lock);
+#endif
+
+ return 0;
+}
+
+static int steal_spins_get(void *data, u64 *val)
+{
+ *val = steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+static int remote_steal_spins_set(void *data, u64 val)
+{
+ remote_steal_spins = val;
+
+ return 0;
+}
+
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+ *val = remote_steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
+static int head_spins_set(void *data, u64 val)
+{
+ head_spins = val;
+
+ return 0;
+}
+
+static int head_spins_get(void *data, u64 *val)
+{
+ *val = head_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
+static int pv_yield_owner_set(void *data, u64 val)
+{
+ pv_yield_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+ pv_yield_allow_steal = !!val;
+
+ return 0;
+}
+
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+ *val = pv_yield_allow_steal;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+ pv_spin_on_preempted_owner = !!val;
+
+ return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+ *val = pv_spin_on_preempted_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+ pv_sleepy_lock = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+ pv_sleepy_lock_sticky = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_sticky;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+ pv_sleepy_lock_interval_ns = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_interval_ns;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+ pv_sleepy_lock_factor = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_factor;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
+static int pv_yield_prev_set(void *data, u64 val)
+{
+ pv_yield_prev = !!val;
+
+ return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+ *val = pv_yield_prev;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+ pv_yield_propagate_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_propagate_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
+static int pv_prod_head_set(void *data, u64 val)
+{
+ pv_prod_head = !!val;
+
+ return 0;
+}
+
+static int pv_prod_head_get(void *data, u64 *val)
+{
+ *val = pv_prod_head;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
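+/*
+ * Expose the spin/steal tunables above via debugfs, typically under
+ * /sys/kernel/debug/powerpc/ when debugfs is mounted.
+ */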
+static __init int spinlock_debugfs_init(void)
+{
+ debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+ debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
+ debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+ if (is_shared_processor()) {
+ debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+ debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+ debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+ debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+ debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+ debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+ debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
+ debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+ debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+ debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
+ }
+
+ return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 398b5694aeb7..38158b77a801 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
op->type = MKOP(STCX, 0, 4);
break;
-#ifdef __powerpc64__
- case 84: /* ldarx */
- op->type = MKOP(LARX, 0, 8);
- break;
-
- case 214: /* stdcx. */
- op->type = MKOP(STCX, 0, 8);
- break;
-
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 52: /* lbarx */
op->type = MKOP(LARX, 0, 1);
break;
@@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
case 726: /* sthcx. */
op->type = MKOP(STCX, 0, 2);
break;
+#endif
+#ifdef __powerpc64__
+ case 84: /* ldarx */
+ op->type = MKOP(LARX, 0, 8);
+ break;
+
+ case 214: /* stdcx. */
+ op->type = MKOP(STCX, 0, 8);
+ break;
case 276: /* lqarx */
if (!((rd & 1) || rd == ra || rd == rb))
@@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
err = 0;
val = 0;
switch (size) {
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
case 1:
__get_user_asmx(val, ea, err, "lbarx");
break;
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 5473f9d03df3..e2b646a4f7fa 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -16,7 +16,7 @@ _GLOBAL(exec_instr)
/*
* Stack frame layout (INT_FRAME_SIZE bytes)
- * In-memory pt_regs (SP + STACK_FRAME_OVERHEAD)
+ * In-memory pt_regs (SP + STACK_INT_FRAME_REGS)
* Scratch space (SP + 8)
* Back chain (SP + 0)
*/
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index f76a50291fd7..d491da8d1838 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
- preempt_enable();
+ preempt_enable_no_resched();
+ /*
+ * Must never explicitly call schedule (including preempt_enable())
+ * while in a kuap-unlocked user copy, because the AMR register will
+ * not be saved and restored across context switch. However preempt
+ * not be saved and restored across a context switch. However, preempt
+ * set and we are preemptible. The hack here is to schedule a
+ * decrementer to fire here and reschedule for us if necessary.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
+ set_dec(1);
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 7de1a8a0c62a..02acbfd05b46 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,8 @@
#include <asm/machdep.h>
#include <asm/mmu.h>
+#include "internal.h"
+
int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, unsigned long flags,
int ssize, int subpg_prot)
@@ -118,6 +120,9 @@ repeat:
}
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
}
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 998c6817ed47..954af420f358 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,8 @@
#include <asm/machdep.h>
#include <asm/mmu.h>
+#include "internal.h"
+
/*
* Return true, if the entry has a slot value which
* the software considers as invalid.
@@ -216,6 +218,9 @@ repeat:
new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
new_pte |= H_PAGE_HASHPTE;
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
return 0;
}
@@ -327,7 +332,12 @@ repeat:
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
}
+
*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
return 0;
}
diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 623a7b7ab38b..9342e79870df 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+#ifdef CONFIG_LOCKDEP
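+/*
+ * The HPTE "lock" is a bit in the HPTE itself, so give lockdep a static
+ * map to track its acquire/release.
+ */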
+static struct lockdep_map hpte_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
+
+static void acquire_hpte_lock(void)
+{
+ lock_map_acquire(&hpte_lock_map);
+}
+
+static void release_hpte_lock(void)
+{
+ lock_map_release(&hpte_lock_map);
+}
+#else
+static void acquire_hpte_lock(void)
+{
+}
+
+static void release_hpte_lock(void)
+{
+}
+#endif
+
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
+ unsigned long flags;
int i;
+ local_irq_save(flags);
+
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
- if (i == HPTES_PER_GROUP)
+ if (i == HPTES_PER_GROUP) {
+ local_irq_restore(flags);
return -1;
+ }
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
+ release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
+ local_irq_restore(flags);
+
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
return i;
@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
+ unsigned long irqflags;
+
+ local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
+ local_irq_restore(irqflags);
+
return ret;
}
@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
}
/*
@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
+
return 0;
}
@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
- if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
- else
+ } else
native_unlock_hpte(hptep);
}
/*
@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
- /*
- * Invalidate the hpte. NOTE: this also unlocks it
- */
-
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- else
+ else {
+ release_hpte_lock();
hptep->v = 0;
+ }
} pte_iterate_hashed_end();
}
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index 747492edb75a..51f48984abca 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
- unsigned int step, nr_cpus, master_cpu;
+ unsigned int step, nr_cpus;
+ atomic_t master_cpu;
atomic_t cpu_counter;
};
@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
- if (parms->master_cpu != smp_processor_id())
+ // First CPU goes through, all others wait.
+ if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
- chmem_parms.master_cpu = smp_processor_id();
+ atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index df008edf7be0..80a148c57de8 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
return ret;
}
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
static int __init parse_disable_1tb_segments(char *p)
{
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
}
early_param("disable_1tb_segments", parse_disable_1tb_segments);
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+ stress_hpt_enabled = true;
+ return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+ unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+ /*
+ * LPAR H_REMOVE flushes the TLB, so we need some number > 1 of entries
+ * to allow practical forward progress. Bare metal returns 1, which
+ * seems to help uncover more bugs.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ return STRESS_MAX_GROUPS;
+ else
+ return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
pr_info("Partition table %p\n", partition_tb);
}
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+ int next_cpu;
+
+ hpt_clear_stress();
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ tlbiel_all();
+
+ next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+ if (next_cpu >= nr_cpu_ids)
+ next_cpu = cpumask_first(cpu_online_mask);
+ stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+ add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
static void __init htab_initialize(void)
{
unsigned long table;
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
if (stress_slb_enabled)
static_branch_enable(&stress_slb_key);
+ if (stress_hpt_enabled) {
+ unsigned long tmp;
+ static_branch_enable(&stress_hpt_key);
+ // Too early to use nr_cpu_ids, so use NR_CPUS
+ tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+ 0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+ memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+ stress_hpt_struct = __va(tmp);
+
+ timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+ stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+ add_timer(&stress_hpt_timer);
+ }
+
/*
* Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages.
@@ -1980,8 +2045,71 @@ repeat:
return slot;
}
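+/*
+ * Evict the HPT groups recorded by hpt_do_stress() on this CPU so later
+ * accesses take fresh hash faults (stress_hpt debugging aid).
+ */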
+void hpt_clear_stress(void)
+{
+ int cpu = raw_smp_processor_id();
+ int g;
+
+ for (g = 0; g < stress_nr_groups(); g++) {
+ unsigned long last_group;
+ last_group = stress_hpt_struct[cpu].last_group[g];
+
+ if (last_group != -1UL) {
+ int i;
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ if (mmu_hash_ops.hpte_remove(last_group) == -1)
+ break;
+ }
+ stress_hpt_struct[cpu].last_group[g] = -1;
+ }
+ }
+}
+
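+/*
+ * Remember the hash group used by this fault and evict a previously
+ * recorded group, to artificially increase the hash fault rate.
+ */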
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+ unsigned long last_group;
+ int cpu = raw_smp_processor_id();
+
+ last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+ if (hpte_group == last_group)
+ return;
+
+ if (last_group != -1UL) {
+ int i;
+ /*
+ * Concurrent CPUs might be inserting into this group, so
+ * give up after a number of iterations, to prevent a live
+ * lock.
+ */
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ if (mmu_hash_ops.hpte_remove(last_group) == -1)
+ break;
+ }
+ stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+ }
+
+ if (ea >= PAGE_OFFSET) {
+ /*
+ * We would really like to prefetch to get the TLB loaded, then
+ * remove the PTE before returning from fault interrupt, to
+ * increase the hash fault rate.
+ *
+ * Unfortunately QEMU TCG does not model the TLB in a way that
+ * makes this possible, and systemsim (mambo) emulator does not
+ * bring in TLBs with prefetches (although loads/stores do
+ * work for non-CI PTEs).
+ *
+ * So remember this PTE and clear it on the next hash fault.
+ */
+ memmove(&stress_hpt_struct[cpu].last_group[1],
+ &stress_hpt_struct[cpu].last_group[0],
+ (stress_nr_groups() - 1) * sizeof(unsigned long));
+ stress_hpt_struct[cpu].last_group[0] = hpte_group;
+ }
+}
+
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
-static DEFINE_SPINLOCK(linear_map_hash_lock);
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
@@ -2005,10 +2133,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
- spin_lock(&linear_map_hash_lock);
+ raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
@@ -2018,14 +2146,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
- spin_lock(&linear_map_hash_lock);
+ raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) {
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
return;
}
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
- spin_unlock(&linear_map_hash_lock);
+ raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index 5045048ce244..a57a25f06a21 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -13,6 +13,17 @@ static inline bool stress_slb(void)
return static_branch_unlikely(&stress_slb_key);
}
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+ return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
void slb_setup_new_exec(void);
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f6151a589298..85c84e89e3ea 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -100,14 +100,14 @@ static void do_serialize(void *arg)
}
/*
- * Serialize against find_current_mm_pte which does lock-less
+ * Serialize against __find_linux_pte() which does lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
- * find_current_mm_pte to finish.
+ * __find_linux_pte() to finish.
*/
void serialize_against_pte_lookup(struct mm_struct *mm)
{
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 84d171953ba4..8b121df7b08f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -344,7 +344,6 @@ void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
mark_initmem_nx();
- static_branch_enable(&init_mem_is_free);
free_initmem_default(POISON_FREE_INITMEM);
ftrace_free_init_tramp();
}
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 0d04f9d5da8d..2fb3edafe9ab 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -19,7 +19,6 @@
#include <asm/cacheflush.h>
#include <asm/kdump.h>
#include <mm/mmu_decl.h>
-#include <generated/utsrelease.h>
struct regions {
unsigned long pa_start;
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 2c15c86c7015..a903b308acc5 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
mmu_get_tsize(mmu_virtual_psize), 0);
}
EXPORT_SYMBOL(local_flush_tlb_page);
+
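+/* Flush a single page of the given page size from the local CPU's TLB. */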
+void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ __local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page_psize);
+
#endif
/*
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43f1c76d48ce..a379b0ce19ff 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
- /* First arg comes in as a 32 bits pointer. */
- EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
- EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+ /* Initialize tail_call_cnt, to be skipped if we do tail calls. */
+ EMIT(PPC_RAW_LI(_R4, 0));
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 4
+
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
- /*
- * Initialize tail_call_cnt in stack frame if we do tail calls.
- * Otherwise, put in NOPs so that it can be skipped when we are
- * invoked through a tail call.
- */
if (ctx->seen & SEEN_TAILCALL)
- EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
- bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
- else
- EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
-#define BPF_TAILCALL_PROLOGUE_SIZE 16
+ /* First arg comes in as a 32 bits pointer. */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
/*
* We need a stack frame, but we don't necessarily need to
@@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
-}
-
-void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
- EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
-
- bpf_jit_emit_common_epilogue(image, ctx);
-
- /* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+ /* Tear down our stack frame */
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+ bpf_jit_emit_common_epilogue(image, ctx);
+
EMIT(PPC_RAW_BLR());
}
@@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
- EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
@@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* goto *(prog->bpf_func + prologue_size); */
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
-
EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
-
- if (ctx->seen & SEEN_FUNC)
- EMIT(PPC_RAW_MTLR(_R0));
-
EMIT(PPC_RAW_MTCTR(_R3));
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+ /* Put tail_call_cnt in r4 */
+ EMIT(PPC_RAW_MR(_R4, _R0));
+
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 082f6d0308a4..6b4434dd0ff3 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -27,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
{
if (sp & 0xf)
return 0; /* must be 16-byte aligned */
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return 0;
if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
return 1;
@@ -53,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
sp = regs->gpr[1];
perf_callchain_store(entry, perf_instruction_pointer(regs));
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return;
for (;;) {
@@ -61,12 +61,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
next_sp = fp[0];
if (next_sp == sp + STACK_INT_FRAME_SIZE &&
- fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) &&
+ fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
/*
* This looks like an interrupt frame for an
* interrupt that occurred in the kernel
*/
- regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+ regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
next_ip = regs->nip;
lr = regs->link;
level = 0;
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
index 8965b4463d43..5e86371a20c7 100644
--- a/arch/powerpc/perf/hv-gpci-requests.h
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id)
)
#include I(REQUEST_END)
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
/*
* Not available for counter_info_version >= 0x8, use
* run_instruction_cycles_by_partition(0x100) instead.
@@ -92,6 +93,7 @@ REQUEST(__field(0, 8, partition_id)
__count(0x10, 8, cycles)
)
#include I(REQUEST_END)
+#endif
#define REQUEST_NAME system_performance_capabilities
#define REQUEST_NUM 0x40
@@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged)
)
#include I(REQUEST_END)
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
#define REQUEST_NAME processor_bus_utilization_abc_links
#define REQUEST_NUM 0x50
#define REQUEST_IDX_KIND "hw_chip_id=?"
@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx)
__count(0x28, 8, instructions_completed)
)
#include I(REQUEST_END)
+#endif
/* Processor_core_power_mode (0x95) skipped, no counters */
/* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 5eb60ed5b5e8..7ff8ff3509f5 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -70,9 +70,9 @@ static const struct attribute_group format_group = {
.attrs = format_attrs,
};
-static const struct attribute_group event_group = {
+static struct attribute_group event_group = {
.name = "events",
- .attrs = hv_gpci_event_attrs,
+ /* .attrs is set in init */
};
#define HV_CAPS_ATTR(_name, _format) \
@@ -330,6 +330,7 @@ static int hv_gpci_init(void)
int r;
unsigned long hret;
struct hv_perf_caps caps;
+ struct hv_gpci_request_buffer *arg;
hv_gpci_assert_offsets_correct();
@@ -353,6 +354,36 @@ static int hv_gpci_init(void)
/* sampling not supported */
h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * hcall H_GET_PERF_COUNTER_INFO populates the output
+ * counter_info_version value based on the system hypervisor.
+ * Pass counter request 0x10, which corresponds to request type
+ * 'Dispatch_timebase_by_processor', to get the supported
+ * counter_info_version.
+ */
+ arg->params.counter_request = cpu_to_be32(0x10);
+
+ r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+ if (r) {
+ pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
+ arg->params.counter_info_version_out = 0x8;
+ }
+
+ /*
+ * Use counter_info_version_out value to assign
+ * required hv-gpci event list.
+ */
+ if (arg->params.counter_info_version_out >= 0x8)
+ event_group.attrs = hv_gpci_event_attrs;
+ else
+ event_group.attrs = hv_gpci_event_attrs_v6;
+
+ put_cpu_var(hv_gpci_reqb);
+
r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
if (r)
return r;
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index 4d108262bed7..c72020912dea 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -26,6 +26,7 @@ enum {
#define REQUEST_FILE "../hv-gpci-requests.h"
#define NAME_LOWER hv_gpci
#define NAME_UPPER HV_GPCI
+#define ENABLE_EVENTS_COUNTERINFO_V6
#include "req-gen/perf.h"
#undef REQUEST_FILE
#undef NAME_LOWER
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
index fa9bc804e67a..6b2a59fefffa 100644
--- a/arch/powerpc/perf/req-gen/perf.h
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \
#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
r_fields
+/* Generate event list for platforms with counter_info_version 0x6 or below */
+static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
+#include REQUEST_FILE
+ NULL
+};
+
+/*
+ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
+ * events were deprecated for platform firmware that supports
+ * counter_info_version 0x8 or above.
+ * Those deprecated events are still part of platform firmware that
+ * supports counter_info_version 0x6 and below. As per the getPerfCountInfo
+ * v1.018 documentation there is no counter_info_version 0x7.
+ * Undefine the macro ENABLE_EVENTS_COUNTERINFO_V6 to disable the addition of
+ * deprecated events in the "hv_gpci_event_attrs" attribute group, for
+ * platforms that support counter_info_version 0x8 or above.
+ */
+#undef ENABLE_EVENTS_COUNTERINFO_V6
+
+/* Generate event list for platforms with counter_info_version 0x8 or above */
static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
#include REQUEST_FILE
NULL
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index f03432ef010b..cefa313c09f0 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -5,15 +5,17 @@
* Copyright (c) 2008-2009 PIKA Technologies
* Sean MacLennan <smaclennan@pikatech.com>
*/
+#include <linux/err.h>
#include <linux/init.h>
#include <linux/of_platform.h>
#include <linux/kthread.h>
+#include <linux/leds.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -92,8 +94,6 @@ static int __init warp_post_info(void)
static LIST_HEAD(dtm_shutdown_list);
static void __iomem *dtm_fpga;
-static unsigned green_led, red_led;
-
struct dtm_shutdown {
struct list_head list;
@@ -101,7 +101,6 @@ struct dtm_shutdown {
void *arg;
};
-
int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
{
struct dtm_shutdown *shutdown;
@@ -132,6 +131,35 @@ int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
return -EINVAL;
}
+#define WARP_GREEN_LED 0
+#define WARP_RED_LED 1
+
+static struct gpio_led warp_gpio_led_pins[] = {
+ [WARP_GREEN_LED] = {
+ .name = "green",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
+ [WARP_RED_LED] = {
+ .name = "red",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
+};
+
+static struct gpio_led_platform_data warp_gpio_led_data = {
+ .leds = warp_gpio_led_pins,
+ .num_leds = ARRAY_SIZE(warp_gpio_led_pins),
+};
+
+static struct platform_device warp_gpio_leds = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev = {
+ .platform_data = &warp_gpio_led_data,
+ },
+};
+
static irqreturn_t temp_isr(int irq, void *context)
{
struct dtm_shutdown *shutdown;
@@ -139,7 +167,7 @@ static irqreturn_t temp_isr(int irq, void *context)
local_irq_disable();
- gpio_set_value(green_led, 0);
+ gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
/* Run through the shutdown list. */
list_for_each_entry(shutdown, &dtm_shutdown_list, list)
@@ -153,7 +181,7 @@ static irqreturn_t temp_isr(int irq, void *context)
out_be32(dtm_fpga + 0x14, reset);
}
- gpio_set_value(red_led, value);
+ gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value);
value ^= 1;
mdelay(500);
}
@@ -162,25 +190,78 @@ static irqreturn_t temp_isr(int irq, void *context)
return IRQ_HANDLED;
}
+/*
+ * The green and red power LEDs are normally driven by the leds-gpio driver,
+ * but during a critical temperature shutdown we want to drive them
+ * ourselves. We therefore acquire both GPIOs and register the leds-gpio
+ * platform device here, instead of doing it through the device tree, so
+ * that we keep access to the gpios and can use them when needed.
+ */
static int pika_setup_leds(void)
{
struct device_node *np, *child;
+ struct gpio_desc *gpio;
+ struct gpio_led *led;
+ int led_count = 0;
+ int error;
+ int i;
- np = of_find_compatible_node(NULL, NULL, "gpio-leds");
+ np = of_find_compatible_node(NULL, NULL, "warp-power-leds");
if (!np) {
printk(KERN_ERR __FILE__ ": Unable to find leds\n");
return -ENOENT;
}
- for_each_child_of_node(np, child)
- if (of_node_name_eq(child, "green"))
- green_led = of_get_gpio(child, 0);
- else if (of_node_name_eq(child, "red"))
- red_led = of_get_gpio(child, 0);
+ for_each_child_of_node(np, child) {
+ for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+ led = &warp_gpio_led_pins[i];
+
+ if (!of_node_name_eq(child, led->name))
+ continue;
+
+ if (led->gpiod) {
+ printk(KERN_ERR __FILE__ ": %s led has already been defined\n",
+ led->name);
+ continue;
+ }
+
+ gpio = fwnode_gpiod_get_index(of_fwnode_handle(child),
+ NULL, 0, GPIOD_ASIS,
+ led->name);
+ error = PTR_ERR_OR_ZERO(gpio);
+ if (error) {
+ printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n",
+ led->name, error);
+ of_node_put(child);
+ goto err_cleanup_pins;
+ }
+
+ led->gpiod = gpio;
+ led_count++;
+ }
+ }
of_node_put(np);
+ /* Skip device registration if no leds have been defined */
+ if (led_count) {
+ error = platform_device_register(&warp_gpio_leds);
+ if (error) {
+ printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n",
+ error);
+ goto err_cleanup_pins;
+ }
+ }
+
return 0;
+
+err_cleanup_pins:
+ for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+ led = &warp_gpio_led_pins[i];
+ gpiod_put(led->gpiod);
+ led->gpiod = NULL;
+ }
+ return error;
}
static void pika_setup_critical_temp(struct device_node *np,
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 48038aaedbd3..6d1dd6e87478 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -59,6 +59,8 @@ static struct mpc52xx_lpbfifo lpbfifo;
/**
* mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred
+ *
+ * @req: Pointer to request structure
*/
static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
{
@@ -178,6 +180,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
/**
* mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO
+ * @irq: IRQ number to be handled
+ * @dev_id: device ID cookie
*
* On transmit, the dma completion irq triggers before the fifo completion
* triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm
@@ -216,6 +220,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
* or nested spinlock condition. The out path is non-trivial, so
* extra fiddling is done to make sure all paths lead to the same
* outbound code.
+ *
+ * Return: irqreturn code (%IRQ_HANDLED)
*/
static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
{
@@ -320,8 +326,12 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
/**
* mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task
+ * @irq: IRQ number to be handled
+ * @dev_id: device ID cookie
*
* Only used when receiving data.
+ *
+ * Return: irqreturn code (%IRQ_HANDLED)
*/
static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
{
@@ -372,7 +382,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
}
/**
- * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion
+ * mpc52xx_lpbfifo_poll - Poll for DMA completion
*/
void mpc52xx_lpbfifo_poll(void)
{
@@ -393,6 +403,8 @@ EXPORT_SYMBOL(mpc52xx_lpbfifo_poll);
/**
* mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request.
* @req: Pointer to request structure
+ *
+ * Return: %0 on success, -errno code on error
*/
int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req)
{
@@ -531,6 +543,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op)
err_bcom_rx_irq:
bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
err_bcom_rx:
+ free_irq(lpbfifo.irq, &lpbfifo);
err_irq:
iounmap(lpbfifo.regs);
lpbfifo.regs = NULL;
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index e12cb44e717f..caa96edf0e72 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -107,7 +107,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
goto next;
unreg:
- platform_device_del(pdev);
+ platform_device_put(pdev);
err:
pr_err("%pOF: registration failed\n", np);
next:
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
index e14d1b74d4e4..751395cbf022 100644
--- a/arch/powerpc/platforms/85xx/sgy_cts1000.c
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -7,10 +7,13 @@
* Copyright 2012 by Servergy, Inc.
*/
+#define pr_fmt(fmt) "gpio-halt: " fmt
+
+#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/device.h>
+#include <linux/gpio/consumer.h>
#include <linux/module.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
@@ -18,7 +21,8 @@
#include <asm/machdep.h>
-static struct device_node *halt_node;
+static struct gpio_desc *halt_gpio;
+static int halt_irq;
static const struct of_device_id child_match[] = {
{
@@ -36,23 +40,10 @@ static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
static void __noreturn gpio_halt_cb(void)
{
- enum of_gpio_flags flags;
- int trigger, gpio;
-
- if (!halt_node)
- panic("No reset GPIO information was provided in DT\n");
-
- gpio = of_get_gpio_flags(halt_node, 0, &flags);
-
- if (!gpio_is_valid(gpio))
- panic("Provided GPIO is invalid\n");
-
- trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
- printk(KERN_INFO "gpio-halt: triggering GPIO.\n");
+ pr_info("triggering GPIO.\n");
/* Probably wont return */
- gpio_set_value(gpio, trigger);
+ gpiod_set_value(halt_gpio, 1);
panic("Halt failed\n");
}
@@ -61,95 +52,78 @@ static void __noreturn gpio_halt_cb(void)
* to handle the shutdown/poweroff. */
static irqreturn_t gpio_halt_irq(int irq, void *__data)
{
- printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n");
+ struct platform_device *pdev = __data;
+
+ dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n");
schedule_work(&gpio_halt_wq);
return IRQ_HANDLED;
};
-static int gpio_halt_probe(struct platform_device *pdev)
+static int __gpio_halt_probe(struct platform_device *pdev,
+ struct device_node *halt_node)
{
- enum of_gpio_flags flags;
- struct device_node *node = pdev->dev.of_node;
- struct device_node *child_node;
- int gpio, err, irq;
- int trigger;
-
- if (!node)
- return -ENODEV;
-
- /* If there's no matching child, this isn't really an error */
- child_node = of_find_matching_node(node, child_match);
- if (!child_node)
- return 0;
-
- /* Technically we could just read the first one, but punish
- * DT writers for invalid form. */
- if (of_gpio_count(child_node) != 1) {
- err = -EINVAL;
- goto err_put;
- }
-
- /* Get the gpio number relative to the dynamic base. */
- gpio = of_get_gpio_flags(child_node, 0, &flags);
- if (!gpio_is_valid(gpio)) {
- err = -EINVAL;
- goto err_put;
- }
+ int err;
- err = gpio_request(gpio, "gpio-halt");
+ halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
+ NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
+ err = PTR_ERR_OR_ZERO(halt_gpio);
if (err) {
- printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n",
- gpio);
- goto err_put;
+ dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err);
+ return err;
}
- trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
- gpio_direction_output(gpio, !trigger);
-
/* Now get the IRQ which tells us when the power button is hit */
- irq = irq_of_parse_and_map(child_node, 0);
- err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING |
- IRQF_TRIGGER_FALLING, "gpio-halt", child_node);
+ halt_irq = irq_of_parse_and_map(halt_node, 0);
+ err = request_irq(halt_irq, gpio_halt_irq,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "gpio-halt", pdev);
if (err) {
- printk(KERN_ERR "gpio-halt: error requesting IRQ %d for "
- "GPIO %d.\n", irq, gpio);
- gpio_free(gpio);
- goto err_put;
+ dev_err(&pdev->dev, "failed to request IRQ %d: %d\n",
+ halt_irq, err);
+ gpiod_put(halt_gpio);
+ halt_gpio = NULL;
+ return err;
}
/* Register our halt function */
ppc_md.halt = gpio_halt_cb;
pm_power_off = gpio_halt_cb;
- printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d"
- " irq).\n", gpio, trigger, irq);
+ dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq);
- halt_node = child_node;
return 0;
-
-err_put:
- of_node_put(child_node);
- return err;
}
-static int gpio_halt_remove(struct platform_device *pdev)
+static int gpio_halt_probe(struct platform_device *pdev)
{
- if (halt_node) {
- int gpio = of_get_gpio(halt_node, 0);
- int irq = irq_of_parse_and_map(halt_node, 0);
+ struct device_node *halt_node;
+ int ret;
+
+ if (!pdev->dev.of_node)
+ return -ENODEV;
+
+ /* If there's no matching child, this isn't really an error */
+ halt_node = of_find_matching_node(pdev->dev.of_node, child_match);
+ if (!halt_node)
+ return -ENODEV;
+
+ ret = __gpio_halt_probe(pdev, halt_node);
+ of_node_put(halt_node);
- free_irq(irq, halt_node);
+ return ret;
+}
- ppc_md.halt = NULL;
- pm_power_off = NULL;
+static int gpio_halt_remove(struct platform_device *pdev)
+{
+ free_irq(halt_irq, pdev);
+ cancel_work_sync(&gpio_halt_wq);
- gpio_free(gpio);
+ ppc_md.halt = NULL;
+ pm_power_off = NULL;
- of_node_put(halt_node);
- halt_node = NULL;
- }
+ gpiod_put(halt_gpio);
+ halt_gpio = NULL;
return 0;
}
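
The conversion above replaces the legacy integer-GPIO of_gpio calls with the descriptor-based consumer API: fwnode_gpiod_get_index() looks up, requests and configures the line in one step, and because the GPIO core applies the active-low flag from the device tree, the driver asserts the halt line with a logical gpiod_set_value(desc, 1) instead of computing the trigger level itself. A condensed sketch of those calls, assuming a DT node with a single "gpios" entry as in this driver:

    #include <linux/err.h>
    #include <linux/gpio/consumer.h>
    #include <linux/of.h>

    static int example_assert_halt(struct device_node *halt_node)
    {
    	struct gpio_desc *desc;

    	/* GPIO index 0 of the node, initially driven to its inactive level */
    	desc = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
    				      NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
    	if (IS_ERR(desc))
    		return PTR_ERR(desc);

    	gpiod_set_value(desc, 1);	/* logical "active"; polarity handled by the core */
    	gpiod_put(desc);
    	return 0;
    }
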
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 0c4eed9aea80..9563336e3348 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -135,6 +135,7 @@ config GENERIC_CPU
depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select ARCH_HAS_FAST_MULTIPLIER
select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
config POWERPC_CPU
bool "Generic 32 bits powerpc"
@@ -160,17 +161,20 @@ config POWER7_CPU
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
config POWER8_CPU
bool "POWER8"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
config POWER9_CPU
bool "POWER9"
depends on PPC_BOOK3S_64
select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_HAS_LBARX_LHARX
config POWER10_CPU
bool "POWER10"
@@ -184,6 +188,7 @@ config E5500_CPU
config E6500_CPU
bool "Freescale e6500"
depends on PPC64 && PPC_E500
+ select PPC_HAS_LBARX_LHARX
config 405_CPU
bool "40x family"
@@ -575,10 +580,10 @@ config CPU_LITTLE_ENDIAN
endchoice
config PPC64_ELF_ABI_V1
- def_bool PPC64 && CPU_BIG_ENDIAN
+ def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2)
config PPC64_ELF_ABI_V2
- def_bool PPC64 && CPU_LITTLE_ENDIAN
+ def_bool PPC64 && !PPC64_ELF_ABI_V1
config PPC64_BOOT_WRAPPER
def_bool n
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index bf300167ad6b..913b77b92cea 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -294,7 +294,7 @@ static struct platform_driver gpio_mdio_driver =
},
};
-static int gpio_mdio_init(void)
+static int __init gpio_mdio_init(void)
{
struct device_node *np;
@@ -314,7 +314,7 @@ static int gpio_mdio_init(void)
}
module_init(gpio_mdio_init);
-static void gpio_mdio_exit(void)
+static void __exit gpio_mdio_exit(void)
{
platform_driver_unregister(&gpio_mdio_driver);
if (gpio_regs)
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 04daa7f0a03c..4f7ee885a78f 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -70,9 +70,7 @@
#undef SHOW_GATWICK_IRQS
-int ppc_override_l2cr = 0;
-int ppc_override_l2cr_value;
-int has_l2cache = 0;
+static int has_l2cache;
int pmac_newworld;
@@ -236,22 +234,16 @@ static void __init l2cr_init(void)
const unsigned int *l2cr =
of_get_property(np, "l2cr-value", NULL);
if (l2cr) {
- ppc_override_l2cr = 1;
- ppc_override_l2cr_value = *l2cr;
_set_L2CR(0);
- _set_L2CR(ppc_override_l2cr_value);
+ _set_L2CR(*l2cr);
+ pr_info("L2CR overridden (0x%x), backside cache is %s\n",
+ *l2cr, ((*l2cr) & 0x80000000) ?
+ "enabled" : "disabled");
}
of_node_put(np);
break;
}
}
-
- if (ppc_override_l2cr)
- printk(KERN_INFO "L2CR overridden (0x%x), "
- "backside cache is %s\n",
- ppc_override_l2cr_value,
- (ppc_override_l2cr_value & 0x80000000)
- ? "enabled" : "disabled");
}
#endif
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 2502e9b17df4..38a7e02295c8 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -466,7 +466,7 @@ static struct attribute *ps3_system_bus_dev_attrs[] = {
};
ATTRIBUTE_GROUPS(ps3_system_bus_dev);
-struct bus_type ps3_system_bus_type = {
+static struct bus_type ps3_system_bus_type = {
.name = "ps3_system_bus",
.match = ps3_system_bus_match,
.uevent = ps3_system_bus_uevent,
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8e40ccac0f44..6b507b62ce8f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -154,7 +154,7 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
/**
* pseries_eeh_phb_reset - Reset the specified PHB
* @phb: PCI controller
- * @config_adddr: the associated config address
+ * @config_addr: the associated config address
* @option: reset option
*
* Reset the specified PHB/PE
@@ -188,7 +188,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in
/**
* pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
* @phb: PCI controller
- * @config_adddr: the associated config address
+ * @config_addr: the associated config address
*
* The function will be called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE would be recovered
@@ -848,16 +848,7 @@ static int __init eeh_pseries_init(void)
}
/* Initialize error log size */
- eeh_error_buf_size = rtas_token("rtas-error-log-max");
- if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
- pr_info("%s: unknown EEH error log size\n",
- __func__);
- eeh_error_buf_size = 1024;
- } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
- pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
- __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
- eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
- }
+ eeh_error_buf_size = rtas_get_error_log_max();
/* Set EEH probe mode */
eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
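
The replaced block shows the logic that rtas_get_error_log_max() now centralises: read the "rtas-error-log-max" value, fall back to a default when it is unknown, and clamp it to RTAS_ERROR_LOG_MAX. A hedged reconstruction along those lines is below; the actual helper in rtas.c may differ in detail (for instance by caching the result).

    #include <asm/rtas.h>

    static int example_error_log_max(void)
    {
    	int size = rtas_token("rtas-error-log-max");

    	if (size == RTAS_UNKNOWN_SERVICE)
    		return 1024;			/* conservative default */
    	if (size > RTAS_ERROR_LOG_MAX)
    		return RTAS_ERROR_LOG_MAX;	/* clamp to the supported maximum */
    	return size;
    }
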
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index e0a7ac5db15d..090ae5a1e0f5 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -70,6 +70,7 @@ static void pseries_cpu_offline_self(void)
xics_teardown_cpu();
unregister_slb_shadow(hwcpu);
+ unregister_vpa(hwcpu);
rtas_stop_self();
/* Should never get here... */
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 762eb15d3bd4..783c16ad648b 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -27,7 +27,9 @@ hcall_tracepoint_refcount:
/*
* precall must preserve all registers. use unused STK_PARAM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. STK_PARAM() in the caller's
+ * frame will be available even on ELFv2 because these are all
+ * variadic functions.
*/
#define HCALL_INST_PRECALL(FIRST_REG) \
mflr r0; \
@@ -41,29 +43,29 @@ hcall_tracepoint_refcount:
std r10,STK_PARAM(R10)(r1); \
std r0,16(r1); \
addi r4,r1,STK_PARAM(FIRST_REG); \
- stdu r1,-STACK_FRAME_OVERHEAD(r1); \
+ stdu r1,-STACK_FRAME_MIN_SIZE(r1); \
bl __trace_hcall_entry; \
- ld r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
- ld r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1); \
- ld r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1); \
- ld r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1); \
- ld r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1); \
- ld r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1); \
- ld r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1); \
- ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1)
+ ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
+ ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \
+ ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \
+ ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \
+ ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \
+ ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \
+ ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \
+ ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1)
/*
* postcall is performed immediately before function return which
* allows liberal use of volatile registers.
*/
#define __HCALL_INST_POSTCALL \
- ld r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
- std r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1); \
+ ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
+ std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
mr r4,r3; \
mr r3,r0; \
bl __trace_hcall_exit; \
- ld r0,STACK_FRAME_OVERHEAD+16(r1); \
- addi r1,r1,STACK_FRAME_OVERHEAD; \
+ ld r0,STACK_FRAME_MIN_SIZE+16(r1); \
+ addi r1,r1,STACK_FRAME_MIN_SIZE; \
ld r3,STK_PARAM(R3)(r1); \
mtlr r0
@@ -303,14 +305,14 @@ plpar_hcall9_trace:
mr r7,r8
mr r8,r9
mr r9,r10
- ld r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1)
- ld r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1)
- ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1)
+ ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1)
+ ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1)
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1)
HVSC
mr r0,r12
- ld r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1)
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)
std r4,0(r12)
std r5,8(r12)
std r6,16(r12)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 561adac69022..c74b71d4733d 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -248,7 +248,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
* Set up the page with TCE data, looping through and setting
* the values.
*/
- limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
+ limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);
for (l = 0; l < limit; l++) {
tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 507dc0b5987d..63fd925ccbb8 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -35,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
+#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
+
+ if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+ /*
+		 * The hypervisor assigns VAS resources based on
+		 * entitled capacity for shared mode. Reconfigure
+		 * VAS windows on DLPAR CPU events.
+ */
+ if (pseries_vas_dlpar_cpu() != 0)
+ retval = H_HARDWARE;
+ }
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 634fac5db3f9..4cea71aa0f41 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -635,10 +635,13 @@ retry:
prod_others();
}
/*
- * Execution may have been suspended for several seconds, so
- * reset the watchdog.
+ * Execution may have been suspended for several seconds, so reset
+ * the watchdogs. touch_nmi_watchdog() also touches the soft lockup
+ * watchdog.
*/
+ rcu_cpu_stall_reset();
touch_nmi_watchdog();
+
return ret;
}
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index f4b5b5a64db3..4edd1585e245 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -75,7 +75,7 @@ static int pseries_status_to_err(int rc)
case H_FUNCTION:
err = -ENXIO;
break;
- case H_P1:
+ case H_PARAMETER:
case H_P2:
case H_P3:
case H_P4:
@@ -111,7 +111,7 @@ static int pseries_status_to_err(int rc)
err = -EEXIST;
break;
case H_ABORTED:
- err = -EINTR;
+ err = -EIO;
break;
default:
err = -EINVAL;
@@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer)
if (consumer > PKS_OS_OWNER)
return ERR_PTR(-EINVAL);
- auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
+ auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
if (!auth)
return ERR_PTR(-ENOMEM);
auth->version = 1;
auth->consumer = consumer;
- auth->rsvd0 = 0;
- auth->rsvd1 = 0;
- if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) {
- auth->passwordlength = 0;
+ if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER)
return auth;
- }
memcpy(auth->password, ospassword, ospasswordlength);
@@ -312,10 +308,6 @@ int plpks_write_var(struct plpks_var var)
if (!rc)
rc = plpks_confirm_object_flushed(label, auth);
- if (rc)
- pr_err("Failed to write variable %s for component %s with error %d\n",
- var.name, var.component, rc);
-
rc = pseries_status_to_err(rc);
kfree(label);
out:
@@ -350,10 +342,6 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
if (!rc)
rc = plpks_confirm_object_flushed(label, auth);
- if (rc)
- pr_err("Failed to remove variable %s for component %s with error %d\n",
- vname.name, component, rc);
-
rc = pseries_status_to_err(rc);
kfree(label);
out:
@@ -366,22 +354,24 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var)
{
unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
struct plpks_auth *auth;
- struct label *label;
+ struct label *label = NULL;
u8 *output;
int rc;
if (var->namelen > MAX_NAME_SIZE)
return -EINVAL;
- auth = construct_auth(PKS_OS_OWNER);
+ auth = construct_auth(consumer);
if (IS_ERR(auth))
return PTR_ERR(auth);
- label = construct_label(var->component, var->os, var->name,
- var->namelen);
- if (IS_ERR(label)) {
- rc = PTR_ERR(label);
- goto out_free_auth;
+ if (consumer == PKS_OS_OWNER) {
+ label = construct_label(var->component, var->os, var->name,
+ var->namelen);
+ if (IS_ERR(label)) {
+ rc = PTR_ERR(label);
+ goto out_free_auth;
+ }
}
output = kzalloc(maxobjsize, GFP_KERNEL);
@@ -390,13 +380,17 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var)
goto out_free_label;
}
- rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
- virt_to_phys(label), label->size, virt_to_phys(output),
- maxobjsize);
+ if (consumer == PKS_OS_OWNER)
+ rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+ virt_to_phys(label), label->size, virt_to_phys(output),
+ maxobjsize);
+ else
+ rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+ virt_to_phys(var->name), var->namelen, virt_to_phys(output),
+ maxobjsize);
+
if (rc != H_SUCCESS) {
- pr_err("Failed to read variable %s for component %s with error %d\n",
- var->name, var->component, rc);
rc = pseries_status_to_err(rc);
goto out_free_output;
}
diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h
index c6a291367bb1..275ccd86bfb5 100644
--- a/arch/powerpc/platforms/pseries/plpks.h
+++ b/arch/powerpc/platforms/pseries/plpks.h
@@ -17,7 +17,7 @@
#define WORLDREADABLE 0x08000000
#define SIGNEDUPDATE 0x01000000
-#define PLPKS_VAR_LINUX 0x01
+#define PLPKS_VAR_LINUX 0x02
#define PLPKS_VAR_COMMON 0x04
struct plpks_var {
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 0e0524cbe20c..4ad6e510d405 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -200,17 +200,42 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
- rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
- if (!rc) {
- tsk_ref = &txwin->vas_win.task_ref;
- vas_dump_crb(&crb);
- vas_update_csb(&crb, tsk_ref);
+ while (atomic_read(&txwin->pending_faults)) {
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+ atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
+ * irq_default_primary_handler() can be used only with IRQF_ONESHOT,
+ * which disables the IRQ before executing the thread handler and
+ * re-enables it afterwards. But disabling the interrupt sets the VAS
+ * IRQ to the OFF state in the hypervisor, so if the NX generates a
+ * fault interrupt during that window, the hypervisor will not deliver
+ * it to the LPAR. So use a VAS-specific primary IRQ handler instead
+ * of the default one.
+ */
+static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+
+ /*
+	 * The thread handler will process this interrupt if it is
+ * already running.
+ */
+ atomic_inc(&txwin->pending_faults);
+
+ return IRQ_WAKE_THREAD;
+}
+
+/*
* Allocate window and setup IRQ mapping.
*/
static int allocate_setup_window(struct pseries_vas_window *txwin,
@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
- rc = request_threaded_irq(txwin->fault_virq, NULL,
- pseries_vas_fault_thread_fn, IRQF_ONESHOT,
+ rc = request_threaded_irq(txwin->fault_virq,
+ pseries_vas_irq_handler,
+ pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex);
return rc;
}
+
+int pseries_vas_dlpar_cpu(void)
+{
+ int new_nr_creds, rc;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
+ }
+
+ if (rc)
+ pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+ return rc;
+}
+
/*
* Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on
@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
- int new_nr_creds, len, rc = 0;
+ int len;
+
+ /*
+	 * For a shared CPU partition, the hypervisor assigns total credits
+	 * based on entitled core capacity, so VAS window updates are
+	 * driven from lparcfg_write() instead.
+ */
+ if (is_shared_processor())
+ return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
- rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
- vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
- (u64)virt_to_phys(&hv_cop_caps));
- if (!rc) {
- new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
- rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
- new_nr_creds);
- }
-
- if (rc)
- pr_err("Failed reconfig VAS capabilities with DLPAR\n");
-
- return rc;
+ return pseries_vas_dlpar_cpu();
}
static struct notifier_block pseries_vas_nb = {
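
The fault-IRQ changes earlier in this file replace IRQF_ONESHOT (which would keep the interrupt masked, and therefore OFF in the hypervisor, while the thread runs) with a custom primary handler that only counts the fault and wakes the thread, which then drains every pending fault. A generic sketch of that "count in hard IRQ, drain in thread" pattern follows; all names here are illustrative, not the VAS ones.

    #include <linux/atomic.h>
    #include <linux/interrupt.h>

    struct example_win {
    	atomic_t pending;
    };

    static irqreturn_t example_hard_irq(int irq, void *data)
    {
    	struct example_win *win = data;

    	atomic_inc(&win->pending);	/* never masks the interrupt line */
    	return IRQ_WAKE_THREAD;
    }

    static irqreturn_t example_thread_fn(int irq, void *data)
    {
    	struct example_win *win = data;

    	while (atomic_read(&win->pending)) {
    		/* fetch and handle one fault here */
    		atomic_dec(&win->pending);
    	}
    	return IRQ_HANDLED;
    }

    static int example_setup(int virq, struct example_win *win)
    {
    	/* no IRQF_ONESHOT needed because a primary handler is supplied */
    	return request_threaded_irq(virq, example_hard_irq,
    				    example_thread_fn, 0, "example-fault", win);
    }
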
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
index 333ffa2f9f42..7115043ec488 100644
--- a/arch/powerpc/platforms/pseries/vas.h
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags;
char *name;
int fault_virq;
+ atomic_t pending_faults; /* Number of pending faults */
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action);
+int pseries_vas_dlpar_cpu(void);
#else
static inline int vas_migration_handler(int action)
{
return 0;
}
+static inline int pseries_vas_dlpar_cpu(void)
+{
+ return 0;
+}
#endif
#endif /* _VAS_H */
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 974d3db6faab..b7232c46b244 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1139,6 +1139,19 @@ void __init fsl_pci_assign_primary(void)
}
/*
+	 * If there's no PCI host bridge with ISA, check for the PCI
+	 * host bridge with alias "pci0" (the first PCI host bridge).
+ */
+ np = of_find_node_by_path("pci0");
+ if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) {
+ fsl_pci_primary = np;
+ of_node_put(np);
+ return;
+ }
+ if (np)
+ of_node_put(np);
+
+ /*
* If there's no PCI host bridge with ISA, arbitrarily
* designate one as primary. This can go away once
* various bugs with primary-less systems are fixed.
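
The new fallback above relies on of_find_node_by_path() accepting device-tree alias names such as "pci0" in addition to full paths, and on dropping the node reference the lookup takes on every exit. A minimal sketch of that lookup-and-release pattern:

    #include <linux/of.h>

    static bool example_pci0_available(void)
    {
    	struct device_node *np;
    	bool ok;

    	np = of_find_node_by_path("pci0");	/* alias lookup */
    	ok = np && of_device_is_available(np);
    	of_node_put(np);			/* of_node_put(NULL) is a no-op */
    	return ok;
    }
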
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index a439e33eae06..d75064fb7d12 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -20,7 +20,7 @@
#define MPIC_MSGR_REGISTERS_PER_BLOCK 4
#define MPIC_MSGR_STRIDE 0x10
-#define MPIC_MSGR_MER_OFFSET 0x100
+#define MPIC_MSGR_MER_OFFSET (0x100 / sizeof(u32))
#define MSGR_INUSE 0
#define MSGR_FREE 1
@@ -234,7 +234,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE;
- msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET);
+ msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
msgr->in_use = MSGR_FREE;
msgr->num = i;
raw_spin_lock_init(&msgr->lock);
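
The offset macro changes because msgr->base is a u32 __iomem pointer and C pointer arithmetic scales by the pointee size, so once the u8 cast is dropped the 0x100-byte offset must be expressed in u32 units. A tiny illustration that both forms address the same register:

    #include <linux/bug.h>
    #include <linux/io.h>
    #include <linux/types.h>

    static u32 __iomem *example_mer(u32 __iomem *base)
    {
    	/* byte-cast form, as used before the patch */
    	u32 __iomem *by_bytes = (u32 __iomem *)((u8 __iomem *)base + 0x100);
    	/* element-scaled form, as used after: 0x100 / sizeof(u32) == 0x40 */
    	u32 __iomem *by_elems = base + 0x100 / sizeof(u32);

    	WARN_ON(by_bytes != by_elems);	/* the two are equivalent */
    	return by_elems;
    }
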
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 3925825954bc..19d880ebc5e6 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -535,13 +535,13 @@ static bool __init xive_parse_provisioning(struct device_node *np)
static void __init xive_native_setup_pools(void)
{
/* Allocate a pool big enough */
- pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids);
+ pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids);
xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
- pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
+ pr_err("Failed to allocate pool VP, KVM might not function\n");
- pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n",
+ pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n",
xive_pool_vps, nr_cpu_ids);
}
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index e2c8f93b535b..e45419264391 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -439,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
if (!data->trig_mmio) {
+ iounmap(data->eoi_mmio);
pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
return -ENOMEM;
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index f51c882bf902..0da66bc4823d 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1525,9 +1525,9 @@ bpt_cmds(void)
cmd = inchar();
switch (cmd) {
- static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
- int mode;
- case 'd': /* bd - hardware data breakpoint */
+ case 'd': { /* bd - hardware data breakpoint */
+ static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+ int mode;
if (xmon_is_ro) {
printf(xmon_ro_msg);
break;
@@ -1560,6 +1560,7 @@ bpt_cmds(void)
force_enable_xmon();
break;
+ }
case 'i': /* bi - hardware instr breakpoint */
if (xmon_is_ro) {
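
The brace added above gives case 'd' its own scope: declarations placed between the switch's opening brace and the first case label are legal C, but control flow always jumps past them to a case label, so any runtime initialization there is dead and newer compilers warn about it. A small standalone illustration, with made-up names:

    #include <stdio.h>

    static void example(int cmd)
    {
    	switch (cmd) {
    	case 'd': {	/* braces give the case its own scope */
    		static const char msg[] = "data breakpoint";
    		int mode = 0;	/* this initializer actually runs */

    		printf("%s, mode=%d\n", msg, mode);
    		break;
    	}
    	case 'i':
    		printf("instruction breakpoint\n");
    		break;
    	}
    }
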
@@ -1720,7 +1721,6 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp,
}
#define LRSAVE_OFFSET (STACK_FRAME_LR_SAVE * sizeof(unsigned long))
-#define MARKER_OFFSET (STACK_FRAME_MARKER * sizeof(unsigned long))
static void xmon_show_stack(unsigned long sp, unsigned long lr,
unsigned long pc)
@@ -1781,14 +1781,13 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
xmon_print_symbol(ip, " ", "\n");
}
- /* Look for "regshere" marker to see if this is
+ /* Look for "regs" marker to see if this is
an exception frame. */
- if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
+ if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long))
&& marker == STACK_FRAME_REGS_MARKER) {
- if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs))
- != sizeof(regs)) {
+ if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) {
printf("Couldn't read registers at %lx\n",
- sp + STACK_FRAME_OVERHEAD);
+ sp + STACK_INT_FRAME_REGS);
break;
}
printf("--- Exception: %lx %s at ", regs.trap,