diff options
232 files changed, 6749 insertions, 3642 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem index 92e2db0e2d3d..95254cec92bf 100644 --- a/Documentation/ABI/testing/sysfs-bus-papr-pmem +++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem @@ -39,9 +39,11 @@ KernelVersion: v5.9 Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev, Description: (RO) Report various performance stats related to papr-scm NVDIMM - device. Each stat is reported on a new line with each line - composed of a stat-identifier followed by it value. Below are - currently known dimm performance stats which are reported: + device. This attribute is only available for NVDIMM devices + that support reporting NVDIMM performance stats. Each stat is + reported on a new line with each line composed of a + stat-identifier followed by it value. Below are currently known + dimm performance stats which are reported: * "CtlResCt" : Controller Reset Count * "CtlResTm" : Controller Reset Elapsed Time diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index df46324d5090..d01e3401581d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -140,7 +140,9 @@ config PPC select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 + select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX && !PPC_BOOK3S_32 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -266,6 +268,7 @@ config PPC select PPC_DAWR if PPC64 select RTC_LIB select SPARSE_IRQ + select STRICT_KERNEL_RWX if STRICT_MODULE_RWX select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select VIRT_TO_BUS if !PPC64 @@ -289,6 +292,7 @@ config PANIC_TIMEOUT config COMPAT bool "Enable support for 32bit binaries" depends on PPC64 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 default y if !CPU_LITTLE_ENDIAN select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION @@ -422,7 +426,7 @@ source "kernel/Kconfig.hz" config MATH_EMULATION bool "Math emulation" - depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE + depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT select PPC_FPU_REGS help Some PowerPC chips designed for embedded applications do not have diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 6342f9da4545..205cd77f321f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -84,6 +84,11 @@ config MSI_BITMAP_SELFTEST config PPC_IRQ_SOFT_MASK_DEBUG bool "Include extra checks for powerpc irq soft masking" + depends on PPC64 + +config PPC_RFI_SRR_DEBUG + bool "Include extra checks for RFI SRR register validity" + depends on PPC_BOOK3S_64 config XMON bool "Include xmon kernel debugger" diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 3212d076ac6a..712c5e8768ce 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -376,6 +376,16 @@ ppc64_book3e_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc32_randconfig +ppc32_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \ + -f $(srctree)/Makefile randconfig + +PHONY += ppc64_randconfig +ppc64_randconfig: + $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \ + -f $(srctree)/Makefile randconfig + define archhelp @echo '* zImage - Build default images selected by kernel config' @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)' diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 2b8da923ceca..e312ea802aa6 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -163,6 +163,8 @@ src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c +src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c + src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) src-boot := $(src-wlib) $(src-plat) empty.c @@ -227,7 +229,7 @@ $(obj)/wrapper.a: $(obj-wlib) FORCE hostprogs := addnote hack-coff mktree -targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) +targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \ $(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds @@ -355,6 +357,8 @@ image-$(CONFIG_MVME5100) += dtbImage.mvme5100 # Board port in arch/powerpc/platform/amigaone/Kconfig image-$(CONFIG_AMIGAONE) += cuImage.amigaone +image-$(CONFIG_PPC_MICROWATT) += dtbImage.microwatt + # For 32-bit powermacs, build the COFF and miboot images # as well as the ELF images. ifdef CONFIG_PPC32 diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c index 6098b879ac97..977eb15a6d17 100644 --- a/arch/powerpc/boot/decompress.c +++ b/arch/powerpc/boot/decompress.c @@ -99,8 +99,8 @@ static void print_err(char *s) * partial_decompress - decompresses part or all of a compressed buffer * @inbuf: input buffer * @input_size: length of the input buffer - * @outbuf: input buffer - * @output_size: length of the input buffer + * @outbuf: output buffer + * @output_size: length of the output buffer * @skip number of output bytes to ignore * * This function takes compressed data from inbuf, decompresses and write it to diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c index 5d91036ad626..58fbcfcc98c9 100644 --- a/arch/powerpc/boot/devtree.c +++ b/arch/powerpc/boot/devtree.c @@ -13,6 +13,7 @@ #include "string.h" #include "stdio.h" #include "ops.h" +#include "of.h" void dt_fixup_memory(u64 start, u64 size) { @@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size) root = finddevice("/"); if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0) naddr = 2; + else + naddr = be32_to_cpu(naddr); if (naddr < 1 || naddr > 2) fatal("Can't cope with #address-cells == %d in /\n\r", naddr); if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0) nsize = 1; + else + nsize = be32_to_cpu(nsize); if (nsize < 1 || nsize > 2) fatal("Can't cope with #size-cells == %d in /\n\r", nsize); i = 0; if (naddr == 2) - memreg[i++] = start >> 32; - memreg[i++] = start & 0xffffffff; + memreg[i++] = cpu_to_be32(start >> 32); + memreg[i++] = cpu_to_be32(start & 0xffffffff); if (nsize == 2) - memreg[i++] = size >> 32; - memreg[i++] = size & 0xffffffff; + memreg[i++] = cpu_to_be32(size >> 32); + memreg[i++] = cpu_to_be32(size & 0xffffffff); memory = finddevice("/memory"); if (! memory) { @@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size) setprop_str(memory, "device_type", "memory"); } - printf("Memory <- <0x%x", memreg[0]); + printf("Memory <- <0x%x", be32_to_cpu(memreg[0])); for (i = 1; i < (naddr + nsize); i++) - printf(" 0x%x", memreg[i]); + printf(" 0x%x", be32_to_cpu(memreg[i])); printf("> (%ldMB)\n\r", (unsigned long)(size >> 20)); setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32)); @@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus) printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus)); while ((devp = find_node_by_devtype(devp, "cpu"))) { - setprop_val(devp, "clock-frequency", cpu); - setprop_val(devp, "timebase-frequency", tb); + setprop_val(devp, "clock-frequency", cpu_to_be32(cpu)); + setprop_val(devp, "timebase-frequency", cpu_to_be32(tb)); if (bus > 0) - setprop_val(devp, "bus-frequency", bus); + setprop_val(devp, "bus-frequency", cpu_to_be32(bus)); } timebase_period_ns = 1000000000 / tb; @@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq) if (devp) { printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq)); - setprop_val(devp, "clock-frequency", freq); + setprop_val(devp, "clock-frequency", cpu_to_be32(freq)); } } @@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize) { if (getprop(node, "#address-cells", naddr, 4) != 4) *naddr = 2; + else + *naddr = be32_to_cpu(*naddr); if (getprop(node, "#size-cells", nsize, 4) != 4) *nsize = 1; + else + *nsize = be32_to_cpu(*nsize); } static void copy_val(u32 *dest, u32 *src, int naddr) @@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr) int i, carry = 0; for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) { - u64 tmp = (u64)reg[i] + add[i] + carry; + u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry; carry = tmp >> 32; - reg[i] = (u32)tmp; + reg[i] = cpu_to_be32((u32)tmp); } return !carry; @@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize) u32 end; for (i = 0; i < MAX_ADDR_CELLS; i++) { - if (reg[i] < range[i]) + if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i])) return 0; - if (reg[i] > range[i]) + if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i])) break; } for (i = 0; i < MAX_ADDR_CELLS; i++) { - end = range[i] + rangesize[i]; + end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]); - if (reg[i] < end) + if (be32_to_cpu(reg[i]) < end) break; - if (reg[i] > end) + if (be32_to_cpu(reg[i]) > end) return 0; } @@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, return 0; dt_get_reg_format(parent, &naddr, &nsize); - if (nsize > 2) return 0; @@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, copy_val(last_addr, prop_buf + offset, naddr); - ret_size = prop_buf[offset + naddr]; + ret_size = be32_to_cpu(prop_buf[offset + naddr]); if (nsize == 2) { ret_size <<= 32; - ret_size |= prop_buf[offset + naddr + 1]; + ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]); } for (;;) { @@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, offset = find_range(last_addr, prop_buf, prev_naddr, naddr, prev_nsize, buflen / 4); - if (offset < 0) return 0; @@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, if (naddr > 2) return 0; - ret_addr = ((u64)last_addr[2] << 32) | last_addr[3]; - + ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]); if (sizeof(void *) == 4 && (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL || ret_addr + ret_size > 0x100000000ULL)) @@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat) int dt_get_virtual_reg(void *node, void **addr, int nres) { unsigned long xaddr; - int n; + int n, i; n = getprop(node, "virtual-reg", addr, nres * 4); - if (n > 0) + if (n > 0) { + for (i = 0; i < n/4; i ++) + ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]); return n / 4; + } for (n = 0; n < nres; n++) { if (!dt_xlate_reg(node, n, &xaddr, NULL)) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts new file mode 100644 index 000000000000..974abbdda249 --- /dev/null +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -0,0 +1,138 @@ +/dts-v1/; + +/ { + #size-cells = <0x02>; + #address-cells = <0x02>; + model-name = "microwatt"; + compatible = "microwatt-soc"; + + aliases { + serial0 = &UART0; + }; + + reserved-memory { + #size-cells = <0x02>; + #address-cells = <0x02>; + ranges; + }; + + memory@0 { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000 0x10000000>; + }; + + cpus { + #size-cells = <0x00>; + #address-cells = <0x01>; + + ibm,powerpc-cpu-features { + display-name = "Microwatt"; + isa = <3000>; + device_type = "cpu-features"; + compatible = "ibm,powerpc-cpu-features"; + + mmu-radix { + isa = <3000>; + usable-privilege = <2>; + }; + + little-endian { + isa = <2050>; + usable-privilege = <3>; + hwcap-bit-nr = <1>; + }; + + cache-inhibited-large-page { + isa = <2040>; + usable-privilege = <2>; + }; + + fixed-point-v3 { + isa = <3000>; + usable-privilege = <3>; + }; + + no-execute { + isa = <2010>; + usable-privilege = <2>; + }; + + floating-point { + hwcap-bit-nr = <27>; + isa = <0>; + usable-privilege = <3>; + }; + }; + + PowerPC,Microwatt@0 { + i-cache-sets = <2>; + ibm,dec-bits = <64>; + reservation-granule-size = <64>; + clock-frequency = <100000000>; + timebase-frequency = <100000000>; + i-tlb-sets = <1>; + ibm,ppc-interrupt-server#s = <0>; + i-cache-block-size = <64>; + d-cache-block-size = <64>; + d-cache-sets = <2>; + i-tlb-size = <64>; + cpu-version = <0x990000>; + status = "okay"; + i-cache-size = <0x1000>; + ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>; + tlb-size = <0>; + tlb-sets = <0>; + device_type = "cpu"; + d-tlb-size = <128>; + d-tlb-sets = <2>; + reg = <0>; + general-purpose; + 64-bit; + d-cache-size = <0x1000>; + ibm,chip-id = <0>; + }; + }; + + soc@c0000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + interrupt-parent = <&ICS>; + + ranges = <0 0 0xc0000000 0x40000000>; + + interrupt-controller@4000 { + compatible = "openpower,xics-presentation", "ibm,ppc-xicp"; + ibm,interrupt-server-ranges = <0x0 0x1>; + reg = <0x4000 0x100>; + }; + + ICS: interrupt-controller@5000 { + compatible = "openpower,xics-sources"; + interrupt-controller; + interrupt-ranges = <0x10 0x10>; + reg = <0x5000 0x100>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + }; + + UART0: serial@2000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x2000 0x8>; + clock-frequency = <100000000>; + current-speed = <115200>; + reg-shift = <2>; + fifo-size = <16>; + interrupts = <0x10 0x1>; + }; + }; + + chosen { + bootargs = ""; + ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 + 00 00 00 00 00 00 00 00 40 00 40]; + stdout-path = &UART0; + }; +}; diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c new file mode 100644 index 000000000000..ca9d83617fc1 --- /dev/null +++ b/arch/powerpc/boot/microwatt.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <stddef.h> +#include "stdio.h" +#include "types.h" +#include "io.h" +#include "ops.h" + +BSS_STACK(8192); + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5) +{ + unsigned long heapsize = 16*1024*1024 - (unsigned long)_end; + + /* + * Disable interrupts and turn off MSR_RI, since we'll + * shortly be overwriting the interrupt vectors. + */ + __asm__ volatile("mtmsrd %0,1" : : "r" (0)); + + simple_alloc_init(_end, heapsize, 32, 64); + fdt_init(_dtb_start); + serial_console_init(); +} diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index b0da4466d419..f16d2be1d0f3 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -15,6 +15,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "of.h" #define UART_DLL 0 /* Out: Divisor Latch Low */ #define UART_DLM 1 /* Out: Divisor Latch High */ @@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp) int n; u32 reg_offset; - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) + if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) { + printf("virt reg parse fail...\r\n"); return -1; + } n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); if (n == sizeof(reg_offset)) - reg_base += reg_offset; + reg_base += be32_to_cpu(reg_offset); n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; + else + reg_shift = be32_to_cpu(reg_shift); scdp->open = ns16550_open; scdp->putc = ns16550_putc; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index cdb796b76e2e..1f4676bab786 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -342,6 +342,11 @@ gamecube|wii) link_address='0x600000' platformo="$object/$platform-head.o $object/$platform.o" ;; +microwatt) + link_address='0x500000' + platformo="$object/fixed-head.o $object/$platform.o" + binary=y + ;; treeboot-currituck) link_address='0x1000000' ;; diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S index 7b2ff2eaa73a..d0ffb493614d 100644 --- a/arch/powerpc/boot/zImage.ps3.lds.S +++ b/arch/powerpc/boot/zImage.ps3.lds.S @@ -8,7 +8,7 @@ SECTIONS .kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) } _vmlinux_end = .; - . = ALIGN(4096); + . = ALIGN(8); _dtb_start = .; .kernel:dtb : { *(.kernel:dtb) } _dtb_end = .; diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config new file mode 100644 index 000000000000..ad6546850c68 --- /dev/null +++ b/arch/powerpc/configs/32-bit.config @@ -0,0 +1 @@ +# CONFIG_PPC64 is not set diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config new file mode 100644 index 000000000000..0fe6406929e2 --- /dev/null +++ b/arch/powerpc/configs/64-bit.config @@ -0,0 +1 @@ +CONFIG_PPC64=y diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig new file mode 100644 index 000000000000..a08b739123da --- /dev/null +++ b/arch/powerpc/configs/microwatt_defconfig @@ -0,0 +1,98 @@ +# CONFIG_SWAP is not set +# CONFIG_CROSS_MEMORY_ATTACH is not set +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_TICK_CPU_ACCOUNTING=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_BLK_DEV_INITRD=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_SLUB_DEBUG is not set +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB_MERGE_DEFAULT is not set +CONFIG_PPC64=y +# CONFIG_PPC_KUEP is not set +# CONFIG_PPC_KUAP is not set +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_NR_IRQS=64 +CONFIG_PANIC_TIMEOUT=10 +# CONFIG_PPC_POWERNV is not set +# CONFIG_PPC_PSERIES is not set +CONFIG_PPC_MICROWATT=y +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_CPU_FREQ=y +CONFIG_HZ_100=y +# CONFIG_PPC_MEM_KEYS is not set +# CONFIG_SECCOMP is not set +# CONFIG_MQ_IOSCHED_KYBER is not set +# CONFIG_COREDUMP is not set +# CONFIG_COMPACTION is not set +# CONFIG_MIGRATION is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_PACKET_DIAG=y +CONFIG_UNIX=y +CONFIG_UNIX_DIAG=y +CONFIG_INET=y +CONFIG_INET_UDP_DIAG=y +CONFIG_INET_RAW_DIAG=y +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_PARTITIONED_MASTER=y +CONFIG_MTD_SPI_NOR=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_NETDEVICES=y +# CONFIG_WLAN is not set +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SERIAL_NONSTANDARD=y +# CONFIG_NVRAM is not set +CONFIG_RANDOM_TRUST_CPU=y +CONFIG_SPI=y +CONFIG_SPI_DEBUG=y +CONFIG_SPI_BITBANG=y +CONFIG_SPI_SPIDEV=y +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_IOMMU_SUPPORT is not set +# CONFIG_NVMEM is not set +CONFIG_EXT4_FS=y +# CONFIG_FILE_LOCKING is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_XZ_DEC_X86 is not set +# CONFIG_XZ_DEC_IA64 is not set +# CONFIG_XZ_DEC_ARM is not set +# CONFIG_XZ_DEC_ARMTHUMB is not set +# CONFIG_XZ_DEC_SPARC is not set +CONFIG_PRINTK_TIME=y +# CONFIG_SYMBOLIC_ERRNAME is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_MISC is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_FTRACE is not set +# CONFIG_STRICT_DEVMEM is not set +CONFIG_PPC_DISABLE_WERROR=y +CONFIG_XMON=y +CONFIG_XMON_DEFAULT=y +# CONFIG_XMON_DEFAULT_RO_MODE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 949ff9ccda5e..d21f266cea9a 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -57,3 +57,28 @@ CONFIG_CRC32_SLICEBY4=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_PPC_16K_PAGES=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_FS=y +CONFIG_PPC_PTDUMP=y +CONFIG_MODULES=y +CONFIG_SPI=y +CONFIG_SPI_FSL_SPI=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_DEV_TALITOS=y +CONFIG_8xx_GPIO=y +CONFIG_WATCHDOG=y +CONFIG_8xxx_WDT=y +CONFIG_SMC_UCODE_PATCH=y +CONFIG_ADVANCED_OPTIONS=y +CONFIG_PIN_TLB=y +CONFIG_PERF_EVENTS=y +CONFIG_MATH_EMULATION=y +CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y +CONFIG_STRICT_KERNEL_RWX=y +CONFIG_IPV6=y +CONFIG_BPF_JIT=y +CONFIG_DEBUG_VM_PGTABLE=y +CONFIG_BDI_SWITCH=y +CONFIG_PPC_EARLY_DEBUG=y +CONFIG_PPC_EARLY_DEBUG_CPM_ADDR=0xff002008 diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 2c87e856d839..8bfeea6c7de7 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -309,6 +309,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_FUNCTION_TRACER=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PPC_EMULATED_STATS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 701811c91a6f..0ad2291337a7 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -368,7 +368,9 @@ CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_FUNCTION_TRACER=y +CONFIG_FTRACE_SYSCALLS=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 50168dde4ea5..b183629f1bcf 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -289,7 +289,9 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_FUNCTION_TRACER=y +CONFIG_FTRACE_SYSCALLS=y CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_CODE_PATCHING_SELFTEST=y CONFIG_FTR_FIXUP_SELFTEST=y diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 02ee6f5ac9fe..222823861a67 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -71,8 +71,13 @@ void __init machine_init(u64 dt_ptr); #endif long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs); notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv); -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr); +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs); +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs); +#ifdef CONFIG_PPC64 +unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs); +unsigned long interrupt_exit_user_restart(struct pt_regs *regs); +unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs); +#endif long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low); diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 7ae29cfb06c0..f0e687236484 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -46,6 +46,8 @@ # define SMPWMB eieio #endif +/* clang defines this macro for a builtin, which will not work with runtime patching */ +#undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define dma_rmb() __lwsync() #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h deleted file mode 100644 index 2a0a467d2985..000000000000 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H -#define _ASM_POWERPC_BOOK3S_32_HASH_H -#ifdef __KERNEL__ - -/* - * The "classic" 32-bit implementation of the PowerPC MMU uses a hash - * table containing PTEs, together with a set of 16 segment registers, - * to define the virtual to physical address mapping. - * - * We use the hash table as an extended TLB, i.e. a cache of currently - * active mappings. We maintain a two-level page table tree, much - * like that used by the i386, for the sake of the Linux memory - * management code. Low-level assembler code in hash_low_32.S - * (procedure hash_page) is responsible for extracting ptes from the - * tree and putting them into the hash table when necessary, and - * updating the accessed and modified bits in the page table tree. - */ - -#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ -#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ -#define _PAGE_USER 0x004 /* usermode access allowed */ -#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ -#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ -#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ -#define _PAGE_DIRTY 0x080 /* C: page changed */ -#define _PAGE_ACCESSED 0x100 /* R: page referenced */ -#define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_RW 0x400 /* software: user write access allowed */ -#define _PAGE_SPECIAL 0x800 /* software: Special page */ - -#ifdef CONFIG_PTE_64BIT -/* We never clear the high word of the pte */ -#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) -#else -#define _PTE_NONE_MASK _PAGE_HASHPTE -#endif - -#define _PMD_PRESENT 0 -#define _PMD_PRESENT_MASK (PAGE_MASK) -#define _PMD_BAD (~PAGE_MASK) - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 1670dfe9d4f1..64201125a287 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -7,35 +7,104 @@ #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> + +extern struct static_key_false disable_kuap_key; +extern struct static_key_false disable_kuep_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); +} + +static __always_inline bool kuep_is_disabled(void) +{ + return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); +} + +static inline void kuep_lock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) | SR_NX); +} + +static inline void kuep_unlock(void) +{ + if (kuep_is_disabled()) + return; + + update_user_segments(mfsr(0) & ~SR_NX); +} + #ifdef CONFIG_PPC_KUAP #include <linux/sched.h> -static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) +#define KUAP_NONE (~0UL) +#define KUAP_ALL (~1UL) + +static inline void kuap_lock_one(unsigned long addr) { - addr &= 0xf0000000; /* align addr to start of segment */ - barrier(); /* make sure thread.kuap is updated before playing with SRs */ - while (addr < end) { - mtsr(sr, addr); - sr += 0x111; /* next VSID */ - sr &= 0xf0ffffff; /* clear VSID overflow */ - addr += 0x10000000; /* address of next segment */ - } + mtsr(mfsr(addr) | SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_unlock_one(unsigned long addr) +{ + mtsr(mfsr(addr) & ~SR_KS, addr); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_lock_all(void) +{ + update_user_segments(mfsr(0) | SR_KS); + isync(); /* Context sync required after mtsr() */ +} + +static inline void kuap_unlock_all(void) +{ + update_user_segments(mfsr(0) & ~SR_KS); isync(); /* Context sync required after mtsr() */ } +void kuap_lock_all_ool(void); +void kuap_unlock_all_ool(void); + +static inline void kuap_lock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_lock_one(addr); + else if (!ool) + kuap_lock_all(); + else + kuap_lock_all_ool(); +} + +static inline void kuap_unlock(unsigned long addr, bool ool) +{ + if (likely(addr != KUAP_ALL)) + kuap_unlock_one(addr); + else if (!ool) + kuap_unlock_all(); + else + kuap_unlock_all_ool(); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; - u32 addr = kuap & 0xf0000000; - u32 end = kuap << 28; + + if (kuap_is_disabled()) + return; regs->kuap = kuap; - if (unlikely(!kuap)) + if (unlikely(kuap == KUAP_NONE)) return; - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* Set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, false); } static inline void kuap_user_restore(struct pt_regs *regs) @@ -44,22 +113,22 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - u32 addr = regs->kuap & 0xf0000000; - u32 end = regs->kuap << 28; + if (kuap_is_disabled()) + return; current->thread.kuap = regs->kuap; - if (unlikely(regs->kuap == kuap)) - return; - - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + kuap_unlock(regs->kuap, false); } static inline unsigned long kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0); + if (kuap_is_disabled()) + return KUAP_NONE; + + WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); return kuap; } @@ -72,84 +141,78 @@ static inline void kuap_assert_locked(void) static __always_inline void allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) { - u32 addr, end; + if (kuap_is_disabled()) + return; BUILD_BUG_ON(!__builtin_constant_p(dir)); - BUILD_BUG_ON(dir & ~KUAP_READ_WRITE); if (!(dir & KUAP_WRITE)) return; - addr = (__force u32)to; - - if (unlikely(addr >= TASK_SIZE || !size)) - return; - - end = min(addr + size, TASK_SIZE); - - current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); - kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end); /* Clear Ks */ + current->thread.kuap = (__force u32)to; + kuap_unlock_one((__force u32)to); } -static __always_inline void prevent_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { - u32 addr, end; - - BUILD_BUG_ON(!__builtin_constant_p(dir)); + u32 kuap = current->thread.kuap; - if (dir & KUAP_CURRENT_WRITE) { - u32 kuap = current->thread.kuap; - - if (unlikely(!kuap)) - return; + if (kuap_is_disabled()) + return; - addr = kuap & 0xf0000000; - end = kuap << 28; - } else if (dir & KUAP_WRITE) { - addr = (__force u32)to; - end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); - if (unlikely(addr >= TASK_SIZE || !size)) - return; - } else { + if (!(dir & KUAP_WRITE)) return; - } - current->thread.kuap = 0; - kuap_update_sr(mfsr(addr) | SR_KS, addr, end); /* set Ks */ + current->thread.kuap = KUAP_NONE; + kuap_lock(kuap, true); } static inline unsigned long prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; - if (flags) - prevent_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (kuap_is_disabled()) + return KUAP_NONE; + + if (flags != KUAP_NONE) { + current->thread.kuap = KUAP_NONE; + kuap_lock(flags, true); + } return flags; } static inline void restore_user_access(unsigned long flags) { - unsigned long addr = flags & 0xf0000000; - unsigned long end = flags << 28; - void __user *to = (__force void __user *)addr; + if (kuap_is_disabled()) + return; - if (flags) - allow_user_access(to, to, end - addr, KUAP_READ_WRITE); + if (flags != KUAP_NONE) { + current->thread.kuap = flags; + kuap_unlock(flags, true); + } } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - unsigned long begin = regs->kuap & 0xf0000000; - unsigned long end = regs->kuap << 28; + unsigned long kuap = regs->kuap; + + if (kuap_is_disabled()) + return false; + + if (!is_write || kuap == KUAP_ALL) + return false; + if (kuap == KUAP_NONE) + return true; + + /* If faulting address doesn't match unlocked segment, unlock all */ + if ((kuap ^ address) & 0xf0000000) + regs->kuap = KUAP_ALL; - return is_write && (address < begin || address >= end); + return false; } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index b85f8e114a9c..f5be185cbdf8 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -67,6 +67,16 @@ struct ppc_bat { #ifndef __ASSEMBLY__ /* + * This macro defines the mapping from contexts to VSIDs (virtual + * segment IDs). We use a skew on both the context and the high 4 bits + * of the 32-bit virtual address (the "effective segment ID") in order + * to spread out the entries in the MMU hash table. Note, if this + * function is changed then hash functions will have to be + * changed to correspond. + */ +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) + +/* * Hardware Page Table Entry * Note that the xpn and x bitfields are used only by processors that * support extended addressing; otherwise, those bits are reserved. @@ -102,6 +112,37 @@ extern s32 patch__hash_page_B, patch__hash_page_C; extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2; extern s32 patch__flush_hash_B; +#include <asm/reg.h> +#include <asm/task_size_32.h> + +static __always_inline void update_user_segment(u32 n, u32 val) +{ + if (n << 28 < TASK_SIZE) + mtsr(val + n * 0x111, n << 28); +} + +static __always_inline void update_user_segments(u32 val) +{ + val &= 0xf0ffffff; + + update_user_segment(0, val); + update_user_segment(1, val); + update_user_segment(2, val); + update_user_segment(3, val); + update_user_segment(4, val); + update_user_segment(5, val); + update_user_segment(6, val); + update_user_segment(7, val); + update_user_segment(8, val); + update_user_segment(9, val); + update_user_segment(10, val); + update_user_segment(11, val); + update_user_segment(12, val); + update_user_segment(13, val); + update_user_segment(14, val); + update_user_segment(15, val); +} + #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 83c65845a1a9..609c80f67194 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -4,7 +4,43 @@ #include <asm-generic/pgtable-nopmd.h> -#include <asm/book3s/32/hash.h> +/* + * The "classic" 32-bit implementation of the PowerPC MMU uses a hash + * table containing PTEs, together with a set of 16 segment registers, + * to define the virtual to physical address mapping. + * + * We use the hash table as an extended TLB, i.e. a cache of currently + * active mappings. We maintain a two-level page table tree, much + * like that used by the i386, for the sake of the Linux memory + * management code. Low-level assembler code in hash_low_32.S + * (procedure hash_page) is responsible for extracting ptes from the + * tree and putting them into the hash table when necessary, and + * updating the accessed and modified bits in the page table tree. + */ + +#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ +#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ +#define _PAGE_USER 0x004 /* usermode access allowed */ +#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ +#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ +#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ +#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */ +#define _PAGE_DIRTY 0x080 /* C: page changed */ +#define _PAGE_ACCESSED 0x100 /* R: page referenced */ +#define _PAGE_EXEC 0x200 /* software: exec allowed */ +#define _PAGE_RW 0x400 /* software: user write access allowed */ +#define _PAGE_SPECIAL 0x800 /* software: Special page */ + +#ifdef CONFIG_PTE_64BIT +/* We never clear the high word of the pte */ +#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE) +#else +#define _PTE_NONE_MASK _PAGE_HASHPTE +#endif + +#define _PMD_PRESENT 0 +#define _PMD_PRESENT_MASK (PAGE_MASK) +#define _PMD_BAD (~PAGE_MASK) /* And here we include common definitions */ diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 9700da3a4093..a1cc73a88710 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -398,8 +398,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user #endif /* !CONFIG_PPC_KUAP */ -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); if (static_branch_unlikely(&uaccess_flush_key)) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index a666d561b44d..4d9941b2fe51 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -232,6 +232,9 @@ extern unsigned long __pmd_frag_size_shift; #define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) +#define MAX_PTRS_PER_PGD (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \ + H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE)) + /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index d5da7ddbf0fc..350de8f90250 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -91,7 +91,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, } #define HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { #ifdef __powerpc64__ u64 res = (__force u64)csum; diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index f1d029bf906e..a95f63788c6b 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,13 +23,13 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags); -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr); +int patch_branch(u32 *addr, unsigned long target, int flags); +int patch_instruction(u32 *addr, struct ppc_inst instr); +int raw_patch_instruction(u32 *addr, struct ppc_inst instr); static inline unsigned long patch_site_addr(s32 *site) { @@ -38,18 +38,18 @@ static inline unsigned long patch_site_addr(s32 *site) static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) { - return patch_instruction((struct ppc_inst *)patch_site_addr(site), instr); + return patch_instruction((u32 *)patch_site_addr(site), instr); } static inline int patch_branch_site(s32 *site, unsigned long target, int flags) { - return patch_branch((struct ppc_inst *)patch_site_addr(site), target, flags); + return patch_branch((u32 *)patch_site_addr(site), target, flags); } static inline int modify_instruction(unsigned int *addr, unsigned int clr, unsigned int set) { - return patch_instruction((struct ppc_inst *)addr, ppc_inst((*addr & ~clr) | set)); + return patch_instruction(addr, ppc_inst((*addr & ~clr) | set)); } static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set) @@ -59,10 +59,8 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int instr_is_relative_branch(struct ppc_inst instr); int instr_is_relative_link_branch(struct ppc_inst instr); -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr); -unsigned long branch_target(const struct ppc_inst *instr); -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src); +unsigned long branch_target(const u32 *instr); +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); extern bool is_conditional_branch(struct ppc_inst instr); #ifdef CONFIG_PPC_BOOK3E_64 void __patch_exception(int exc, unsigned long addr); @@ -73,9 +71,9 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 (PPC_INST_ADDIS | __PPC_RT(R2)) -#define ADDIS_R2_R12 (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12)) -#define ADDI_R2_R2 (PPC_INST_ADDI | __PPC_RT(R2) | __PPC_RA(R2)) +#define LIS_R2 (PPC_RAW_LIS(_R2, 0)) +#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0)) +#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0)) static inline unsigned long ppc_function_entry(void *func) @@ -180,12 +178,10 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name) #define R2_STACK_OFFSET 40 #endif -#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ - ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) +#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET) /* usually preceded by a mflr r0 */ -#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ - ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF) #endif /* CONFIG_PPC64 */ #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 4cb9efa2eb21..242204e12993 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -16,7 +16,7 @@ .section ".head.data.\name\()","a",@progbits .endm .macro use_ftsec name - .section ".head.text.\name\()" + .section ".head.text.\name\()","ax",@progbits .endm /* diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7e4b2cef40c2..9bcf345cb208 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -294,6 +294,13 @@ #define H_RESIZE_HPT_COMMIT 0x370 #define H_REGISTER_PROC_TBL 0x37C #define H_SIGNAL_SYS_RESET 0x380 +#define H_ALLOCATE_VAS_WINDOW 0x388 +#define H_MODIFY_VAS_WINDOW 0x38C +#define H_DEALLOCATE_VAS_WINDOW 0x390 +#define H_QUERY_VAS_WINDOW 0x394 +#define H_QUERY_VAS_CAPABILITIES 0x398 +#define H_QUERY_NX_CAPABILITIES 0x39C +#define H_GET_NX_FAULT 0x3A0 #define H_INT_GET_SOURCE_INFO 0x3A8 #define H_INT_SET_SOURCE_CONFIG 0x3AC #define H_INT_GET_SOURCE_CONFIG 0x3B0 @@ -393,6 +400,9 @@ #define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7 +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8 +#define H_CPU_BEHAV_NO_STF_BARRIER (1ull << 54) // IBM bit 9 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 56a98936a6a9..21cc571ea9c2 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -18,8 +18,17 @@ * PACA flags in paca->irq_happened. * * This bits are set when interrupts occur while soft-disabled - * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS - * is set whenever we manually hard disable. + * and allow a proper replay. + * + * The PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost + * always in synch with the MSR[EE] state, except: + * - A window in interrupt entry, where hardware disables MSR[EE] and that + * must be "reconciled" with the soft mask state. + * - NMI interrupts that hit in awkward places, until they fix the state. + * - When local irqs are being enabled and state is being fixed up. + * - When returning from an interrupt there are some windows where this + * can become out of synch, but gets fixed before the RFI or before + * executing the next user instruction (see arch/powerpc/kernel/interrupt.c). */ #define PACA_IRQ_HARD_DIS 0x01 #define PACA_IRQ_DBELL 0x02 @@ -389,7 +398,15 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline void may_hard_irq_enable(void) { } +static inline bool may_hard_irq_enable(void) +{ + return false; +} + +static inline void do_hard_irq_enable(void) +{ + BUILD_BUG(); +} static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) { diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index 268d3bd073c8..b11c0e2f9639 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -8,17 +8,17 @@ #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ - long __gui_ret = 0; \ - unsigned long __gui_ptr = (unsigned long)ptr; \ + long __gui_ret; \ + u32 __user *__gui_ptr = (u32 __user *)ptr; \ struct ppc_inst __gui_inst; \ unsigned int __prefix, __suffix; \ - __gui_ret = gu_op(__prefix, (unsigned int __user *)__gui_ptr); \ + \ + __chk_user_ptr(ptr); \ + __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ if ((__prefix >> 26) == OP_PREFIX) { \ - __gui_ret = gu_op(__suffix, \ - (unsigned int __user *)__gui_ptr + 1); \ - __gui_inst = ppc_inst_prefix(__prefix, \ - __suffix); \ + __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ + __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else { \ __gui_inst = ppc_inst(__prefix); \ } \ @@ -29,14 +29,15 @@ }) #else /* !CONFIG_PPC64 */ #define ___get_user_instr(gu_op, dest, ptr) \ - gu_op((dest).val, (u32 __user *)(ptr)) +({ \ + __chk_user_ptr(ptr); \ + gu_op((dest).val, (u32 __user *)(ptr)); \ +}) #endif /* CONFIG_PPC64 */ -#define get_user_instr(x, ptr) \ - ___get_user_instr(get_user, x, ptr) +#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) -#define __get_user_instr(x, ptr) \ - ___get_user_instr(__get_user, x, ptr) +#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr) /* * Instruction data type for POWER @@ -59,9 +60,9 @@ static inline int ppc_inst_primary_opcode(struct ppc_inst x) return ppc_inst_val(x) >> 26; } -#ifdef CONFIG_PPC64 -#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff }) +#define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) +#ifdef CONFIG_PPC64 #define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) static inline u32 ppc_inst_suffix(struct ppc_inst x) @@ -69,68 +70,43 @@ static inline u32 ppc_inst_suffix(struct ppc_inst x) return x.suffix; } -static inline bool ppc_inst_prefixed(struct ppc_inst x) -{ - return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff; -} +#else +#define ppc_inst_prefix(x, y) ppc_inst(x) -static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +static inline u32 ppc_inst_suffix(struct ppc_inst x) { - return ppc_inst_prefix(swab32(ppc_inst_val(x)), - swab32(ppc_inst_suffix(x))); + return 0; } -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) -{ - u32 val, suffix; - - val = *(u32 *)ptr; - if ((val >> 26) == OP_PREFIX) { - suffix = *((u32 *)ptr + 1); - return ppc_inst_prefix(val, suffix); - } else { - return ppc_inst(val); - } -} +#endif /* CONFIG_PPC64 */ -static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +static inline struct ppc_inst ppc_inst_read(const u32 *ptr) { - return *(u64 *)&x == *(u64 *)&y; + if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) + return ppc_inst_prefix(*ptr, *(ptr + 1)); + else + return ppc_inst(*ptr); } -#else - -#define ppc_inst(x) ((struct ppc_inst){ .val = x }) - -#define ppc_inst_prefix(x, y) ppc_inst(x) - static inline bool ppc_inst_prefixed(struct ppc_inst x) { - return false; -} - -static inline u32 ppc_inst_suffix(struct ppc_inst x) -{ - return 0; + return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; } static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) { - return ppc_inst(swab32(ppc_inst_val(x))); -} - -static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr) -{ - return *ptr; + return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) { - return ppc_inst_val(x) == ppc_inst_val(y); + if (ppc_inst_val(x) != ppc_inst_val(y)) + return false; + if (!ppc_inst_prefixed(x)) + return true; + return ppc_inst_suffix(x) == ppc_inst_suffix(y); } -#endif /* CONFIG_PPC64 */ - static inline int ppc_inst_len(struct ppc_inst x) { return ppc_inst_prefixed(x) ? 8 : 4; @@ -140,13 +116,13 @@ static inline int ppc_inst_len(struct ppc_inst x) * Return the address of the next instruction, if the instruction @value was * located at @location. */ -static inline struct ppc_inst *ppc_inst_next(void *location, struct ppc_inst *value) +static inline u32 *ppc_inst_next(u32 *location, u32 *value) { struct ppc_inst tmp; tmp = ppc_inst_read(value); - return location + ppc_inst_len(tmp); + return (void *)location + ppc_inst_len(tmp); } static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) @@ -178,6 +154,6 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src); +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 59f704408d65..d4bdf7d274ac 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -73,13 +73,47 @@ #include <asm/kprobes.h> #include <asm/runlatch.h> +#ifdef CONFIG_PPC_BOOK3S_64 +extern char __end_soft_masked[]; +bool search_kernel_soft_mask_table(unsigned long addr); +unsigned long search_kernel_restart_table(unsigned long addr); + +DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); + +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + if (regs->msr & MSR_PR) + return false; + + if (regs->nip >= (unsigned long)__end_soft_masked) + return false; + + return search_kernel_soft_mask_table(regs->nip); +} + +static inline void srr_regs_clobbered(void) +{ + local_paca->srr_valid = 0; + local_paca->hsrr_valid = 0; +} +#else +static inline bool is_implicit_soft_masked(struct pt_regs *regs) +{ + return false; +} + +static inline void srr_regs_clobbered(void) +{ +} +#endif + static inline void nap_adjust_return(struct pt_regs *regs) { #ifdef CONFIG_PPC_970_NAP if (unlikely(test_thread_local_flags(_TLF_NAPPING))) { /* Can avoid a test-and-clear because NMIs do not call this */ clear_thread_local_flags(_TLF_NAPPING); - regs->nip = (unsigned long)power4_idle_nap_return; + regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return); } #endif } @@ -129,9 +163,18 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ - if (TRAP(regs) != INTERRUPT_PROGRAM) + if (TRAP(regs) != INTERRUPT_PROGRAM) { CT_WARN_ON(ct_state() != CONTEXT_KERNEL); + BUG_ON(is_implicit_soft_masked(regs)); + } +#ifdef CONFIG_PPC_BOOK3S + /* Move this under a debugging check */ + if (arch_irq_disabled_regs(regs)) + BUG_ON(search_kernel_restart_table(regs->nip)); +#endif } + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); #endif booke_restore_dbcr0(); @@ -186,6 +229,7 @@ struct interrupt_nmi_state { u8 irq_soft_mask; u8 irq_happened; u8 ftrace_enabled; + u64 softe; #endif }; @@ -211,6 +255,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte #ifdef CONFIG_PPC64 state->irq_soft_mask = local_paca->irq_soft_mask; state->irq_happened = local_paca->irq_happened; + state->softe = regs->softe; /* * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does @@ -220,12 +265,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte local_paca->irq_soft_mask = IRQS_ALL_DISABLED; local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) && - regs->nip < (unsigned long)__end_interrupts) { - // Kernel code running below __end_interrupts is - // implicitly soft-masked. + if (is_implicit_soft_masked(regs)) { + // Adjust regs->softe soft implicit soft-mask, so + // arch_irq_disabled_regs(regs) behaves as expected. regs->softe = IRQS_ALL_DISABLED; } + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); /* Don't do any per-CPU operations until interrupt state is fixed */ @@ -258,11 +304,20 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter */ #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S + if (arch_irq_disabled_regs(regs)) { + unsigned long rst = search_kernel_restart_table(regs->nip); + if (rst) + regs_set_return_ip(regs, rst); + } +#endif + if (nmi_disables_ftrace(regs)) this_cpu_set_ftrace_enabled(state->ftrace_enabled); /* Check we didn't change the pending interrupt mask. */ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened); + regs->softe = state->softe; local_paca->irq_happened = state->irq_happened; local_paca->irq_soft_mask = state->irq_soft_mask; #endif diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index ec96232529ac..1df763002726 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -5,14 +5,6 @@ #define KUAP_READ 1 #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) -/* - * For prevent_user_access() only. - * Use the current saved situation instead of the to/from/size params. - * Used on book3s/32 - */ -#define KUAP_CURRENT_READ 4 -#define KUAP_CURRENT_WRITE 8 -#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kup.h> @@ -46,10 +38,7 @@ void setup_kuep(bool disabled); static inline void setup_kuep(bool disabled) { } #endif /* CONFIG_PPC_KUEP */ -#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) -void kuep_lock(void); -void kuep_unlock(void); -#else +#ifndef CONFIG_PPC_BOOK3S_32 static inline void kuep_lock(void) { } static inline void kuep_unlock(void) { } #endif @@ -83,8 +72,7 @@ static inline unsigned long kuap_get_and_assert_locked(void) #ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } +static inline void prevent_user_access(unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -96,53 +84,53 @@ static __always_inline void setup_kup(void) setup_kuap(disable_kuap); } -static inline void allow_read_from_user(const void __user *from, unsigned long size) +static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); allow_user_access(NULL, from, size, KUAP_READ); } -static inline void allow_write_to_user(void __user *to, unsigned long size) +static __always_inline void allow_write_to_user(void __user *to, unsigned long size) { allow_user_access(to, NULL, size, KUAP_WRITE); } -static inline void allow_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) { barrier_nospec(); allow_user_access(to, from, size, KUAP_READ_WRITE); } -static inline void prevent_read_from_user(const void __user *from, unsigned long size) +static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size, KUAP_READ); + prevent_user_access(KUAP_READ); } -static inline void prevent_write_to_user(void __user *to, unsigned long size) +static __always_inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size, KUAP_WRITE); + prevent_user_access(KUAP_WRITE); } -static inline void prevent_read_write_user(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) { - prevent_user_access(to, from, size, KUAP_READ_WRITE); + prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_access_user(void) +static __always_inline void prevent_current_access_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT); + prevent_user_access(KUAP_READ_WRITE); } -static inline void prevent_current_read_from_user(void) +static __always_inline void prevent_current_read_from_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ); + prevent_user_access(KUAP_READ); } -static inline void prevent_current_write_to_user(void) +static __always_inline void prevent_current_write_to_user(void) { - prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE); + prevent_user_access(KUAP_WRITE); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index 2fca299f7e19..c63105d2c9e7 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -16,10 +16,10 @@ static inline bool is_kvm_guest(void) return static_branch_unlikely(&kvm_guest); } -bool check_kvm_guest(void); +int check_kvm_guest(void); #else static inline bool is_kvm_guest(void) { return false; } -static inline bool check_kvm_guest(void) { return false; } +static inline int check_kvm_guest(void) { return 0; } #endif #endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index ae25e6e72997..4fe018cc207b 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -16,7 +16,7 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { struct pt_regs *regs = ftrace_get_regs(fregs); - regs->nip = ip; + regs_set_return_ip(regs, ip); } #define klp_get_ftrace_location klp_get_ftrace_location diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 607168b1aef4..27016b98ecb2 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -220,7 +220,7 @@ enum { #elif defined(CONFIG_44x) #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x #endif -#if defined(CONFIG_E200) || defined(CONFIG_E500) +#ifdef CONFIG_E500 #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif @@ -324,7 +324,6 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) } #endif /* !CONFIG_DEBUG_VM */ -#ifdef CONFIG_PPC_RADIX_MMU static inline bool radix_enabled(void) { return mmu_has_feature(MMU_FTR_TYPE_RADIX); @@ -334,17 +333,6 @@ static inline bool early_radix_enabled(void) { return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); } -#else -static inline bool radix_enabled(void) -{ - return false; -} - -static inline bool early_radix_enabled(void) -{ - return false; -} -#endif #ifdef CONFIG_STRICT_KERNEL_RWX static inline bool strict_kernel_rwx_enabled(void) @@ -357,6 +345,11 @@ static inline bool strict_kernel_rwx_enabled(void) return false; } #endif + +static inline bool strict_module_rwx_enabled(void) +{ + return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled(); +} #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index db186c539d37..9ba6b585337f 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -57,7 +57,6 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); -extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 295ef5639609..882a0bc7887a 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -9,10 +9,22 @@ #ifndef __ASSEMBLY__ +#include <linux/jump_label.h> + #include <asm/reg.h> +extern struct static_key_false disable_kuap_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + static inline void kuap_save_and_lock(struct pt_regs *regs) { + if (kuap_is_disabled()) + return; + regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } @@ -23,12 +35,20 @@ static inline void kuap_user_restore(struct pt_regs *regs) static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, regs->kuap); } static inline unsigned long kuap_get_and_assert_locked(void) { - unsigned long kuap = mfspr(SPRN_MD_AP); + unsigned long kuap; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + kuap = mfspr(SPRN_MD_AP); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); @@ -38,25 +58,35 @@ static inline unsigned long kuap_get_and_assert_locked(void) static inline void kuap_assert_locked(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) kuap_get_and_assert_locked(); } static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void prevent_user_access(unsigned long dir) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, MD_APG_KUAP); } static inline unsigned long prevent_user_access_return(void) { - unsigned long flags = mfspr(SPRN_MD_AP); + unsigned long flags; + + if (kuap_is_disabled()) + return MD_APG_INIT; + + flags = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); @@ -65,12 +95,18 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { + if (kuap_is_disabled()) + return; + mtspr(SPRN_MD_AP, flags); } static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + if (kuap_is_disabled()) + return false; + return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 2d92a39d8f2e..43ceca128531 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -113,6 +113,7 @@ typedef struct { /* patch sites */ extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; +extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index ec18ac818e3a..dc05a862e72a 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -149,11 +149,9 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S - mm_context_id_t mm_ctx_id; #ifdef CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long mm_ctx_slb_addr_limit; #else u16 mm_ctx_user_psize; u16 mm_ctx_sllp; @@ -167,9 +165,16 @@ struct paca_struct { u64 kstack; /* Saved Kernel stack addr */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ +#ifdef CONFIG_PPC64 + u64 exit_save_r1; /* Syscall/interrupt R1 save */ +#endif #ifdef CONFIG_PPC_BOOK3E u16 trap_save; /* Used when bad stack is encountered */ #endif +#ifdef CONFIG_PPC_BOOK3S_64 + u8 hsrr_valid; /* HSRRs set for HRFID */ + u8 srr_valid; /* SRRs set for RFID */ +#endif u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 5969743719bc..d564d0ecd4cd 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -41,6 +41,10 @@ struct mm_struct; #ifndef __ASSEMBLY__ +#ifndef MAX_PTRS_PER_PGD +#define MAX_PTRS_PER_PGD PTRS_PER_PGD +#endif + /* Keep these as a macros to avoid include dependency mess */ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) @@ -72,6 +76,7 @@ extern unsigned long empty_zero_page[]; extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +void poking_init(void); extern unsigned long ioremap_bot; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ac41776661e9..bede76dd3db7 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -76,6 +76,40 @@ #define __REGA0_R30 30 #define __REGA0_R31 31 +/* For use with PPC_RAW_() macros */ +#define _R0 0 +#define _R1 1 +#define _R2 2 +#define _R3 3 +#define _R4 4 +#define _R5 5 +#define _R6 6 +#define _R7 7 +#define _R8 8 +#define _R9 9 +#define _R10 10 +#define _R11 11 +#define _R12 12 +#define _R13 13 +#define _R14 14 +#define _R15 15 +#define _R16 16 +#define _R17 17 +#define _R18 18 +#define _R19 19 +#define _R20 20 +#define _R21 21 +#define _R22 22 +#define _R23 23 +#define _R24 24 +#define _R25 25 +#define _R26 26 +#define _R27 27 +#define _R28 28 +#define _R29 29 +#define _R30 30 +#define _R31 31 + #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define IMM_DS(i) ((uintptr_t)(i) & 0xfffc) #define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0) @@ -222,13 +256,11 @@ #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe -#define PPC_INST_ISYNC 0x4c00012c #define PPC_INST_MCRXR 0x7c000400 #define PPC_INST_MCRXR_MASK 0xfc0007fe #define PPC_INST_MFSPR_PVR 0x7c1f42a6 #define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe #define PPC_INST_MTMSRD 0x7c000164 -#define PPC_INST_NOP 0x60000000 #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_RFEBB 0x4c000124 @@ -241,10 +273,10 @@ #define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe -#define PPC_INST_SC 0x44000002 #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe #define PPC_INST_STRING_GEN_MASK 0xfc00067e +#define PPC_INST_SETB 0x7c000100 #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_TRECHKPT 0x7c0007dd @@ -252,18 +284,9 @@ #define PPC_INST_TSR 0x7c0005dd #define PPC_INST_LD 0xe8000000 #define PPC_INST_STD 0xf8000000 -#define PPC_INST_MFLR 0x7c0802a6 -#define PPC_INST_MTCTR 0x7c0903a6 -#define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 -#define PPC_INST_BLR 0x4e800020 -#define PPC_INST_BCTR 0x4e800420 -#define PPC_INST_BCTRL 0x4e800421 #define PPC_INST_DIVD 0x7c0003d2 -#define PPC_INST_RLDICR 0x78000004 -#define PPC_INST_ORI 0x60000000 -#define PPC_INST_ORIS 0x64000000 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BL 0x48000001 #define PPC_INST_BRANCH_COND 0x40800000 @@ -323,6 +346,8 @@ #define PPC_LO(v) ((v) & 0xffff) #define PPC_HI(v) (((v) >> 16) & 0xffff) #define PPC_HA(v) PPC_HI((v) + 0x8000) +#define PPC_HIGHER(v) (((v) >> 32) & 0xffff) +#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff) /* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a @@ -383,6 +408,10 @@ #define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b)) #define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21)) #define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21)) +#define PPC_RAW_SC() (0x44000002) +#define PPC_RAW_SYNC() (0x7c0004ac) +#define PPC_RAW_ISYNC() (0x4c00012c) + /* * Define what the VSX XX1 form instructions will look like, then add * the 128 bit load store instructions based on that. @@ -404,10 +433,10 @@ #define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i)) #define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_RAW_PLXVP(xtp, i, a, pr) \ - ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))) -#define PPC_RAW_PSTXVP(xsp, i, a, pr) \ - ((PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) << 32 | (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))) +#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i)) +#define PPC_RAW_PSTXVP_P(xsp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i)) +#define PPC_RAW_PSTXVP_S(xsp, i, a, pr) (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i)) #define PPC_RAW_NAP (0x4c000364) #define PPC_RAW_SLEEP (0x4c0003a4) #define PPC_RAW_WINKLE (0x4c0003e4) @@ -445,16 +474,17 @@ #define PPC_RAW_ADD_DOT(t, a, b) (PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) #define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1) -#define PPC_RAW_NOP() (PPC_INST_NOP) -#define PPC_RAW_BLR() (PPC_INST_BLR) +#define PPC_RAW_NOP() PPC_RAW_ORI(0, 0, 0) +#define PPC_RAW_BLR() (0x4e800020) #define PPC_RAW_BLRL() (0x4e800021) #define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r)) -#define PPC_RAW_MFLR(t) (PPC_INST_MFLR | ___PPC_RT(t)) -#define PPC_RAW_BCTR() (PPC_INST_BCTR) -#define PPC_RAW_MTCTR(r) (PPC_INST_MTCTR | ___PPC_RT(r)) -#define PPC_RAW_ADDI(d, a, i) (PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_MFLR(t) (0x7c0802a6 | ___PPC_RT(t)) +#define PPC_RAW_BCTR() (0x4e800420) +#define PPC_RAW_BCTRL() (0x4e800421) +#define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r)) +#define PPC_RAW_ADDI(d, a, i) (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i) -#define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) +#define PPC_RAW_ADDIS(d, a, i) (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i) @@ -499,8 +529,8 @@ #define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a) -#define PPC_RAW_ORI(d, a, i) (PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) -#define PPC_RAW_ORIS(d, a, i) (PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORI(d, a, i) (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) +#define PPC_RAW_ORIS(d, a, i) (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) #define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i)) @@ -519,7 +549,7 @@ (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me)) #define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb)) -#define PPC_RAW_RLDICR(d, a, i, me) (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) +#define PPC_RAW_RLDICR(d, a, i, me) (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me)) /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ #define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i)) @@ -533,6 +563,8 @@ #define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a)) #define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr)) +#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) +#define PPC_RAW_EIEIO() (0x7c0006ac) /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index d6739d700f0a..116c1519728a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -762,6 +762,21 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) stringify_in_c(.long (_target) - . ;) \ stringify_in_c(.previous) +#define SOFT_MASK_TABLE(_start, _end) \ + stringify_in_c(.section __soft_mask_table,"a";)\ + stringify_in_c(.balign 8;) \ + stringify_in_c(.llong (_start);) \ + stringify_in_c(.llong (_end);) \ + stringify_in_c(.previous) + +#define RESTART_TABLE(_start, _end, _target) \ + stringify_in_c(.section __restart_table,"a";)\ + stringify_in_c(.balign 8;) \ + stringify_in_c(.llong (_start);) \ + stringify_in_c(.llong (_end);) \ + stringify_in_c(.llong (_target);) \ + stringify_in_c(.previous) + #ifdef CONFIG_PPC_FSL_BOOK3E #define BTB_FLUSH(reg) \ lis reg,BUCSR_INIT@h; \ diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 84dd1addd434..c5d984700d24 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -34,14 +34,14 @@ typedef u32 ppc_opcode_t; /* Enable single stepping for the current task */ static inline void enable_single_step(struct pt_regs *regs) { - regs->msr |= MSR_SINGLESTEP; + regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP); #ifdef CONFIG_PPC_ADV_DEBUG_REGS /* * We turn off Critical Input Exception(CE) to ensure that the single * step will be for the instruction we have the probe on; if we don't, * it is possible we'd get the single step reported for CE. */ - regs->msr &= ~MSR_CE; + regs_set_return_msr(regs, regs->msr & ~MSR_CE); mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); #ifdef CONFIG_PPC_47x isync(); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 7bf8a15af224..f348e564f7dd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -276,7 +276,15 @@ struct thread_struct { #define SPEFSCR_INIT #endif -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) +#define INIT_THREAD { \ + .ksp = INIT_SP, \ + .pgdir = swapper_pg_dir, \ + .kuap = ~0UL, /* KUAP_NONE */ \ + .fpexc_mode = MSR_FE0 | MSR_FE1, \ + SPEFSCR_INIT \ +} +#elif defined(CONFIG_PPC32) #define INIT_THREAD { \ .ksp = INIT_SP, \ .pgdir = swapper_pg_dir, \ @@ -339,17 +347,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define spin_end() HMT_medium() -#define spin_until_cond(cond) \ -do { \ - if (unlikely(!(cond))) { \ - spin_begin(); \ - do { \ - spin_cpu_relax(); \ - } while (!(cond)); \ - spin_end(); \ - } \ -} while (0) - #endif /* Check that a certain kernel stack pointer is valid in task_struct p */ diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index e646c7f218bc..8a0d8fb35328 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -71,6 +71,7 @@ struct ps3_dma_region_ops; * @bus_addr: The 'translated' bus address of the region. * @len: The length in bytes of the region. * @offset: The offset from the start of memory of the region. + * @dma_mask: Device dma_mask. * @ioid: The IOID of the device who owns this region * @chunk_list: Opaque variable used by the ioc page manager. * @region_ops: struct ps3_dma_region_ops - dma region operations @@ -85,6 +86,7 @@ struct ps3_dma_region { enum ps3_dma_region_type region_type; unsigned long len; unsigned long offset; + u64 dma_mask; /* driver variables (set by ps3_dma_region_create) */ unsigned long bus_addr; @@ -232,7 +234,7 @@ enum lv1_result { static inline const char* ps3_result(int result) { -#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) +#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT) switch (result) { case LV1_SUCCESS: return "LV1_SUCCESS (0)"; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index b476a685f066..3e5d470a6155 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -48,11 +48,12 @@ struct pt_regs unsigned long result; }; }; - +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP) union { struct { #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long exit_result; #endif union { #ifdef CONFIG_PPC_KUAP @@ -68,6 +69,7 @@ struct pt_regs }; unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; +#endif }; #endif @@ -122,6 +124,41 @@ struct pt_regs #endif /* __powerpc64__ */ #ifndef __ASSEMBLY__ +#include <asm/paca.h> + +#ifdef CONFIG_SMP +extern unsigned long profile_pc(struct pt_regs *regs); +#else +#define profile_pc(regs) instruction_pointer(regs) +#endif + +long do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + +static inline void set_return_regs_changed(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + local_paca->hsrr_valid = 0; + local_paca->srr_valid = 0; +#endif +} + +static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip) +{ + regs->nip = ip; + set_return_regs_changed(); +} + +static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr) +{ + regs->msr = msr; + set_return_regs_changed(); +} + +static inline void regs_add_return_ip(struct pt_regs *regs, long offset) +{ + regs_set_return_ip(regs, regs->nip + offset); +} static inline unsigned long instruction_pointer(struct pt_regs *regs) { @@ -131,7 +168,7 @@ static inline unsigned long instruction_pointer(struct pt_regs *regs) static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { - regs->nip = val; + regs_set_return_ip(regs, val); } static inline unsigned long user_stack_pointer(struct pt_regs *regs) @@ -144,15 +181,6 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return 0; } -#ifdef CONFIG_SMP -extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif - -long do_syscall_trace_enter(struct pt_regs *regs); -void do_syscall_trace_leave(struct pt_regs *regs); - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7c81d3e563b2..be85cf156a1f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -393,6 +393,7 @@ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ #define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */ +#define SPRN_TRIG2 0x372 #define SPRN_PMCR 0x374 /* Power Management Control Register */ #define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */ @@ -1435,8 +1436,6 @@ static inline void mtsr(u32 val, u32 idx) } #endif -#define proc_trap() asm volatile("trap") - extern unsigned long current_stack_frame(void); register unsigned long current_stack_pointer asm("r1"); @@ -1447,16 +1446,6 @@ extern void scom970_write(unsigned int address, unsigned long value); struct pt_regs; extern void ppc_save_regs(struct pt_regs *regs); - -static inline void update_power8_hid0(unsigned long hid0) -{ - /* - * The HID0 update on Power8 should at the very least be - * preceded by a SYNC instruction followed by an ISYNC - * instruction - */ - asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); -} #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_REG_H */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index b774a4477d5f..792eefaf230b 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -92,6 +92,9 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed after user accesses from the kernel #define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull +// The STF flush should be executed on privilege state switch +#define SEC_FTR_STF_BARRIER 0x0000000000010000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ @@ -99,6 +102,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_L1D_FLUSH_UACCESS | \ + SEC_FTR_STF_BARRIER | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h new file mode 100644 index 000000000000..b040094f7920 --- /dev/null +++ b/arch/powerpc/include/asm/set_memory.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SET_MEMORY_H +#define _ASM_POWERPC_SET_MEMORY_H + +#define SET_MEMORY_RO 0 +#define SET_MEMORY_RW 1 +#define SET_MEMORY_NX 2 +#define SET_MEMORY_X 3 + +int change_memory_attr(unsigned long addr, int numpages, long action); + +static inline int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RO); +} + +static inline int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RW); +} + +static inline int set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NX); +} + +static inline int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_X); +} + +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot); + +#endif diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e89bfebd4e00..6c1a7d217d1a 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -10,7 +10,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; extern bool init_mem_is_free; -extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 972ed0df154d..1df867c2e054 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -13,12 +13,11 @@ struct pt_regs; * we don't allow putting a breakpoint on an mtmsrd instruction. * Similarly we don't allow breakpoints on rfid instructions. * These macros tell us if an instruction is a mtmsrd or rfid. - * Note that IS_MTMSRD returns true for both an mtmsr (32-bit) - * and an mtmsrd (64-bit). + * Note that these return true for both mtmsr/rfi (32-bit) + * and mtmsrd/rfid (64-bit). */ #define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124) -#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000024) -#define IS_RFI(instr) ((ppc_inst_val(instr) & 0xfc0007fe) == 0x4c000064) +#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024) enum instruction_type { COMPUTE, /* arith/logical/CR op, etc. */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 5bf65f5d44a9..fe683371336f 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -24,8 +24,8 @@ typedef ppc_opcode_t uprobe_opcode_t; struct arch_uprobe { union { - struct ppc_inst insn; - struct ppc_inst ixol; + u32 insn[2]; + u32 ixol[2]; }; }; diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index e33f80b0ea81..57573d9c1e09 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -5,8 +5,10 @@ #ifndef _ASM_POWERPC_VAS_H #define _ASM_POWERPC_VAS_H - -struct vas_window; +#include <linux/sched/mm.h> +#include <linux/mmu_context.h> +#include <asm/icswx.h> +#include <uapi/asm/vas-api.h> /* * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25 @@ -49,6 +51,64 @@ enum vas_cop_type { }; /* + * User space VAS windows are opened by tasks and take references + * to pid and mm until windows are closed. + * Stores pid, mm, and tgid for each window. + */ +struct vas_user_win_ref { + struct pid *pid; /* PID of owner */ + struct pid *tgid; /* Thread group ID of owner */ + struct mm_struct *mm; /* Linux process mm_struct */ +}; + +/* + * Common VAS window struct on PowerNV and PowerVM + */ +struct vas_window { + u32 winid; + u32 wcreds_max; /* Window credits */ + enum vas_cop_type cop; + struct vas_user_win_ref task_ref; + char *dbgname; + struct dentry *dbgdir; +}; + +/* + * User space window operations used for powernv and powerVM + */ +struct vas_user_win_ops { + struct vas_window * (*open_win)(int vas_id, u64 flags, + enum vas_cop_type); + u64 (*paste_addr)(struct vas_window *); + int (*close_win)(struct vas_window *); +}; + +static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref) +{ + /* Drop references to pid, tgid, and mm */ + put_pid(ref->pid); + put_pid(ref->tgid); + if (ref->mm) + mmdrop(ref->mm); +} + +static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref) +{ + mm_context_add_vas_window(ref->mm); + /* + * Even a process that has no foreign real address mapping can + * use an unpaired COPY instruction (to no real effect). Issue + * CP_ABORT to clear any pending COPY and prevent a covert + * channel. + * + * __switch_to() will issue CP_ABORT on future context switches + * if process / thread has any open VAS window (Use + * current->mm->context.vas_windows). + */ + asm volatile(PPC_CP_ABORT); +} + +/* * Receive window attributes specified by the (in-kernel) owner of window. */ struct vas_rx_win_attr { @@ -100,6 +160,7 @@ struct vas_tx_win_attr { bool rx_win_ord_mode; }; +#ifdef CONFIG_PPC_POWERNV /* * Helper to map a chip id to VAS id. * For POWER9, this is a 1:1 mapping. In the future this maybe a 1:N @@ -162,6 +223,43 @@ int vas_copy_crb(void *crb, int offset); */ int vas_paste_crb(struct vas_window *win, int offset, bool re); +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name); +void vas_unregister_api_powernv(void); +#endif + +#ifdef CONFIG_PPC_PSERIES + +/* VAS Capabilities */ +#define VAS_GZIP_QOS_FEAT 0x1 +#define VAS_GZIP_DEF_FEAT 0x2 +#define VAS_GZIP_QOS_FEAT_BIT PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */ +#define VAS_GZIP_DEF_FEAT_BIT PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */ + +/* NX Capabilities */ +#define VAS_NX_GZIP_FEAT 0x1 +#define VAS_NX_GZIP_FEAT_BIT PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */ + +/* + * These structs are used to retrieve overall VAS capabilities that + * the hypervisor provides. + */ +struct hv_vas_all_caps { + __be64 descriptor; + __be64 feat_type; +} __packed __aligned(0x1000); + +struct vas_all_caps { + u64 descriptor; + u64 feat_type; +}; + +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result); +int vas_register_api_pseries(struct module *mod, + enum vas_cop_type cop_type, const char *name); +void vas_unregister_api_pseries(void); +#endif + /* * Register / unregister coprocessor type to VAS API which will be exported * to user space. Applications can use this API to open / close window @@ -171,7 +269,12 @@ int vas_paste_crb(struct vas_window *win, int offset, bool re); * used for others in future. */ int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name); + const char *name, + const struct vas_user_win_ops *vops); void vas_unregister_coproc_api(void); +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref); +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref); +void vas_dump_crb(struct coprocessor_request_block *crb); #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 8e903b3f9c24..d9cf192368ad 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -65,8 +65,12 @@ struct icp_ops { extern const struct icp_ops *icp_ops; +#ifdef CONFIG_PPC_ICS_NATIVE /* Native ICS */ extern int ics_native_init(void); +#else +static inline int ics_native_init(void) { return -ENODEV; } +#endif /* RTAS ICS */ #ifdef CONFIG_PPC_ICS_RTAS diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 50ef95e2f5b1..82488b1e7276 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -77,6 +77,9 @@ /* Indicate that the 'dimm_fuel_gauge' field is valid */ #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1 +/* Indicate that the 'dimm_dsc' field is valid */ +#define PDSM_DIMM_DSC_VALID 2 + /* * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH * Various flags indicate the health status of the dimm. @@ -105,6 +108,9 @@ struct nd_papr_pdsm_health { /* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */ __u16 dimm_fuel_gauge; + + /* Extension flag PDSM_DIMM_DSC_VALID */ + __u64 dimm_dsc; }; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; }; diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h index ebd4b2424785..7c81301ecdba 100644 --- a/arch/powerpc/include/uapi/asm/vas-api.h +++ b/arch/powerpc/include/uapi/asm/vas-api.h @@ -13,11 +13,15 @@ #define VAS_MAGIC 'v' #define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr) +/* Flags to VAS TX open window ioctl */ +/* To allocate a window with QoS credit, otherwise use default credit */ +#define VAS_TX_WIN_FLAG_QOS_CREDIT 0x0000000000000001 + struct vas_tx_win_open_attr { __u32 version; __s16 vas_id; /* specific instance of vas or -1 for default */ __u16 reserved1; - __u64 flags; /* Future use */ + __u64 flags; __u64 reserved2[6]; }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index aa267d173ded..a47eefa09bcb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -86,11 +86,7 @@ int main(void) OFFSET(PACA_CANARY, paca_struct, canary); #endif #endif - OFFSET(MMCONTEXTID, mm_struct, context.id); -#ifdef CONFIG_PPC64 - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(NMI_MASK, NMI_MASK); -#else +#ifdef CONFIG_PPC32 #ifdef CONFIG_PPC_RTAS OFFSET(RTAS_SP, thread_struct, rtas_sp); #endif @@ -119,7 +115,6 @@ int main(void) #ifdef CONFIG_ALTIVEC OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr); OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area); - OFFSET(THREAD_VRSAVE, thread_struct, vrsave); OFFSET(THREAD_USED_VR, thread_struct, used_vr); OFFSET(VRSTATE_VSCR, thread_vr_state, vscr); OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec); @@ -150,22 +145,15 @@ int main(void) #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); OFFSET(THREAD_ACC, thread_struct, acc); - OFFSET(THREAD_SPEFSCR, thread_struct, spefscr); OFFSET(THREAD_USED_SPE, thread_struct, used_spe); #endif /* CONFIG_SPE */ #endif /* CONFIG_PPC64 */ -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - OFFSET(THREAD_DBCR0, thread_struct, debug.dbcr0); -#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu); #endif #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif -#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) - OFFSET(KUAP, thread_struct, kuap); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -185,19 +173,12 @@ int main(void) sizeof(struct pt_regs) + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); - OFFSET(TI_PREEMPT, thread_info, preempt_count); #ifdef CONFIG_PPC64 OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size); - OFFSET(DCACHEL1BLOCKSPERPAGE, ppc64_caches, l1d.blocks_per_page); - OFFSET(ICACHEL1BLOCKSIZE, ppc64_caches, l1i.block_size); - OFFSET(ICACHEL1LOGBLOCKSIZE, ppc64_caches, l1i.log_block_size); - OFFSET(ICACHEL1BLOCKSPERPAGE, ppc64_caches, l1i.blocks_per_page); /* paca */ - DEFINE(PACA_SIZE, sizeof(struct paca_struct)); OFFSET(PACAPACAINDEX, paca_struct, paca_index); OFFSET(PACAPROCSTART, paca_struct, cpu_start); OFFSET(PACAKSAVE, paca_struct, kstack); @@ -209,18 +190,13 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); +#ifdef CONFIG_PPC_BOOK3S_64 + OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid); + OFFSET(PACASRR_VALID, paca_struct, srr_valid); +#endif OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3S - OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); - OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); - OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit); - DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); -#endif /* CONFIG_PPC_MM_SLICES */ -#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACAPGD, paca_struct, pgd); @@ -241,21 +217,9 @@ int main(void) #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S_64 - OFFSET(PACASLBCACHE, paca_struct, slb_cache); - OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); - OFFSET(PACASTABRR, paca_struct, stab_rr); - OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); -#ifdef CONFIG_PPC_MM_SLICES - OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); -#else - OFFSET(PACACONTEXTSLLP, paca_struct, mm_ctx_sllp); -#endif /* CONFIG_PPC_MM_SLICES */ OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); -#ifdef CONFIG_PPC_PSERIES - OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr); -#endif OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); @@ -264,9 +228,7 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); #endif - OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); - OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); #endif /* CONFIG_PPC_BOOK3S_64 */ OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 @@ -282,6 +244,9 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); +#ifdef CONFIG_PPC64 + OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); +#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif @@ -343,10 +308,6 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr); STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#ifdef CONFIG_PPC_KUAP - STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); -#endif - #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) @@ -368,10 +329,6 @@ int main(void) #endif #endif -#ifndef CONFIG_PPC64 - OFFSET(MM_PGD, mm_struct, pgd); -#endif /* ! CONFIG_PPC64 */ - /* About the CPU features table */ OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features); OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup); @@ -404,13 +361,6 @@ int main(void) DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(PGD_TABLE_SIZE, (sizeof(pgd_t) << max(RADIX_PGD_INDEX_SIZE, H_PGD_INDEX_SIZE))); -#else - DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); -#endif - DEFINE(PTE_SIZE, sizeof(pte_t)); - #ifdef CONFIG_KVM OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack); OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid); @@ -482,11 +432,9 @@ int main(void) OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); - OFFSET(KVM_NEED_FLUSH, kvm, arch.need_tlb_flush.bits); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); - OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); @@ -514,7 +462,6 @@ int main(void) OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); - OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); @@ -525,7 +472,6 @@ int main(void) OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc); - OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc); OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar); OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar); OFFSET(VCPU_SIER, kvm_vcpu, arch.sier); @@ -645,10 +591,8 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); - HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -756,7 +700,6 @@ int main(void) #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); - DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); #ifdef CONFIG_PPC_8xx DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 735e89337398..5693e1c67c2b 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -35,7 +35,7 @@ void __init reserve_kdump_trampoline(void) static void __init create_trampoline(unsigned long addr) { - struct ppc_inst *p = (struct ppc_inst *)addr; + u32 *p = (u32 *)addr; /* The maximum range of a single instruction branch, is the current * instruction's address + (32 MB - 4) bytes. For the trampoline we @@ -45,8 +45,8 @@ static void __init create_trampoline(unsigned long addr) * branch to "addr" we jump to ("addr" + 32 MB). Although it requires * two instructions it doesn't require any registers. */ - patch_instruction(p, ppc_inst(PPC_INST_NOP)); - patch_branch((void *)p + 4, addr + PHYSICAL_START, 0); + patch_instruction(p, ppc_inst(PPC_RAW_NOP())); + patch_branch(p + 1, addr + PHYSICAL_START, 0); } void __init setup_kdump_trampoline(void) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9160285cb2f4..0273a1349006 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -32,6 +32,7 @@ #include <asm/barrier.h> #include <asm/kup.h> #include <asm/bug.h> +#include <asm/interrupt.h> #include "head_32.h" @@ -74,6 +75,24 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) .globl transfer_to_syscall transfer_to_syscall: + stw r11, GPR1(r1) + stw r11, 0(r1) + mflr r12 + stw r12, _LINK(r1) +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#endif + lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + SAVE_GPR(2, r1) + addi r12,r12,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r1) + li r2, INTERRUPT_SYSCALL + stw r12,8(r1) + stw r2,_TRAP(r1) + SAVE_GPR(0, r1) + SAVE_4GPRS(3, r1) + SAVE_2GPRS(7, r1) + addi r2,r10,-THREAD SAVE_NVGPRS(r1) /* Calling convention has r9 = orig r0, r10 = regs */ @@ -176,28 +195,6 @@ _GLOBAL(_switch) /* r3-r12 are caller saved -- Cort */ SAVE_NVGPRS(r1) stw r0,_NIP(r1) /* Return to switch caller */ - mfmsr r11 - li r0,MSR_FP /* Disable floating-point */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - oris r0,r0,MSR_VEC@h /* Disable altivec */ - mfspr r12,SPRN_VRSAVE /* save vrsave register value */ - stw r12,THREAD+THREAD_VRSAVE(r2) -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - oris r0,r0,MSR_SPE@h /* Disable SPE */ - mfspr r12,SPRN_SPEFSCR /* save spefscr register value */ - stw r12,THREAD+THREAD_SPEFSCR(r2) -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - and. r0,r0,r11 /* FP or altivec or SPE enabled? */ - beq+ 1f - andc r11,r11,r0 - mtmsr r11 - isync -1: stw r11,_MSR(r1) mfcr r10 stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ @@ -218,19 +215,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) mr r3,r2 addi r2,r4,-THREAD /* Update current */ -#ifdef CONFIG_ALTIVEC -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_VRSAVE(r2) - mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) -#endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_SPE -BEGIN_FTR_SECTION - lwz r0,THREAD+THREAD_SPEFSCR(r2) - mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ -END_FTR_SECTION_IFSET(CPU_FTR_SPE) -#endif /* CONFIG_SPE */ - lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 03727308d8cc..15720f8661a1 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -32,7 +32,6 @@ #include <asm/irqflags.h> #include <asm/hw_irq.h> #include <asm/context_tracking.h> -#include <asm/tm.h> #include <asm/ppc-opcode.h> #include <asm/barrier.h> #include <asm/export.h> @@ -48,372 +47,7 @@ /* * System calls. */ - .section ".toc","aw" -SYS_CALL_TABLE: - .tc sys_call_table[TC],sys_call_table - -#ifdef CONFIG_COMPAT -COMPAT_SYS_CALL_TABLE: - .tc compat_sys_call_table[TC],compat_sys_call_table -#endif - -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - .section ".text" - .align 7 - -#ifdef CONFIG_PPC_BOOK3S -.macro system_call_vectored name trapnr - .globl system_call_vectored_\name -system_call_vectored_\name: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - SCV_INTERRUPT_TO_KERNEL - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_LINK(r1) - std r11,_CTR(r1) - - li r11,\trapnr - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - -BEGIN_FTR_SECTION - HMT_MEDIUM -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * scv enters with MSR[EE]=1 and is immediately considered soft-masked. - * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, - * and interrupts may be masked and pending already. - * system_call_exception() will call trace_hardirqs_off() which means - * interrupts could already have been blocked before trace_hardirqs_off, - * but this is the best we can do. - */ - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,1 /* scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r4,_NIP(r1) - ld r5,_MSR(r1) - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - cmpdi r3,0 - bne .Lsyscall_vectored_\name\()_restore_regs - - /* rfscv returns with LR->NIA and CTR->MSR */ - mtlr r4 - mtctr r5 - - /* Could zero these as per ABI, but we may consider a stricter ABI - * which preserves these if libc implementations can benefit, so - * restore them for now until further measurement is done. */ - ld r0,GPR0(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) - /* Zero volatile regs that may contain sensitive kernel data */ - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtspr SPRN_XER,r0 - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFSCV_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_vectored_\name\()_restore_regs: - li r3,0 - mtmsrd r3,1 - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - - ld r3,_CTR(r1) - ld r4,_LINK(r1) - ld r5,_XER(r1) - - REST_NVGPRS(r1) - ld r0,GPR0(r1) - mtcr r2 - mtctr r3 - mtlr r4 - mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) - RFI_TO_USER -.endm - -system_call_vectored common 0x3000 -/* - * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 - * which is tested by system_call_exception when r0 is -1 (as set by vector - * entry code). - */ -system_call_vectored sigill 0x7ff0 - - -/* - * Entered via kernel return set up by kernel/sstep.c, must match entry regs - */ - .globl system_call_vectored_emulate -system_call_vectored_emulate: -_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) - li r10,IRQS_ALL_DISABLED - stb r10,PACAIRQSOFTMASK(r13) - b system_call_vectored_common -#endif - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common_real -system_call_common_real: - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - mtmsrd r10 - - .balign IFETCH_ALIGN_BYTES - .globl system_call_common -system_call_common: -_ASM_NOKPROBE_SYMBOL(system_call_common) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -BEGIN_FTR_SECTION - extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ - bne .Ltabort_syscall -END_FTR_SECTION_IFSET(CPU_FTR_TM) -#endif - mr r10,r1 - ld r1,PACAKSAVE(r13) - std r10,0(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - std r0,GPR0(r1) - std r10,GPR1(r1) - std r2,GPR2(r1) -#ifdef CONFIG_PPC_FSL_BOOK3E -START_BTB_FLUSH_SECTION - BTB_FLUSH(r10) -END_BTB_FLUSH_SECTION -#endif - ld r2,PACATOC(r13) - mfcr r12 - li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) - /* Zero r9-r12, this should only be required when restoring all GPRs */ - std r11,GPR9(r1) - std r11,GPR10(r1) - std r11,GPR11(r1) - std r11,GPR12(r1) - std r9,GPR13(r1) - SAVE_NVGPRS(r1) - std r11,_XER(r1) - std r11,_CTR(r1) - mflr r10 - - /* - * This clears CR0.SO (bit 28), which is the error indication on - * return from this system call. - */ - rldimi r12,r11,28,(63-28) - li r11,0xc00 - std r10,_LINK(r1) - std r11,_TRAP(r1) - std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ - - /* - * We always enter kernel from userspace with irq soft-mask enabled and - * nothing pending. system_call_exception() will call - * trace_hardirqs_off(). - */ - li r11,IRQS_ALL_DISABLED - li r12,PACA_IRQ_HARD_DIS - stb r11,PACAIRQSOFTMASK(r13) - stb r12,PACAIRQHAPPENED(r13) - - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception - -.Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD - li r5,0 /* !scv */ - bl syscall_exit_prepare - - ld r2,_CCR(r1) - ld r4,_NIP(r1) - ld r5,_MSR(r1) - ld r6,_LINK(r1) - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r5 - mtlr r6 - - cmpdi r3,0 - bne .Lsyscall_restore_regs - /* Zero volatile regs that may contain sensitive kernel data */ - li r0,0 - li r4,0 - li r5,0 - li r6,0 - li r7,0 - li r8,0 - li r9,0 - li r10,0 - li r11,0 - li r12,0 - mtctr r0 - mtspr SPRN_XER,r0 -.Lsyscall_restore_regs_cont: - -BEGIN_FTR_SECTION - HMT_MEDIUM_LOW -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * The value of AMR only matters while we're in the kernel. - */ - mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) - RFI_TO_USER - b . /* prevent speculative execution */ - -.Lsyscall_restore_regs: - ld r3,_CTR(r1) - ld r4,_XER(r1) - REST_NVGPRS(r1) - mtctr r3 - mtspr SPRN_XER,r4 - ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) - b .Lsyscall_restore_regs_cont - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -.Ltabort_syscall: - /* Firstly we need to enable TM in the kernel */ - mfmsr r10 - li r9, 1 - rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r10, 0 - - /* tabort, this dooms the transaction, nothing else */ - li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R9) - - /* - * Return directly to userspace. We have corrupted user register state, - * but userspace will never see that register state. Execution will - * resume after the tbegin of the aborted transaction with the - * checkpointed register state. - */ - li r9, MSR_RI - andc r10, r10, r9 - mtmsrd r10, 1 - mtspr SPRN_SRR0, r11 - mtspr SPRN_SRR1, r12 - RFI_TO_USER - b . /* prevent speculative execution */ -#endif - -#ifdef CONFIG_PPC_BOOK3S -_GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_vectored_common_exit -#endif - -_GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) - li r3,0 /* fork() return value */ - b .Lsyscall_exit - -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) - mtctr r14 - mr r3,r15 -#ifdef PPC64_ELF_ABI_v2 - mr r12,r14 -#endif - bctrl - li r3,0 - b .Lsyscall_exit #ifdef CONFIG_PPC_BOOK3S_64 @@ -630,156 +264,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) addi r1,r1,SWITCH_FRAME_SIZE blr - /* - * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not - * touched, no exit work created, then this can be used. - */ - .balign IFETCH_ALIGN_BYTES - .globl fast_interrupt_return -fast_interrupt_return: -_ASM_NOKPROBE_SYMBOL(fast_interrupt_return) - kuap_check_amr r3, r4 - ld r5,_MSR(r1) - andi. r0,r5,MSR_PR -#ifdef CONFIG_PPC_BOOK3S - bne .Lfast_user_interrupt_return_amr - kuap_kernel_restore r3, r4 - andi. r0,r5,MSR_RI - li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ - bne+ .Lfast_kernel_interrupt_return - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b . /* should not get here */ -#else - bne .Lfast_user_interrupt_return - b .Lfast_kernel_interrupt_return -#endif - - .balign IFETCH_ALIGN_BYTES - .globl interrupt_return -interrupt_return: -_ASM_NOKPROBE_SYMBOL(interrupt_return) - ld r4,_MSR(r1) - andi. r0,r4,MSR_PR - beq .Lkernel_interrupt_return - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_user_prepare - cmpdi r3,0 - bne- .Lrestore_nvgprs - -#ifdef CONFIG_PPC_BOOK3S -.Lfast_user_interrupt_return_amr: - kuap_user_restore r3, r4 -#endif -.Lfast_user_interrupt_return: - ld r11,_NIP(r1) - ld r12,_MSR(r1) -BEGIN_FTR_SECTION - ld r10,_PPR(r1) - mtspr SPRN_PPR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_CCR(r1) - ld r4,_LINK(r1) - ld r5,_CTR(r1) - ld r6,_XER(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) - - mtcr r3 - mtlr r4 - mtctr r5 - mtspr SPRN_XER,r6 - - REST_4GPRS(2, r1) - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - RFI_TO_USER - b . /* prevent speculative execution */ - -.Lrestore_nvgprs: - REST_NVGPRS(r1) - b .Lfast_user_interrupt_return - - .balign IFETCH_ALIGN_BYTES -.Lkernel_interrupt_return: - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_kernel_prepare - -.Lfast_kernel_interrupt_return: - cmpdi cr1,r3,0 - ld r11,_NIP(r1) - ld r12,_MSR(r1) - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 - -BEGIN_FTR_SECTION - stdcx. r0,0,r1 /* to clear the reservation */ -FTR_SECTION_ELSE - ldarx r0,0,r1 -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) - - ld r3,_LINK(r1) - ld r4,_CTR(r1) - ld r5,_XER(r1) - ld r6,_CCR(r1) - li r0,0 - - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - - mtlr r3 - mtctr r4 - mtspr SPRN_XER,r5 - - /* - * Leaving a stale exception_marker on the stack can confuse - * the reliable stack unwinder later on. Clear it. - */ - std r0,STACK_FRAME_OVERHEAD-16(r1) - - REST_4GPRS(2, r1) - - bne- cr1,1f /* emulate stack store */ - mtcr r6 - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - RFI_TO_KERNEL - b . /* prevent speculative execution */ - -1: /* - * Emulate stack store with update. New r1 value was already calculated - * and updated in our interrupt regs by emulate_loadstore, but we can't - * store the previous value of r1 to the stack before re-loading our - * registers from it, otherwise they could be clobbered. Use - * PACA_EXGEN as temporary storage to hold the store data, as - * interrupts are disabled here so it won't be clobbered. - */ - mtcr r6 - std r9,PACA_EXGEN+0(r13) - addi r9,r1,INT_FRAME_SIZE /* get original r1 */ - REST_GPR(6, r1) - REST_GPR(0, r1) - REST_GPR(1, r1) - std r9,0(r1) /* perform store component of stdu */ - ld r9,PACA_EXGEN+0(r13) - - RFI_TO_KERNEL - b . /* prevent speculative execution */ - #ifdef CONFIG_PPC_RTAS /* * On CHRP, the Run-Time Abstraction Services (RTAS) have to be diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 2ed14d4a47f5..93b0f3ec8fb0 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -38,9 +38,9 @@ static int __init early_init_dt_scan_epapr(unsigned long node, for (i = 0; i < (len / 4); i++) { struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); - patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst); + patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) - patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst); + patch_instruction(epapr_ev_idle_start + i, inst); #endif } diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index f1ae710274bc..1401787b0b93 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -26,6 +26,10 @@ #include <asm/feature-fixups.h> #include <asm/context_tracking.h> +/* 64e interrupt returns always use SRR registers */ +#define fast_interrupt_return fast_interrupt_return_srr +#define interrupt_return interrupt_return_srr + /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... * when taking special interrupts. For now we don't support that, @@ -897,6 +901,34 @@ kernel_dbg_exc: bl unknown_exception b interrupt_return +.macro SEARCH_RESTART_TABLE +#ifdef CONFIG_RELOCATABLE + ld r11,PACATOC(r13) + ld r14,__start___restart_table@got(r11) + ld r15,__stop___restart_table@got(r11) +#else + LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table) +#endif +300: + cmpd r14,r15 + beq 302f + ld r11,0(r14) + cmpld r10,r11 + blt 301f + ld r11,8(r14) + cmpld r10,r11 + bge 301f + ld r11,16(r14) + b 303f +301: + addi r14,r14,24 + b 300b +302: + li r11,0 +303: +.endm + /* * An interrupt came in while soft-disabled; We mark paca->irq_happened * accordingly and if the interrupt is level sensitive, we hard disable @@ -905,6 +937,9 @@ kernel_dbg_exc: */ .macro masked_interrupt_book3e paca_irq full_mask + std r14,PACA_EXGEN+EX_R14(r13) + std r15,PACA_EXGEN+EX_R15(r13) + lbz r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS @@ -914,15 +949,23 @@ kernel_dbg_exc: stb r10,PACAIRQHAPPENED(r13) .if \full_mask == 1 - rldicl r10,r11,48,1 /* clear MSR_EE */ - rotldi r11,r10,16 + xori r11,r11,MSR_EE /* clear MSR_EE */ mtspr SPRN_SRR1,r11 .endif + mfspr r10,SPRN_SRR0 + SEARCH_RESTART_TABLE + cmpdi r11,0 + beq 1f + mtspr SPRN_SRR0,r11 /* return to restart address */ +1: + lwz r11,PACA_EXGEN+EX_CR(r13) mtcr r11 ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) + ld r14,PACA_EXGEN+EX_R14(r13) + ld r15,PACA_EXGEN+EX_R15(r13) mfspr r13,SPRN_SPRG_GEN_SCRATCH rfi b . @@ -1282,7 +1325,12 @@ a2_tlbinit_code_start: a2_tlbinit_after_linear_map: /* Now we branch the new virtual address mapped by this entry */ +#ifdef CONFIG_RELOCATABLE + ld r5,PACATOC(r13) + ld r3,1f@got(r5) +#else LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f) +#endif mtctr r3 bctr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7fc6e078d4e..4aec59a77d4c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -428,18 +428,31 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) /* If coming from user, skip soft-mask tests. */ andi. r10,r12,MSR_PR - bne 2f - - /* Kernel code running below __end_interrupts is implicitly - * soft-masked */ - LOAD_HANDLER(r10, __end_interrupts) + bne 3f + + /* + * Kernel code running below __end_soft_masked may be + * implicitly soft-masked if it is within the regions + * in the soft mask table. + */ + LOAD_HANDLER(r10, __end_soft_masked) cmpld r11,r10 + bge+ 1f + + /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */ + mtctr r12 + stw r9,PACA_EXGEN+EX_CCR(r13) + SEARCH_SOFT_MASK_TABLE + cmpdi r12,0 + mfctr r12 /* Restore r12 to SRR1 */ + lwz r9,PACA_EXGEN+EX_CCR(r13) + beq 1f /* Not in soft-mask table */ li r10,IMASK - blt- 1f + b 2f /* In soft-mask table, always mask */ /* Test the soft mask state against our interrupt's bit */ - lbz r10,PACAIRQSOFTMASK(r13) -1: andi. r10,r10,IMASK +1: lbz r10,PACAIRQSOFTMASK(r13) +2: andi. r10,r10,IMASK /* Associate vector numbers with bits in paca->irq_happened */ .if IVEC == 0x500 || IVEC == 0xea0 li r10,PACA_IRQ_EE @@ -470,7 +483,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .if ISTACK andi. r10,r12,MSR_PR /* See if coming from user */ -2: mr r10,r1 /* Save r1 */ +3: mr r10,r1 /* Save r1 */ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */ beq- 100f ld r1,PACAKSAVE(r13) /* kernel stack to use */ @@ -485,6 +498,20 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + /* Mark our [H]SRRs valid for return */ + li r10,1 + .if IHSRR_IF_HVMODE + BEGIN_FTR_SECTION + stb r10,PACAHSRR_VALID(r13) + FTR_SECTION_ELSE + stb r10,PACASRR_VALID(r13) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .elseif IHSRR + stb r10,PACAHSRR_VALID(r13) + .else + stb r10,PACASRR_VALID(r13) + .endif + .if ISET_RI li r10,MSR_RI mtmsrd r10,1 /* Set MSR_RI */ @@ -577,6 +604,66 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) __GEN_COMMON_BODY \name .endm +.macro SEARCH_RESTART_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___restart_table) + LOAD_REG_ADDR(r10, __stop___restart_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + ld r12,16(r9) + b 303f +301: + addi r9,r9,24 + b 300b +302: + li r12,0 +303: +.endm + +.macro SEARCH_SOFT_MASK_TABLE +#ifdef CONFIG_RELOCATABLE + mr r12,r2 + ld r2,PACATOC(r13) + LOAD_REG_ADDR(r9, __start___soft_mask_table) + LOAD_REG_ADDR(r10, __stop___soft_mask_table) + mr r2,r12 +#else + LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table) + LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table) +#endif +300: + cmpd r9,r10 + beq 302f + ld r12,0(r9) + cmpld r11,r12 + blt 301f + ld r12,8(r9) + cmpld r11,r12 + bge 301f + li r12,1 + b 303f +301: + addi r9,r9,16 + b 300b +302: + li r12,0 +303: +.endm + /* * Restore all registers including H/SRR0/1 saved in a stack frame of a * standard exception. @@ -584,10 +671,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro EXCEPTION_RESTORE_REGS hsrr=0 /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) + li r10,0 .if \hsrr mtspr SPRN_HSRR1,r9 + stb r10,PACAHSRR_VALID(r13) .else mtspr SPRN_SRR1,r9 + stb r10,PACASRR_VALID(r13) .endif ld r9,_NIP(r1) .if \hsrr @@ -704,17 +794,17 @@ __start_interrupts: * scv instructions enter the kernel without changing EE, RI, ME, or HV. * In particular, this means we can take a maskable interrupt at any point * in the scv handler, which is unlike any other interrupt. This is solved - * by treating the instruction addresses below __end_interrupts as being - * soft-masked. + * by treating the instruction addresses in the handler as being soft-masked, + * by adding a SOFT_MASK_TABLE entry for them. * * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and * ensure scv is never executed with relocation off, which means AIL-0 * should never happen. * - * Before leaving the below __end_interrupts text, at least of the following - * must be true: + * Before leaving the following inside-__end_soft_masked text, at least of the + * following must be true: * - MSR[PR]=1 (i.e., return to userspace) - * - MSR_EE|MSR_RI is set (no reentrant exceptions) + * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * * Call convention: @@ -722,6 +812,7 @@ __start_interrupts: * syscall register convention is in Documentation/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) +1: /* SCV 0 */ mr r9,r13 GET_PACA(r13) @@ -751,8 +842,11 @@ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) b system_call_vectored_sigill #endif .endr +2: EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000) +SOFT_MASK_TABLE(1b, 2b) // Treat scv vectors as soft-masked, see comment above. + #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) __LOAD_HANDLER(r10, system_call_vectored_common) @@ -1149,7 +1243,7 @@ EXC_COMMON_BEGIN(machine_check_common) mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_PPC_P7_NAP @@ -1275,7 +1369,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr 1: bl do_break /* @@ -1283,7 +1377,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * If so, we need to restore them with their updated values. */ REST_NVGPRS(r1) - b interrupt_return + b interrupt_return_srr /** @@ -1323,7 +1417,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1332,7 +1426,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1368,7 +1462,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_srr /** @@ -1403,7 +1497,7 @@ BEGIN_MMU_FTR_SECTION bl do_slb_fault cmpdi r3,0 bne- 1f - b fast_interrupt_return + b fast_interrupt_return_srr 1: /* Error case */ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ @@ -1412,7 +1506,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault - b interrupt_return + b interrupt_return_srr /** @@ -1456,7 +1550,11 @@ EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + BEGIN_FTR_SECTION + b interrupt_return_hsrr + FTR_SECTION_ELSE + b interrupt_return_srr + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) /** @@ -1483,7 +1581,7 @@ EXC_COMMON_BEGIN(alignment_common) addi r3,r1,STACK_FRAME_OVERHEAD bl alignment_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -1590,7 +1688,7 @@ EXC_COMMON_BEGIN(program_check_common) addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /* @@ -1633,12 +1731,12 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif bl load_up_fpu - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl fp_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif @@ -1677,7 +1775,7 @@ EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer addi r3,r1,STACK_FRAME_OVERHEAD bl timer_interrupt - b interrupt_return + b interrupt_return_srr /** @@ -1714,6 +1812,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * * Be careful to avoid touching the kernel stack. */ + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_CTR(r13) mtctr r10 mtcrf 0x80,r9 @@ -1761,7 +1861,7 @@ EXC_COMMON_BEGIN(doorbell_super_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0xb00, 0x100) @@ -1838,8 +1938,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mtctr r10 bctr .else - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ #ifdef CONFIG_RELOCATABLE __LOAD_HANDLER(r10, system_call_common) mtctr r10 @@ -1925,7 +2023,7 @@ EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step addi r3,r1,STACK_FRAME_OVERHEAD bl single_step_exception - b interrupt_return + b interrupt_return_srr /** @@ -1963,7 +2061,7 @@ BEGIN_MMU_FTR_SECTION MMU_FTR_SECTION_ELSE bl unknown_exception ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) - b interrupt_return + b interrupt_return_hsrr /** @@ -1988,7 +2086,7 @@ EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2012,7 +2110,7 @@ EXC_COMMON_BEGIN(emulation_assist_common) addi r3,r1,STACK_FRAME_OVERHEAD bl emulation_assist_interrupt REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_hsrr /** @@ -2089,7 +2187,7 @@ EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_FRAME_OVERHEAD bl handle_hmi_exception - b interrupt_return + b interrupt_return_hsrr /** @@ -2119,7 +2217,7 @@ EXC_COMMON_BEGIN(h_doorbell_common) #else bl unknown_async_exception #endif - b interrupt_return + b interrupt_return_hsrr /** @@ -2145,7 +2243,7 @@ EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq addi r3,r1,STACK_FRAME_OVERHEAD bl do_IRQ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xec0, 0x20) @@ -2188,7 +2286,7 @@ EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor addi r3,r1,STACK_FRAME_OVERHEAD bl performance_monitor_exception - b interrupt_return + b interrupt_return_srr /** @@ -2225,19 +2323,19 @@ BEGIN_FTR_SECTION END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif bl load_up_altivec - b fast_interrupt_return + b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl altivec_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2278,14 +2376,14 @@ BEGIN_FTR_SECTION 2: /* User process was in a transaction */ addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_tm - b interrupt_return + b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif addi r3,r1,STACK_FRAME_OVERHEAD bl vsx_unavailable_exception - b interrupt_return + b interrupt_return_srr /** @@ -2313,7 +2411,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ - b interrupt_return + b interrupt_return_srr /** @@ -2341,7 +2439,7 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common) addi r3,r1,STACK_FRAME_OVERHEAD bl facility_unavailable_exception REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ - b interrupt_return + b interrupt_return_hsrr EXC_REAL_NONE(0xfa0, 0x20) @@ -2370,7 +2468,7 @@ EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_system_error_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) @@ -2401,7 +2499,7 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint addi r3,r1,STACK_FRAME_OVERHEAD bl instruction_breakpoint_exception - b interrupt_return + b interrupt_return_srr EXC_REAL_NONE(0x1400, 0x100) @@ -2509,6 +2607,8 @@ BEGIN_FTR_SECTION ld r10,PACA_EXGEN+EX_CFAR(r13) mtspr SPRN_CFAR,r10 END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + li r10,0 + stb r10,PACAHSRR_VALID(r13) ld r10,PACA_EXGEN+EX_R10(r13) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) @@ -2521,7 +2621,7 @@ EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception addi r3,r1,STACK_FRAME_OVERHEAD bl unknown_exception - b interrupt_return + b interrupt_return_hsrr #ifdef CONFIG_CBE_RAS @@ -2538,7 +2638,7 @@ EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_maintenance_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) @@ -2568,7 +2668,7 @@ EXC_COMMON_BEGIN(altivec_assist_common) #else bl unknown_exception #endif - b interrupt_return + b interrupt_return_srr #ifdef CONFIG_CBE_RAS @@ -2585,7 +2685,7 @@ EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal addi r3,r1,STACK_FRAME_OVERHEAD bl cbe_thermal_exception - b interrupt_return + b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) @@ -2610,7 +2710,6 @@ INT_DEFINE_END(soft_nmi) * and run it entirely with interrupts hard disabled. */ EXC_COMMON_BEGIN(soft_nmi_common) - mfspr r11,SPRN_SRR0 mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE @@ -2624,6 +2723,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) mtmsrd r9,1 kuap_kernel_restore r9, r10 + EXCEPTION_RESTORE_REGS hsrr=0 RFI_TO_KERNEL @@ -2645,19 +2745,24 @@ masked_Hinterrupt: .else masked_interrupt: .endif - lbz r11,PACAIRQHAPPENED(r13) - or r11,r11,r10 - stb r11,PACAIRQHAPPENED(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + lbz r9,PACAIRQHAPPENED(r13) + or r9,r9,r10 + stb r9,PACAIRQHAPPENED(r13) + + .if ! \hsrr cmpwi r10,PACA_IRQ_DEC bne 1f - lis r10,0x7fff - ori r10,r10,0xffff - mtspr SPRN_DEC,r10 + LOAD_REG_IMMEDIATE(r9, 0x7fffffff) + mtspr SPRN_DEC,r9 #ifdef CONFIG_PPC_WATCHDOG + lwz r9,PACA_EXGEN+EX_CCR(r13) b soft_nmi_common #else b 2f #endif + .endif + 1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK beq 2f xori r12,r12,MSR_EE /* clear MSR_EE */ @@ -2666,11 +2771,29 @@ masked_interrupt: .else mtspr SPRN_SRR1,r12 .endif - ori r11,r11,PACA_IRQ_HARD_DIS - stb r11,PACAIRQHAPPENED(r13) + ori r9,r9,PACA_IRQ_HARD_DIS + stb r9,PACAIRQHAPPENED(r13) 2: /* done */ - ld r10,PACA_EXGEN+EX_CTR(r13) - mtctr r10 + li r9,0 + .if \hsrr + stb r9,PACAHSRR_VALID(r13) + .else + stb r9,PACASRR_VALID(r13) + .endif + + SEARCH_RESTART_TABLE + cmpdi r12,0 + beq 3f + .if \hsrr + mtspr SPRN_HSRR0,r12 + .else + mtspr SPRN_SRR0,r12 + .endif +3: + + ld r9,PACA_EXGEN+EX_CTR(r13) + mtctr r9 + lwz r9,PACA_EXGEN+EX_CCR(r13) mtcrf 0x80,r9 std r1,PACAR1(r13) ld r9,PACA_EXGEN+EX_R9(r13) @@ -2881,7 +3004,7 @@ MASKED_INTERRUPT hsrr=1 USE_FIXED_SECTION(virt_trampolines) /* - * All code below __end_interrupts is treated as soft-masked. If + * All code below __end_soft_masked is treated as soft-masked. If * any code runs here with MSR[EE]=1, it must then cope with pending * soft interrupt being raised (i.e., by ensuring it is replayed). * diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index c9e2819b095a..c7022c41cc31 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); -bool check_kvm_guest(void) +int __init check_kvm_guest(void) { struct device_node *hyper_node; hyper_node = of_find_node_by_path("/hypervisor"); if (!hyper_node) - return false; + return 0; if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return false; + return 0; static_branch_enable(&kvm_guest); - return true; + + return 0; } +core_initcall(check_kvm_guest); // before kvm_guest_init() #endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2c57ece6671c..6010adcee16e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -103,6 +103,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) ori r12,r12,MSR_FP or r12,r12,r4 std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_FP(r5) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a8221ddcbd66..6b1ec9e3541b 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -142,42 +142,23 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) .macro SYSCALL_ENTRY trapno mfspr r9, SPRN_SRR1 - mfspr r10, SPRN_SRR0 + mfspr r12, SPRN_SRR0 LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */ - lis r12, 1f@h - ori r12, r12, 1f@l + lis r10, 1f@h + ori r10, r10, 1f@l mtspr SPRN_SRR1, r11 - mtspr SPRN_SRR0, r12 - mfspr r12,SPRN_SPRG_THREAD + mtspr SPRN_SRR0, r10 + mfspr r10,SPRN_SPRG_THREAD mr r11, r1 - lwz r1,TASK_STACK-THREAD(r12) - tovirt(r12, r12) + lwz r1,TASK_STACK-THREAD(r10) + tovirt(r10, r10) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE rfi 1: - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11, r1 - stw r10,_NIP(r11) - mflr r10 - stw r10, _LINK(r11) - mfcr r10 - rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ - stw r10,_CCR(r11) /* save registers */ -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#endif - lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r10,8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - addi r2,r12,-THREAD + stw r12,_NIP(r1) + mfcr r12 + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + stw r12,_CCR(r1) b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index e1360b88b6cb..7d72ee5ab387 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -701,39 +701,3 @@ _GLOBAL(abort) mfspr r13,SPRN_DBCR0 oris r13,r13,DBCR0_RST_SYSTEM@h mtspr SPRN_DBCR0,r13 - -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif - sync - mtspr SPRN_PID,r3 - isync /* Need an isync to flush shadow */ - /* TLBs after changing PID */ - blr - -/* We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 5c106ac36626..ddc978a2d381 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -532,6 +532,10 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_44x_kuep +#endif 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -743,6 +747,10 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ +#ifdef CONFIG_PPC_KUEP +0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ + patch_site 0b, patch__tlb_47x_kuep +#endif 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. @@ -780,20 +788,6 @@ _GLOBAL(__fixup_440A_mcheck) sync blr -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - /* * Init CPU state. This is called at boot time or for secondary CPUs * to setup initial TLB entries, setup IVORs, etc... @@ -1239,34 +1233,8 @@ head_start_common: isync blr -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align PAGE_SHIFT - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - -/* - * To support >32-bit physical addresses, we use an 8KB pgdir. - */ - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 - #ifdef CONFIG_SMP + .data .align 12 temp_boot_stack: .space 1024 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ece7f97bafff..79930b0bc781 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -194,8 +194,9 @@ CLOSE_FIXED_SECTION(first_256B) /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" +/* This value is used to mark exception frames on the stack. */ exception_marker: - .tc ID_72656773_68657265[TC],0x7265677368657265 + .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER .previous /* @@ -211,6 +212,8 @@ OPEN_TEXT_SECTION(0x100) USE_TEXT_SECTION() +#include "interrupt_64.S" + #ifdef CONFIG_PPC_BOOK3E /* * The booting_thread_hwid holds the thread id we want to boot in cpu @@ -997,23 +1000,3 @@ start_here_common: 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 .previous - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. - */ - .section ".bss" -/* - * pgd dir should be aligned to PGD_TABLE_SIZE which is 64K. - * We will need to find a better way to fix this - */ - .align 16 - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - - .globl empty_zero_page -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 7d445e4342c0..9bdb95f5694f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -786,28 +786,3 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ - .data - .globl sdata -sdata: - .globl empty_zero_page - .align PAGE_SHIFT -empty_zero_page: - .space PAGE_SIZE -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE table pointers, usually the kernel and current user - * pointer to their respective root page table (pgdir). - */ - .globl abatron_pteptrs -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 065178f19a3d..764edd860ed4 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -518,8 +518,6 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ - mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION li r0,1 mfspr r1,SPRN_SPRG_603_LRU @@ -531,9 +529,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -607,9 +611,15 @@ BEGIN_MMU_FTR_SECTION mfspr r2,SPRN_SRR1 rlwimi r2,r0,31-14,14,14 mtspr SPRN_SRR1,r2 -END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + mtcrf 0x80,r2 + tlbld r3 + rfi +MMU_FTR_SECTION_ELSE + mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r2 tlbld r3 rfi +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception @@ -756,9 +766,6 @@ PerformanceMonitor: * the kernel image to physical address PHYSICAL_START. */ relocate_kernel: - addis r9,r26,klimit@ha /* fetch klimit */ - lwz r25,klimit@l(r9) - addis r25,r25,-KERNELBASE@h lis r3,PHYSICAL_START@h /* Destination base address */ li r6,0 /* Destination offset */ li r5,0x4000 /* # bytes of memory to copy */ @@ -766,7 +773,8 @@ relocate_kernel: addi r0,r3,4f@l /* jump to the address of 4f */ mtctr r0 /* in copy and do the rest. */ bctr /* jump to the copy */ -4: mr r5,r25 +4: lis r5,_end-KERNELBASE@h + ori r5,r5,_end-KERNELBASE@l bl copy_and_flush /* copy the rest */ b turn_on_mmu @@ -924,12 +932,6 @@ _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ @@ -1024,58 +1026,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) rfi /* - * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next); - * - * Set up the segment registers for a new context. - */ -_ENTRY(switch_mmu_context) - lwz r3,MMCONTEXTID(r4) - cmpwi cr0,r3,0 - blt- 4f - mulli r3,r3,897 /* multiply context by skew factor */ - rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ -#ifdef CONFIG_PPC_KUEP - oris r3, r3, SR_NX@h /* Set Nx */ -#endif -#ifdef CONFIG_PPC_KUAP - oris r3, r3, SR_KS@h /* Set Ks */ -#endif - li r0,NUM_USER_SEGMENTS - mtctr r0 - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - lwz r4, MM_PGD(r4) - lis r5, abatron_pteptrs@ha - stw r4, abatron_pteptrs@l + 0x4(r5) -#endif -BEGIN_MMU_FTR_SECTION -#ifndef CONFIG_BDI_SWITCH - lwz r4, MM_PGD(r4) -#endif - tophys(r4, r4) - rlwinm r4, r4, 4, 0xffff01ff - mtspr SPRN_SDR1, r4 -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) - li r4,0 - isync -3: - mtsrin r3,r4 - addi r3,r3,0x111 /* next VSID */ - rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b - sync - isync - blr -4: trap - EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0 - blr -EXPORT_SYMBOL(switch_mmu_context) - -/* * An undocumented "feature" of 604e requires that the v bit * be cleared before changing BAT values. * @@ -1256,61 +1206,4 @@ setup_usbgecko_bat: blr #endif -#ifdef CONFIG_8260 -/* Jump into the system reset for the rom. - * We first disable the MMU, and then jump to the ROM reset address. - * - * r3 is the board info structure, r4 is the location for starting. - * I use this for building a small kernel that can load other kernels, - * rather than trying to write or rely on a rom monitor that can tftp load. - */ - .globl m8260_gorom -m8260_gorom: - mfmsr r0 - rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ - sync - mtmsr r0 - sync - mfspr r11, SPRN_HID0 - lis r10, 0 - ori r10,r10,HID0_ICE|HID0_DCE - andc r11, r11, r10 - mtspr SPRN_HID0, r11 - isync - li r5, MSR_ME|MSR_RI - lis r6,2f@h - addis r6,r6,-KERNELBASE@h - ori r6,r6,2f@l - mtspr SPRN_SRR0,r6 - mtspr SPRN_SRR1,r5 - isync - sync - rfi -2: - mtlr r4 - blr -#endif - - -/* - * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. - */ .data - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index f82470091697..87b806e8eded 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -128,37 +128,20 @@ BEGIN_FTR_SECTION mr r12, r13 lwz r13, THREAD_NORMSAVE(2)(r10) FTR_SECTION_ELSE -#endif mfcr r12 -#ifdef CONFIG_KVM_BOOKE_HV ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#else + mfcr r12 #endif mfspr r9, SPRN_SRR1 BOOKE_CLEAR_BTB(r11) - lwz r11, TASK_STACK - THREAD(r10) + mr r11, r1 + lwz r1, TASK_STACK - THREAD(r10) rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ - ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) - stw r12, _CCR(r11) /* save various registers */ - mflr r12 - stw r12,_LINK(r11) + ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r1) mfspr r12,SPRN_SRR0 - stw r1, GPR1(r11) - stw r1, 0(r11) - mr r1, r11 - stw r12,_NIP(r11) - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ - stw r2,GPR2(r11) - addi r12, r12, STACK_FRAME_REGS_MARKER@l - stw r9,_MSR(r11) - li r2, \trapno - stw r12, 8(r11) - stw r2,_TRAP(r11) - SAVE_GPR(0, r11) - SAVE_4GPRS(3, r11) - SAVE_2GPRS(7, r11) - - addi r2,r10,-THREAD + stw r12,_NIP(r1) b transfer_to_syscall /* jump to handler */ .endm diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index a1a5c3f10dc4..0f9642f36b49 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -985,20 +985,6 @@ _GLOBAL(abort) mtspr SPRN_DBCR0,r13 isync -_GLOBAL(set_context) - -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start @@ -1226,26 +1212,3 @@ _GLOBAL(restore_to_as0) */ 3: mr r3,r5 bl _start - -/* - * We put a few things here that have to be page-aligned. This stuff - * goes at the beginning of the data segment, which is page-aligned. - */ - .data - .align 12 - .globl sdata -sdata: - .globl empty_zero_page -empty_zero_page: - .space 4096 -EXPORT_SYMBOL(empty_zero_page) - .globl swapper_pg_dir -swapper_pg_dir: - .space PGD_TABLE_SIZE - -/* - * Room for two PTE pointers, usually the kernel and current user pointers - * to their respective root page table. - */ -abatron_pteptrs: - .space 8 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 8fc7a14e4d71..21a638aff72f 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -486,7 +486,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) return; reset: - regs->msr &= ~MSR_SE; + regs_set_return_msr(regs, regs->msr & ~MSR_SE); for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); @@ -537,7 +537,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, current->thread.last_hit_ubp[i] = bp[i]; info[i] = NULL; } - regs->msr |= MSR_SE; + regs_set_return_msr(regs, regs->msr | MSR_SE); return false; } diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e0938ba298f2..21bbd615ca41 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include <linux/context_tracking.h> #include <linux/err.h> #include <linux/compat.h> +#include <linux/sched/debug.h> /* for show_regs */ #include <asm/asm-prototypes.h> #include <asm/kup.h> @@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS]; typedef long (*syscall_fn)(long, long, long, long, long, long); +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); +static inline bool exit_must_hard_disable(void) +{ + return static_branch_unlikely(&interrupt_exit_not_reentrant); +} +#else +static inline bool exit_must_hard_disable(void) +{ + return true; +} +#endif + +/* + * local irqs must be disabled. Returns false if the caller must re-enable + * them, check for new work, and try again. + * + * This should be called with local irqs disabled, but if they were previously + * enabled when the interrupt handler returns (indicating a process-context / + * synchronous interrupt) then irqs_enabled should be true. + * + * restartable is true then EE/RI can be left on because interrupts are handled + * with a restart sequence. + */ +static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) +{ + /* This must be done with RI=1 because tracing may touch vmaps */ + trace_hardirqs_on(); + + if (exit_must_hard_disable() || !restartable) + __hard_EE_RI_disable(); + +#ifdef CONFIG_PPC64 + /* This pattern matches prep_irq_for_idle */ + if (unlikely(lazy_irq_pending_nocheck())) { + if (exit_must_hard_disable() || !restartable) { + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + __hard_RI_enable(); + } + trace_hardirqs_off(); + + return false; + } +#endif + return true; +} + /* Has to run notrace because it is entered not completely "reconciled" */ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, @@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5, return f(r3, r4, r5, r6, r7, r8); } -/* - * local irqs must be disabled. Returns false if the caller must re-enable - * them, check for new work, and try again. - * - * This should be called with local irqs disabled, but if they were previously - * enabled when the interrupt handler returns (indicating a process-context / - * synchronous interrupt) then irqs_enabled should be true. - */ -static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri) -{ - /* This must be done with RI=1 because tracing may touch vmaps */ - trace_hardirqs_on(); - - /* This pattern matches prep_irq_for_idle */ - if (clear_ri) - __hard_EE_RI_disable(); - else - __hard_irq_disable(); -#ifdef CONFIG_PPC64 - if (unlikely(lazy_irq_pending_nocheck())) { - /* Took an interrupt, may have more exit work to do. */ - if (clear_ri) - __hard_RI_enable(); - trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - return false; - } - local_paca->irq_happened = 0; - irq_soft_mask_set(IRQS_ENABLED); -#endif - return true; -} - -static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled) -{ - if (__prep_irq_for_enabled_exit(clear_ri)) - return true; - - /* - * Must replay pending soft-masked interrupts now. Don't just - * local_irq_enabe(); local_irq_disable(); because if we are - * returning from an asynchronous interrupt here, another one - * might hit after irqs are enabled, and it would exit via this - * same path allowing another to fire, and so on unbounded. - * - * If interrupts were enabled when this interrupt exited, - * indicating a process context (synchronous) interrupt, - * local_irq_enable/disable can be used, which will enable - * interrupts rather than keeping them masked (unclear how - * much benefit this is over just replaying for all cases, - * because we immediately disable again, so all we're really - * doing is allowing hard interrupts to execute directly for - * a very small time, rather than being masked and replayed). - */ - if (irqs_enabled) { - local_irq_enable(); - local_irq_disable(); - } else { - replay_soft_interrupts(); - } - - return false; -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void) #endif } -/* - * This should be called after a syscall returns, with r3 the return value - * from the syscall. If this function returns non-zero, the system call - * exit assembly should additionally load all GPR registers and CTR and XER - * from the interrupt frame. - * - * The function graph tracer can not trace the return side of this function, - * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. - */ -notrace unsigned long syscall_exit_prepare(unsigned long r3, - struct pt_regs *regs, - long scv) +static void check_return_regs_valid(struct pt_regs *regs) { - unsigned long ti_flags; - unsigned long ret = 0; - bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; +#ifdef CONFIG_PPC_BOOK3S_64 + unsigned long trap, srr0, srr1; + static bool warned; + u8 *validp; + char *h; - CT_WARN_ON(ct_state() == CONTEXT_USER); + if (trap_is_scv(regs)) + return; - kuap_assert_locked(); + trap = regs->trap; + // EE in HV mode sets HSRRs like 0xea0 + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL) + trap = 0xea0; + + switch (trap) { + case 0x980: + case INTERRUPT_H_DATA_STORAGE: + case 0xe20: + case 0xe40: + case INTERRUPT_HMI: + case 0xe80: + case 0xea0: + case INTERRUPT_H_FAC_UNAVAIL: + case 0x1200: + case 0x1500: + case 0x1600: + case 0x1800: + validp = &local_paca->hsrr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_HSRR0); + srr1 = mfspr(SPRN_HSRR1); + h = "H"; + + break; + default: + validp = &local_paca->srr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_SRR0); + srr1 = mfspr(SPRN_SRR1); + h = ""; + break; + } - regs->result = r3; + if (srr0 == regs->nip && srr1 == regs->msr) + return; - /* Check whether the syscall is issued inside a restartable sequence */ - rseq_syscall(regs); + /* + * A NMI / soft-NMI interrupt may have come in after we found + * srr_valid and before the SRRs are loaded. The interrupt then + * comes in and clobbers SRRs and clears srr_valid. Then we load + * the SRRs here and test them above and find they don't match. + * + * Test validity again after that, to catch such false positives. + * + * This test in general will have some window for false negatives + * and may not catch and fix all such cases if an NMI comes in + * later and clobbers SRRs without clearing srr_valid, but hopefully + * such things will get caught most of the time, statistically + * enough to be able to get a warning out. + */ + barrier(); - ti_flags = current_thread_info()->flags; + if (!*validp) + return; - if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { - if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { - r3 = -r3; - regs->ccr |= 0x10000000; /* Set SO bit in CR */ - } + if (!warned) { + warned = true; + printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); + printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); + show_regs(regs); } - if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { - if (ti_flags & _TIF_RESTOREALL) - ret = _TIF_RESTOREALL; - else - regs->gpr[3] = r3; - clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); - } else { - regs->gpr[3] = r3; - } - - if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { - do_syscall_trace_leave(regs); - ret |= _TIF_RESTOREALL; - } + *validp = 0; /* fixup */ +#endif +} - local_irq_disable(); +static notrace unsigned long +interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) +{ + unsigned long ti_flags; again: ti_flags = READ_ONCE(current_thread_info()->flags); @@ -303,7 +321,7 @@ again: ti_flags = READ_ONCE(current_thread_info()->flags); } - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely((ti_flags & _TIF_RESTORE_TM))) { restore_tm_state(regs); @@ -327,10 +345,10 @@ again: } } - user_enter_irqoff(); + check_return_regs_valid(regs); - /* scv need not set RI=0 because SRRs are not used */ - if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) { + user_enter_irqoff(); + if (!prep_irq_for_enabled_exit(true)) { user_exit_irqoff(); local_irq_enable(); local_irq_disable(); @@ -352,90 +370,131 @@ again: return ret; } -notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr) +/* + * This should be called after a syscall returns, with r3 the return value + * from the syscall. If this function returns non-zero, the system call + * exit assembly should additionally load all GPR registers and CTR and XER + * from the interrupt frame. + * + * The function graph tracer can not trace the return side of this function, + * because RI=0 and soft mask state is "unreconciled", so it is marked notrace. + */ +notrace unsigned long syscall_exit_prepare(unsigned long r3, + struct pt_regs *regs, + long scv) { unsigned long ti_flags; - unsigned long flags; unsigned long ret = 0; + bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) - BUG_ON(!(regs->msr & MSR_RI)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); CT_WARN_ON(ct_state() == CONTEXT_USER); - /* - * We don't need to restore AMR on the way back to userspace for KUAP. - * AMR can only have been unlocked if we interrupted the kernel. - */ kuap_assert_locked(); - local_irq_save(flags); - -again: - ti_flags = READ_ONCE(current_thread_info()->flags); - while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { - local_irq_enable(); /* returning to user: may enable */ - if (ti_flags & _TIF_NEED_RESCHED) { - schedule(); - } else { - if (ti_flags & _TIF_SIGPENDING) - ret |= _TIF_RESTOREALL; - do_notify_resume(regs, ti_flags); - } - local_irq_disable(); - ti_flags = READ_ONCE(current_thread_info()->flags); - } + regs->result = r3; - if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely((ti_flags & _TIF_RESTORE_TM))) { - restore_tm_state(regs); - } else { - unsigned long mathflags = MSR_FP; + /* Check whether the syscall is issued inside a restartable sequence */ + rseq_syscall(regs); - if (cpu_has_feature(CPU_FTR_VSX)) - mathflags |= MSR_VEC | MSR_VSX; - else if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mathflags |= MSR_VEC; + ti_flags = current_thread_info()->flags; - /* See above restore_math comment */ - if ((regs->msr & mathflags) != mathflags) - restore_math(regs); + if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { + if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { + r3 = -r3; + regs->ccr |= 0x10000000; /* Set SO bit in CR */ } } - user_enter_irqoff(); + if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) { + if (ti_flags & _TIF_RESTOREALL) + ret = _TIF_RESTOREALL; + else + regs->gpr[3] = r3; + clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags); + } else { + regs->gpr[3] = r3; + } - if (unlikely(!__prep_irq_for_enabled_exit(true))) { - user_exit_irqoff(); - local_irq_enable(); - local_irq_disable(); - goto again; + if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + do_syscall_trace_leave(regs); + ret |= _TIF_RESTOREALL; } - booke_load_dbcr0(); + local_irq_disable(); + ret = interrupt_exit_user_prepare_main(ret, regs); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - local_paca->tm_scratch = regs->msr; +#ifdef CONFIG_PPC64 + regs->exit_result = ret; #endif - account_cpu_user_exit(); + return ret; +} - /* Restore user access locks last */ - kuap_user_restore(regs); - kuep_unlock(); +#ifdef CONFIG_PPC64 +notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs) +{ + /* + * This is called when detecting a soft-pending interrupt as well as + * an alternate-return interrupt. So we can't just have the alternate + * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless + * the soft-pending case were to fix things up as well). RI might be + * disabled, in which case it gets re-enabled by __hard_irq_disable(). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs); + + return regs->exit_result; +} +#endif + +notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) +{ + unsigned long ret; + + if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x)) + BUG_ON(!(regs->msr & MSR_RI)); + BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(arch_irq_disabled_regs(regs)); + CT_WARN_ON(ct_state() == CONTEXT_USER); + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * AMR can only have been unlocked if we interrupted the kernel. + */ + kuap_assert_locked(); + + local_irq_disable(); + + ret = interrupt_exit_user_prepare_main(0, regs); + +#ifdef CONFIG_PPC64 + regs->exit_result = ret; +#endif return ret; } void preempt_schedule_irq(void); -notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr) +notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) { unsigned long flags; unsigned long ret = 0; unsigned long kuap; + bool stack_store = current_thread_info()->flags & + _TIF_EMULATE_STACK_STORE; if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) && unlikely(!(regs->msr & MSR_RI))) @@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign kuap = kuap_get_and_assert_locked(); - if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) { - clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); - ret = 1; - } - local_irq_save(flags); if (!arch_irq_disabled_regs(regs)) { @@ -469,17 +523,58 @@ again: } } - if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags)))) + check_return_regs_valid(regs); + + /* + * Stack store exit can't be restarted because the interrupt + * stack frame might have been clobbered. + */ + if (!prep_irq_for_enabled_exit(unlikely(stack_store))) { + /* + * Replay pending soft-masked interrupts now. Don't + * just local_irq_enabe(); local_irq_disable(); because + * if we are returning from an asynchronous interrupt + * here, another one might hit after irqs are enabled, + * and it would exit via this same path allowing + * another to fire, and so on unbounded. + */ + hard_irq_disable(); + replay_soft_interrupts(); + /* Took an interrupt, may have more exit work to do. */ goto again; - } else { - /* Returning to a kernel context with local irqs disabled. */ - __hard_EE_RI_disable(); + } #ifdef CONFIG_PPC64 + /* + * An interrupt may clear MSR[EE] and set this concurrently, + * but it will be marked pending and the exit will be retried. + * This leaves a racy window where MSR[EE]=0 and HARD_DIS is + * clear, until interrupt_exit_kernel_restart() calls + * hard_irq_disable(), which will set HARD_DIS again. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + + } else { + check_return_regs_valid(regs); + + if (unlikely(stack_store)) + __hard_EE_RI_disable(); + /* + * Returning to a kernel context with local irqs disabled. + * Here, if EE was enabled in the interrupted context, enable + * it on return as well. A problem exists here where a soft + * masked interrupt may have cleared MSR[EE] and set HARD_DIS + * here, and it will still exist on return to the caller. This + * will be resolved by the masked interrupt firing again. + */ if (regs->msr & MSR_EE) local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; -#endif +#endif /* CONFIG_PPC64 */ } + if (unlikely(stack_store)) { + clear_bits(_TIF_EMULATE_STACK_STORE, ¤t_thread_info()->flags); + ret = 1; + } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM local_paca->tm_scratch = regs->msr; @@ -494,3 +589,46 @@ again: return ret; } + +#ifdef CONFIG_PPC64 +notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + trace_hardirqs_off(); + user_exit_irqoff(); + account_cpu_user_entry(); + + BUG_ON(!user_mode(regs)); + + regs->exit_result |= interrupt_exit_user_prepare(regs); + + return regs->exit_result; +} + +/* + * No real need to return a value here because the stack store case does not + * get restarted. + */ +notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs) +{ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + +#ifdef CONFIG_PPC_BOOK3S_64 + set_kuap(AMR_KUAP_BLOCKED); +#endif + + if (regs->softe == IRQS_ENABLED) + trace_hardirqs_off(); + + BUG_ON(user_mode(regs)); + + return interrupt_exit_kernel_prepare(regs); +} +#endif diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S new file mode 100644 index 000000000000..4063e8a3f704 --- /dev/null +++ b/arch/powerpc/kernel/interrupt_64.S @@ -0,0 +1,770 @@ +#include <asm/asm-offsets.h> +#include <asm/bug.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif +#include <asm/feature-fixups.h> +#include <asm/head-64.h> +#include <asm/hw_irq.h> +#include <asm/kup.h> +#include <asm/mmu.h> +#include <asm/ppc_asm.h> +#include <asm/ptrace.h> +#include <asm/tm.h> + + .section ".toc","aw" +SYS_CALL_TABLE: + .tc sys_call_table[TC],sys_call_table + +#ifdef CONFIG_COMPAT +COMPAT_SYS_CALL_TABLE: + .tc compat_sys_call_table[TC],compat_sys_call_table +#endif + .previous + + .align 7 + +.macro DEBUG_SRR_VALID srr +#ifdef CONFIG_PPC_RFI_SRR_DEBUG + .ifc \srr,srr + mfspr r11,SPRN_SRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_SRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .else + mfspr r11,SPRN_HSRR0 + ld r12,_NIP(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + mfspr r11,SPRN_HSRR1 + ld r12,_MSR(r1) +100: tdne r11,r12 + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + .endif +#endif +.endm + +#ifdef CONFIG_PPC_BOOK3S +.macro system_call_vectored name trapnr + .globl system_call_vectored_\name +system_call_vectored_\name: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + SCV_INTERRUPT_TO_KERNEL + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_LINK(r1) + std r11,_CTR(r1) + + li r11,\trapnr + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +BEGIN_FTR_SECTION + HMT_MEDIUM +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * scv enters with MSR[EE]=1 and is immediately considered soft-masked. + * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED, + * and interrupts may be masked and pending already. + * system_call_exception() will call trace_hardirqs_off() which means + * interrupts could already have been blocked before trace_hardirqs_off, + * but this is the best we can do. + */ + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_vectored_\name\()_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,1 /* scv */ + bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Lsyscall_vectored_\name\()_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_vectored_\name\()_restart + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + + ld r2,_CCR(r1) + ld r4,_NIP(r1) + ld r5,_MSR(r1) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + cmpdi r3,0 + bne .Lsyscall_vectored_\name\()_restore_regs + + /* rfscv returns with LR->NIA and CTR->MSR */ + mtlr r4 + mtctr r5 + + /* Could zero these as per ABI, but we may consider a stricter ABI + * which preserves these if libc implementations can benefit, so + * restore them for now until further measurement is done. */ + ld r0,GPR0(r1) + ld r4,GPR4(r1) + ld r5,GPR5(r1) + ld r6,GPR6(r1) + ld r7,GPR7(r1) + ld r8,GPR8(r1) + /* Zero volatile regs that may contain sensitive kernel data */ + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtspr SPRN_XER,r0 + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFSCV_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_vectored_\name\()_restore_regs: + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 + + ld r3,_CTR(r1) + ld r4,_LINK(r1) + ld r5,_XER(r1) + + REST_NVGPRS(r1) + ld r0,GPR0(r1) + mtcr r2 + mtctr r3 + mtlr r4 + mtspr SPRN_XER,r5 + REST_10GPRS(2, r1) + REST_2GPRS(12, r1) + ld r1,GPR1(r1) + RFI_TO_USER +.Lsyscall_vectored_\name\()_rst_end: + +syscall_vectored_\name\()_restart: +_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_vectored_\name\()_rst_start +1: + +SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b) +RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart) + +.endm + +system_call_vectored common 0x3000 + +/* + * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0 + * which is tested by system_call_exception when r0 is -1 (as set by vector + * entry code). + */ +system_call_vectored sigill 0x7ff0 + + +/* + * Entered via kernel return set up by kernel/sstep.c, must match entry regs + */ + .globl system_call_vectored_emulate +system_call_vectored_emulate: +_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate) + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + b system_call_vectored_common +#endif /* CONFIG_PPC_BOOK3S */ + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common_real +system_call_common_real: +_ASM_NOKPROBE_SYMBOL(system_call_common_real) + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + mtmsrd r10 + + .balign IFETCH_ALIGN_BYTES + .globl system_call_common +system_call_common: +_ASM_NOKPROBE_SYMBOL(system_call_common) +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +BEGIN_FTR_SECTION + extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */ + bne tabort_syscall +END_FTR_SECTION_IFSET(CPU_FTR_TM) +#endif + mr r10,r1 + ld r1,PACAKSAVE(r13) + std r10,0(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + std r0,GPR0(r1) + std r10,GPR1(r1) + std r2,GPR2(r1) +#ifdef CONFIG_PPC_FSL_BOOK3E +START_BTB_FLUSH_SECTION + BTB_FLUSH(r10) +END_BTB_FLUSH_SECTION +#endif + ld r2,PACATOC(r13) + mfcr r12 + li r11,0 + /* Can we avoid saving r3-r8 in common case? */ + std r3,GPR3(r1) + std r4,GPR4(r1) + std r5,GPR5(r1) + std r6,GPR6(r1) + std r7,GPR7(r1) + std r8,GPR8(r1) + /* Zero r9-r12, this should only be required when restoring all GPRs */ + std r11,GPR9(r1) + std r11,GPR10(r1) + std r11,GPR11(r1) + std r11,GPR12(r1) + std r9,GPR13(r1) + SAVE_NVGPRS(r1) + std r11,_XER(r1) + std r11,_CTR(r1) + mflr r10 + + /* + * This clears CR0.SO (bit 28), which is the error indication on + * return from this system call. + */ + rldimi r12,r11,28,(63-28) + li r11,0xc00 + std r10,_LINK(r1) + std r11,_TRAP(r1) + std r12,_CCR(r1) + addi r10,r1,STACK_FRAME_OVERHEAD + ld r11,exception_marker@toc(r2) + std r11,-16(r10) /* "regshere" marker */ + +#ifdef CONFIG_PPC_BOOK3S + li r11,1 + stb r11,PACASRR_VALID(r13) +#endif + + /* + * We always enter kernel from userspace with irq soft-mask enabled and + * nothing pending. system_call_exception() will call + * trace_hardirqs_off(). + */ + li r11,IRQS_ALL_DISABLED + li r12,-1 /* Set MSR_EE and MSR_RI */ + stb r11,PACAIRQSOFTMASK(r13) + mtmsrd r12,1 + + /* Calling convention has r9 = orig r0, r10 = regs */ + mr r9,r0 + bl system_call_exception + +.Lsyscall_exit: + addi r4,r1,STACK_FRAME_OVERHEAD + li r5,0 /* !scv */ + bl syscall_exit_prepare + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S +.Lsyscall_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- syscall_restart +#endif + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + + ld r2,_CCR(r1) + ld r6,_LINK(r1) + mtlr r6 + +#ifdef CONFIG_PPC_BOOK3S + lbz r4,PACASRR_VALID(r13) + cmpdi r4,0 + bne 1f + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif + ld r4,_NIP(r1) + ld r5,_MSR(r1) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r5 +1: + DEBUG_SRR_VALID srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + cmpdi r3,0 + bne .Lsyscall_restore_regs + /* Zero volatile regs that may contain sensitive kernel data */ + li r0,0 + li r4,0 + li r5,0 + li r6,0 + li r7,0 + li r8,0 + li r9,0 + li r10,0 + li r11,0 + li r12,0 + mtctr r0 + mtspr SPRN_XER,r0 +.Lsyscall_restore_regs_cont: + +BEGIN_FTR_SECTION + HMT_MEDIUM_LOW +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ + mtcr r2 + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r13,GPR13(r1) + ld r1,GPR1(r1) + RFI_TO_USER + b . /* prevent speculative execution */ + +.Lsyscall_restore_regs: + ld r3,_CTR(r1) + ld r4,_XER(r1) + REST_NVGPRS(r1) + mtctr r3 + mtspr SPRN_XER,r4 + ld r0,GPR0(r1) + REST_8GPRS(4, r1) + ld r12,GPR12(r1) + b .Lsyscall_restore_regs_cont +.Lsyscall_rst_end: + +#ifdef CONFIG_PPC_BOOK3S +syscall_restart: +_ASM_NOKPROBE_SYMBOL(syscall_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + ld r3,RESULT(r1) + addi r4,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl syscall_exit_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Lsyscall_rst_start +1: + +SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b) +RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart) +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tabort_syscall: +_ASM_NOKPROBE_SYMBOL(tabort_syscall) + /* Firstly we need to enable TM in the kernel */ + mfmsr r10 + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG + mtmsrd r10, 0 + + /* tabort, this dooms the transaction, nothing else */ + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) + + /* + * Return directly to userspace. We have corrupted user register state, + * but userspace will never see that register state. Execution will + * resume after the tbegin of the aborted transaction with the + * checkpointed register state. + */ + li r9, MSR_RI + andc r10, r10, r9 + mtmsrd r10, 1 + mtspr SPRN_SRR0, r11 + mtspr SPRN_SRR1, r12 + RFI_TO_USER + b . /* prevent speculative execution */ +#endif + + /* + * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not + * touched, no exit work created, then this can be used. + */ + .balign IFETCH_ALIGN_BYTES + .globl fast_interrupt_return_srr +fast_interrupt_return_srr: +_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) + kuap_check_amr r3, r4 + ld r5,_MSR(r1) + andi. r0,r5,MSR_PR +#ifdef CONFIG_PPC_BOOK3S + beq 1f + kuap_user_restore r3, r4 + b .Lfast_user_interrupt_return_srr +1: kuap_kernel_restore r3, r4 + andi. r0,r5,MSR_RI + li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ + bne+ .Lfast_kernel_interrupt_return_srr + addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b . /* should not get here */ +#else + bne .Lfast_user_interrupt_return_srr + b .Lfast_kernel_interrupt_return_srr +#endif + +.macro interrupt_return_macro srr + .balign IFETCH_ALIGN_BYTES + .globl interrupt_return_\srr +interrupt_return_\srr\(): +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) + ld r4,_MSR(r1) + andi. r0,r4,MSR_PR + beq interrupt_return_\srr\()_kernel +interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_user_prepare + cmpdi r3,0 + bne- .Lrestore_nvgprs_\srr +.Lrestore_nvgprs_\srr\()_cont: + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +#ifdef CONFIG_PPC_BOOK3S +.Linterrupt_return_\srr\()_user_rst_start: + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_user_restart +#endif + li r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS + +.Lfast_user_interrupt_return_\srr\(): +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + lbz r4,PACAIRQSOFTMASK(r13) + tdnei r4,IRQS_ENABLED +#endif + +BEGIN_FTR_SECTION + ld r10,_PPR(r1) + mtspr SPRN_PPR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + REST_GPR(13, r1) + + mtcr r3 + mtlr r4 + mtctr r5 + mtspr SPRN_XER,r6 + + REST_4GPRS(2, r1) + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_USER + .else + HRFI_TO_USER + .endif + b . /* prevent speculative execution */ +.Linterrupt_return_\srr\()_user_rst_end: + +.Lrestore_nvgprs_\srr\(): + REST_NVGPRS(r1) + b .Lrestore_nvgprs_\srr\()_cont + +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_\srr\()_user_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_user_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_user_rst_start +1: + +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b) +RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart) +#endif + + .balign IFETCH_ALIGN_BYTES +interrupt_return_\srr\()_kernel: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) + addi r3,r1,STACK_FRAME_OVERHEAD + bl interrupt_exit_kernel_prepare + + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ +.Linterrupt_return_\srr\()_kernel_rst_start: + ld r11,SOFTE(r1) + cmpwi r11,IRQS_ENABLED + stb r11,PACAIRQSOFTMASK(r13) + bne 1f +#ifdef CONFIG_PPC_BOOK3S + lbz r11,PACAIRQHAPPENED(r13) + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + bne- interrupt_return_\srr\()_kernel_restart +#endif + li r11,0 + stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS +1: + +.Lfast_kernel_interrupt_return_\srr\(): + cmpdi cr1,r3,0 +#ifdef CONFIG_PPC_BOOK3S + .ifc \srr,srr + lbz r4,PACASRR_VALID(r13) + .else + lbz r4,PACAHSRR_VALID(r13) + .endif + cmpdi r4,0 + li r4,0 + bne 1f +#endif + ld r11,_NIP(r1) + ld r12,_MSR(r1) + .ifc \srr,srr + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACASRR_VALID(r13) +#endif + .else + mtspr SPRN_HSRR0,r11 + mtspr SPRN_HSRR1,r12 +1: +#ifdef CONFIG_PPC_BOOK3S + stb r4,PACAHSRR_VALID(r13) +#endif + .endif + DEBUG_SRR_VALID \srr + +BEGIN_FTR_SECTION + stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r0,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + + ld r3,_LINK(r1) + ld r4,_CTR(r1) + ld r5,_XER(r1) + ld r6,_CCR(r1) + li r0,0 + + REST_4GPRS(7, r1) + REST_2GPRS(11, r1) + + mtlr r3 + mtctr r4 + mtspr SPRN_XER,r5 + + /* + * Leaving a stale exception_marker on the stack can confuse + * the reliable stack unwinder later on. Clear it. + */ + std r0,STACK_FRAME_OVERHEAD-16(r1) + + REST_4GPRS(2, r1) + + bne- cr1,1f /* emulate stack store */ + mtcr r6 + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ + +1: /* + * Emulate stack store with update. New r1 value was already calculated + * and updated in our interrupt regs by emulate_loadstore, but we can't + * store the previous value of r1 to the stack before re-loading our + * registers from it, otherwise they could be clobbered. Use + * PACA_EXGEN as temporary storage to hold the store data, as + * interrupts are disabled here so it won't be clobbered. + */ + mtcr r6 + std r9,PACA_EXGEN+0(r13) + addi r9,r1,INT_FRAME_SIZE /* get original r1 */ + REST_GPR(6, r1) + REST_GPR(0, r1) + REST_GPR(1, r1) + std r9,0(r1) /* perform store component of stdu */ + ld r9,PACA_EXGEN+0(r13) + + .ifc \srr,srr + RFI_TO_KERNEL + .else + HRFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ +.Linterrupt_return_\srr\()_kernel_rst_end: + +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_\srr\()_kernel_restart: +_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) + GET_PACA(r13) + ld r1,PACA_EXIT_SAVE_R1(r13) + ld r2,PACATOC(r13) + addi r3,r1,STACK_FRAME_OVERHEAD + li r11,IRQS_ALL_DISABLED + stb r11,PACAIRQSOFTMASK(r13) + bl interrupt_exit_kernel_restart + std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ + b .Linterrupt_return_\srr\()_kernel_rst_start +1: + +SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b) +RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart) +#endif + +.endm + +interrupt_return_macro srr +#ifdef CONFIG_PPC_BOOK3S +interrupt_return_macro hsrr + + .globl __end_soft_masked +__end_soft_masked: +DEFINE_FIXED_SYMBOL(__end_soft_masked) +#endif /* CONFIG_PPC_BOOK3S */ + +#ifdef CONFIG_PPC_BOOK3S +_GLOBAL(ret_from_fork_scv) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_vectored_common_exit +#endif + +_GLOBAL(ret_from_fork) + bl schedule_tail + REST_NVGPRS(r1) + li r3,0 /* fork() return value */ + b .Lsyscall_exit + +_GLOBAL(ret_from_kernel_thread) + bl schedule_tail + REST_NVGPRS(r1) + mtctr r14 + mr r3,r15 +#ifdef PPC64_ELF_ABI_v2 + mr r12,r14 +#endif + bctrl + li r3,0 + b .Lsyscall_exit diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 72cb45393ef2..91e63eac4e8f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -121,6 +121,7 @@ void replay_soft_interrupts(void) ppc_save_regs(®s); regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; again: if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -217,6 +218,100 @@ static inline void replay_soft_interrupts_irqrestore(void) #define replay_soft_interrupts_irqrestore() replay_soft_interrupts() #endif +#ifdef CONFIG_CC_HAS_ASM_GOTO +notrace void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. + */ + asm_volatile_goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + return; + +happened: + irq_happened = get_irq_happened(); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = get_irq_happened(); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + replay_soft_interrupts_irqrestore(); + local_paca->irq_happened = 0; + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + __hard_irq_enable(); + preempt_enable(); +} +#else notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -288,6 +383,7 @@ notrace void arch_local_irq_restore(unsigned long mask) __hard_irq_enable(); preempt_enable(); } +#endif EXPORT_SYMBOL(arch_local_irq_restore); /* diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index ce87dc5ea23c..5277cf582c16 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,10 +11,10 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct ppc_inst *addr = (struct ppc_inst *)jump_entry_code(entry); + u32 *addr = (u32 *)jump_entry_code(entry); if (type == JUMP_LABEL_JMP) patch_branch(addr, jump_entry_target(entry), 0); else - patch_instruction(addr, ppc_inst(PPC_INST_NOP)); + patch_instruction(addr, ppc_inst(PPC_RAW_NOP())); } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7dd2ad3603ad..bdee7262c080 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -147,7 +147,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 0; if (*(u32 *)regs->nip == BREAK_INSTR) - regs->nip += BREAK_INSTR_SIZE; + regs_add_return_ip(regs, BREAK_INSTR_SIZE); return 1; } @@ -372,7 +372,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) { - regs->nip = pc; + regs_set_return_ip(regs, pc); } /* @@ -394,7 +394,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, case 'c': /* handle the optional parameter */ if (kgdb_hex2long(&ptr, &addr)) - linux_regs->nip = addr; + regs_set_return_ip(linux_regs, addr); atomic_set(&kgdb_cpu_doing_single_step, -1); /* set the trace bit if we're stepping */ @@ -402,9 +402,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, #ifdef CONFIG_PPC_ADV_DEBUG_REGS mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); - linux_regs->msr |= MSR_DE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE); #else - linux_regs->msr |= MSR_SE; + regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE); #endif atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); @@ -417,11 +417,10 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { + u32 instr, *addr = (u32 *)bpt->bpt_addr; int err; - unsigned int instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, (unsigned *) addr); + err = get_kernel_nofault(instr, addr); if (err) return err; @@ -429,7 +428,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (err) return -EFAULT; - *(unsigned int *)bpt->saved_instr = instr; + *(u32 *)bpt->saved_instr = instr; return 0; } @@ -438,7 +437,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { int err; unsigned int instr = *(unsigned int *)bpt->saved_instr; - struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; + u32 *addr = (u32 *)bpt->bpt_addr; err = patch_instruction(addr, ppc_inst(instr)); if (err) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 660138f6c4b2..7154d58338cc 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -39,7 +39,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * On powerpc, NIP is *before* this instruction for the * pre handler */ - regs->nip -= MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, -MCOUNT_INSN_SIZE); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -48,7 +48,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, * Emulate singlestep (and also recover regs->nip) * as if there is a nop */ - regs->nip += MCOUNT_INSN_SIZE; + regs_add_return_ip(regs, MCOUNT_INSN_SIZE); if (unlikely(p->post_handler)) { kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index c64a5feaebbe..cbc28d1a2e1b 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,11 +19,13 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <linux/moduleloader.h> #include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> #include <asm/inst.h> +#include <asm/set_memory.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -103,28 +105,42 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + + if (strict_module_rwx_enabled()) { + set_memory_ro((unsigned long)page, 1); + set_memory_x((unsigned long)page, 1); + } + return page; +} + int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr); + struct ppc_inst insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); ret = -EINVAL; - } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) { - printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n"); + } else if (IS_MTMSRD(insn) || IS_RFID(insn)) { + printk("Cannot register a kprobe on mtmsr[d]/rfi[d]\n"); ret = -EINVAL; } else if ((unsigned long)p->addr & ~PAGE_MASK && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) { + ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } preempt_disable(); prev = get_kprobe(p->addr - 1); preempt_enable_no_resched(); - if (prev && - ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)prev->ainsn.insn))) { + if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } @@ -138,7 +154,7 @@ int arch_prepare_kprobe(struct kprobe *p) } if (!ret) { - patch_instruction((struct ppc_inst *)p->ainsn.insn, insn); + patch_instruction(p->ainsn.insn, insn); p->opcode = ppc_inst_val(insn); } @@ -149,13 +165,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe); void arch_arm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION))); } NOKPROBE_SYMBOL(arch_arm_kprobe); void arch_disarm_kprobe(struct kprobe *p) { - patch_instruction((struct ppc_inst *)p->addr, ppc_inst(p->opcode)); + WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode))); } NOKPROBE_SYMBOL(arch_disarm_kprobe); @@ -178,7 +194,7 @@ static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs * variant as values in regs could play a part in * if the trap is taken or not */ - regs->nip = (unsigned long)p->ainsn.insn; + regs_set_return_ip(regs, (unsigned long)p->ainsn.insn); } static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -228,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); + struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); @@ -319,8 +335,9 @@ int kprobe_handler(struct pt_regs *regs) kprobe_opcode_t insn = *p->ainsn.insn; if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) { /* Turn off 'trace' bits */ - regs->msr &= ~MSR_SINGLESTEP; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); goto no_kprobe; } @@ -415,7 +432,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * we end up emulating it in kprobe_handler(), which increments the nip * again. */ - regs->nip = orig_ret_address - 4; + regs_set_return_ip(regs, orig_ret_address - 4); regs->link = orig_ret_address; return 0; @@ -439,7 +456,7 @@ int kprobe_post_handler(struct pt_regs *regs) if (!cur || user_mode(regs)) return 0; - len = ppc_inst_len(ppc_inst_read((struct ppc_inst *)cur->ainsn.insn)); + len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn)); /* make sure we got here for instruction we have a kprobe on */ if (((unsigned long)cur->ainsn.insn + len) != regs->nip) return 0; @@ -450,8 +467,8 @@ int kprobe_post_handler(struct pt_regs *regs) } /* Adjust nip to after the single-stepped instruction */ - regs->nip = (unsigned long)cur->addr + len; - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr + len); + regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr); /*Restore back the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -490,9 +507,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * and allow the page fault handler to continue as a * normal page fault. */ - regs->nip = (unsigned long)cur->addr; - regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */ - regs->msr |= kcb->kprobe_saved_msr; + regs_set_return_ip(regs, (unsigned long)cur->addr); + /* Turn off 'trace' bits */ + regs_set_return_msr(regs, + (regs->msr & ~MSR_SINGLESTEP) | + kcb->kprobe_saved_msr); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -506,7 +525,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 15e7b4900689..47a683cd00d2 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -274,7 +274,7 @@ void mce_common_process_ue(struct pt_regs *regs, entry = search_kernel_exception_table(regs->nip); if (entry) { mce_err->ignore_event = true; - regs->nip = extable_fixup(entry); + regs_set_return_ip(regs, extable_fixup(entry)); } } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 667104d4c455..c2f55fe7092d 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -463,7 +463,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, pfn = addr_to_pfn(regs, regs->nip); if (pfn != ULONG_MAX) { instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK); - instr = ppc_inst_read((struct ppc_inst *)instr_addr); + instr = ppc_inst_read((u32 *)instr_addr); if (!analyse_instr(&op, &tmp, instr)) { pfn = addr_to_pfn(regs, op.ea); *addr = op.ea; @@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, return -1; } -static int mce_handle_ierror(struct pt_regs *regs, +static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, const struct mce_ierror_table table[], struct mce_error_info *mce_err, uint64_t *addr, uint64_t *phys_addr) { - uint64_t srr1 = regs->msr; int handled = 0; int i; @@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs, } static long mce_handle_error(struct pt_regs *regs, + unsigned long srr1, const struct mce_derror_table dtable[], const struct mce_ierror_table itable[]) { struct mce_error_info mce_err = { 0 }; uint64_t addr, phys_addr = ULONG_MAX; - uint64_t srr1 = regs->msr; long handled; if (SRR1_MC_LOADSTORE(srr1)) handled = mce_handle_derror(regs, dtable, &mce_err, &addr, &phys_addr); else - handled = mce_handle_ierror(regs, itable, &mce_err, &addr, + handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr, &phys_addr); if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) @@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) /* P7 DD1 leaves top bits of DSISR undefined */ regs->dsisr &= 0x0000ffff; - return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p7_derror_table, mce_p7_ierror_table); } long __machine_check_early_realmode_p8(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); + return mce_handle_error(regs, regs->msr, + mce_p8_derror_table, mce_p8_ierror_table); } long __machine_check_early_realmode_p9(struct pt_regs *regs) { + unsigned long srr1 = regs->msr; + /* * On POWER9 DD2.1 and below, it's possible to get a machine check * caused by a paste instruction where only DSISR bit 25 is set. This @@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) return 1; - return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p9_derror_table, mce_p9_ierror_table); } long __machine_check_early_realmode_p10(struct pt_regs *regs) { - return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table); + unsigned long srr1 = regs->msr; + + /* + * Async machine check due to bad real address from store comes with + * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in + * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table + * so it will find the cause (which describes it correctly as a store + * error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + return mce_handle_error(regs, srr1, + mce_p10_derror_table, mce_p10_ierror_table); } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6a076bef2932..39ab15419592 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -388,9 +388,3 @@ _GLOBAL(start_secondary_resume) bl start_secondary b . #endif /* CONFIG_SMP */ - -/* - * This routine is just here to keep GCC happy - sigh... - */ -_GLOBAL(__main) - blr diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 3f35c8d20be7..ed04a3ba66fe 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -92,12 +92,14 @@ int module_finalize(const Elf_Ehdr *hdr, static __always_inline void * __module_alloc(unsigned long size, unsigned long start, unsigned long end) { + pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index c27b8687b82a..f417afc08d33 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -145,10 +145,9 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val))) + if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val))) return 0; - if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | - PPC_LO(val))) + if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val))) return 0; return 1; } @@ -175,16 +174,10 @@ static uint32_t do_plt_call(void *location, entry++; } - /* - * lis r12, sym@ha - * addi r12, r12, sym@l - * mtctr r12 - * bctr - */ - entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val); - entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val); - entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12); - entry->jump[3] = PPC_INST_BCTR; + entry->jump[0] = PPC_RAW_LIS(_R12, PPC_HA(val)); + entry->jump[1] = PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)); + entry->jump[2] = PPC_RAW_MTCTR(_R12); + entry->jump[3] = PPC_RAW_BCTR(); pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index ae2b188365b1..6baa676e7cb6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -122,27 +122,19 @@ struct ppc64_stub_entry * the stub, but it's significantly shorter to put these values at the * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. - * - * addis r11,r2, <high> - * addi r11,r11, <low> - * std r2,R2_STACK_OFFSET(r1) - * ld r12,32(r11) - * ld r2,40(r11) - * mtctr r12 - * bctr */ static u32 ppc64_stub_insns[] = { - PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2), - PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11), + PPC_RAW_ADDIS(_R11, _R2, 0), + PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ - PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET, - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32, + PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET), + PPC_RAW_LD(_R12, _R11, 32), #ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ - PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40, + PPC_RAW_LD(_R2, _R11, 40), #endif - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* Count how many different 24-bit relocations (different symbol, @@ -336,21 +328,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #ifdef CONFIG_MPROFILE_KERNEL -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -/* - * ld r12,PACATOC(r13) - * addis r12,r12,<high> - * addi r12,r12,<low> - * mtctr r12 - * bctr - */ static u32 stub_insns[] = { - PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, - PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), - PPC_INST_MTCTR | __PPC_RS(R12), - PPC_INST_BCTR, + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), }; /* @@ -507,7 +490,7 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) return 1; - if (*instruction != PPC_INST_NOP) { + if (*instruction != PPC_RAW_NOP()) { pr_err("%s: Expected nop after call, got %08x at %pS\n", me->name, *instruction, instruction); return 0; @@ -696,21 +679,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * ld r2, ...(r12) * add r2, r2, r12 */ - if ((((uint32_t *)location)[0] & ~0xfffc) != - (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12))) + if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0)) break; - if (((uint32_t *)location)[1] != - (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12))) + if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12)) break; /* * If found, replace it with: * addis r2, r12, (.TOC.-func)@ha * addi r2, r2, (.TOC.-func)@l */ - ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) | - __PPC_RA(R12) | PPC_HA(value); - ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) | - __PPC_RA(R2) | PPC_LO(value); + ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value)); + ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value)); break; case R_PPC64_REL16_HA: diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index cdf87086fa33..c79899abcec8 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -18,24 +18,16 @@ #include <asm/ppc-opcode.h> #include <asm/inst.h> -#define TMPL_CALL_HDLR_IDX \ - (optprobe_template_call_handler - optprobe_template_entry) -#define TMPL_EMULATE_IDX \ - (optprobe_template_call_emulate - optprobe_template_entry) -#define TMPL_RET_IDX \ - (optprobe_template_ret - optprobe_template_entry) -#define TMPL_OP_IDX \ - (optprobe_template_op_address - optprobe_template_entry) -#define TMPL_INSN_IDX \ - (optprobe_template_insn - optprobe_template_entry) -#define TMPL_END_IDX \ - (optprobe_template_end - optprobe_template_entry) - -DEFINE_INSN_CACHE_OPS(ppc_optinsn); +#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry) static bool insn_page_in_use; -static void *__ppc_alloc_insn_page(void) +void *alloc_optinsn_page(void) { if (insn_page_in_use) return NULL; @@ -43,20 +35,11 @@ static void *__ppc_alloc_insn_page(void) return &optinsn_slot; } -static void __ppc_free_insn_page(void *page __maybe_unused) +void free_optinsn_page(void *page) { insn_page_in_use = false; } -struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { - .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), - .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), - /* insn_size initialized later */ - .alloc = __ppc_alloc_insn_page, - .free = __ppc_free_insn_page, - .nr_garbage = 0, -}; - /* * Check if we can optimize this probe. Returns NIP post-emulation if this can * be optimized and 0 otherwise. @@ -66,6 +49,7 @@ static unsigned long can_optimize(struct kprobe *p) struct pt_regs regs; struct instruction_op op; unsigned long nip = 0; + unsigned long addr = (unsigned long)p->addr; /* * kprobe placed for kretprobe during boot time @@ -73,7 +57,7 @@ static unsigned long can_optimize(struct kprobe *p) * So further checks can be skipped. */ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) - return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + return addr + sizeof(kprobe_opcode_t); /* * We only support optimizing kernel addresses, but not @@ -81,11 +65,11 @@ static unsigned long can_optimize(struct kprobe *p) * * FIXME: Optimize kprobes placed in module addresses. */ - if (!is_kernel_addr((unsigned long)p->addr)) + if (!is_kernel_addr(addr)) return 0; memset(®s, 0, sizeof(struct pt_regs)); - regs.nip = (unsigned long)p->addr; + regs.nip = addr; regs.trap = 0x0; regs.msr = MSR_KERNEL; @@ -100,9 +84,8 @@ static unsigned long can_optimize(struct kprobe *p) * Ensure that the instruction is not a conditional branch, * and that can be emulated. */ - if (!is_conditional_branch(ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) && - analyse_instr(&op, ®s, - ppc_inst_read((struct ppc_inst *)p->ainsn.insn)) == 1) { + if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) && + analyse_instr(&op, ®s, ppc_inst_read(p->ainsn.insn)) == 1) { emulate_update_regs(®s, &op); nip = regs.nip; } @@ -123,7 +106,7 @@ static void optimized_callback(struct optimized_kprobe *op, kprobes_inc_nmissed_count(&op->kp); } else { __this_cpu_write(current_kprobe, &op->kp); - regs->nip = (unsigned long)op->kp.addr; + regs_set_return_ip(regs, (unsigned long)op->kp.addr); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); @@ -136,19 +119,15 @@ NOKPROBE_SYMBOL(optimized_callback); void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { if (op->optinsn.insn) { - free_ppc_optinsn_slot(op->optinsn.insn, 1); + free_optinsn_slot(op->optinsn.insn, 1); op->optinsn.insn = NULL; } } static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) { - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_LIS(reg, IMM_H(val)))); - addr++; - - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } /* @@ -157,34 +136,11 @@ static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t * */ static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) { - /* lis reg,(op)@highest */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) | - ((val >> 48) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@higher */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 32) & 0xffff))); - addr++; - - /* rldicr reg,reg,32,31 */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) | - ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31))); - addr++; - - /* oris reg,reg,(op)@h */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) | - ___PPC_RS(reg) | ((val >> 16) & 0xffff))); - addr++; - - /* ori reg,reg,(op)@l */ - patch_instruction((struct ppc_inst *)addr, - ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) | - ___PPC_RS(reg) | (val & 0xffff))); + patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val)))); + patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32))); + patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val)))); + patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val)))); } static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) @@ -198,19 +154,18 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; - kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff; + unsigned long op_callback_addr, emulate_step_addr; + kprobe_opcode_t *buff; long b_offset; unsigned long nip, size; int rc, i; - kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; - nip = can_optimize(p); if (!nip) return -EILSEQ; /* Allocate instruction slot for detour buffer */ - buff = get_ppc_optinsn_slot(); + buff = get_optinsn_slot(); if (!buff) return -ENOMEM; @@ -228,8 +183,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) goto error; /* Check if the return address is also within 32MB range */ - b_offset = (unsigned long)(buff + TMPL_RET_IDX) - - (unsigned long)nip; + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip; if (!is_offset_in_branch_range(b_offset)) goto error; @@ -238,8 +192,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int); pr_devel("Copying template to %p, size %lu\n", buff, size); for (i = 0; i < size; i++) { - rc = patch_instruction((struct ppc_inst *)(buff + i), - ppc_inst(*(optprobe_template_entry + i))); + rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i))); if (rc < 0) goto error; } @@ -253,51 +206,48 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 2. branch to optimized_callback() and emulate_step() */ - op_callback_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("optimized_callback"); - emulate_step_addr = (kprobe_opcode_t *)ppc_kallsyms_lookup_name("emulate_step"); + op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback"); + emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step"); if (!op_callback_addr || !emulate_step_addr) { WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n"); goto error; } - rc = create_branch(&branch_op_callback, - (struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - (unsigned long)op_callback_addr, - BRANCH_SET_LINK); + rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX, + op_callback_addr, BRANCH_SET_LINK); - rc |= create_branch(&branch_emulate_step, - (struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - (unsigned long)emulate_step_addr, - BRANCH_SET_LINK); + rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX, + emulate_step_addr, BRANCH_SET_LINK); if (rc) goto error; - patch_instruction((struct ppc_inst *)(buff + TMPL_CALL_HDLR_IDX), - branch_op_callback); - patch_instruction((struct ppc_inst *)(buff + TMPL_EMULATE_IDX), - branch_emulate_step); + patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback); + patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step); /* * 3. load instruction to be emulated into relevant register, and */ - temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + if (IS_ENABLED(CONFIG_PPC64)) { + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + } else { + patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); + } /* * 4. branch back from trampoline */ - patch_branch((struct ppc_inst *)(buff + TMPL_RET_IDX), (unsigned long)nip, 0); + patch_branch(buff + TMPL_RET_IDX, nip, 0); - flush_icache_range((unsigned long)buff, - (unsigned long)(&buff[TMPL_END_IDX])); + flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX])); op->optinsn.insn = buff; return 0; error: - free_ppc_optinsn_slot(buff, 0); + free_optinsn_slot(buff, 0); return -ERANGE; } @@ -328,12 +278,9 @@ void arch_optimize_kprobes(struct list_head *oplist) * Backup instructions which will be replaced * by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr, - RELATIVEJUMP_SIZE); - create_branch(&instr, - (struct ppc_inst *)op->kp.addr, - (unsigned long)op->optinsn.insn, 0); - patch_instruction((struct ppc_inst *)op->kp.addr, instr); + memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE); + create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0); + patch_instruction(op->kp.addr, instr); list_del_init(&op->list); } } @@ -343,8 +290,7 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op) arch_arm_kprobe(&op->kp); } -void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) +void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list) { struct optimized_kprobe *op; struct optimized_kprobe *tmp; @@ -355,8 +301,7 @@ void arch_unoptimize_kprobes(struct list_head *oplist, } } -int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 7f5aae3c387d..9bd30cac852b 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -346,10 +346,8 @@ void copy_mm_to_paca(struct mm_struct *mm) #ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; - get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); - get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), LOW_SLICE_ARRAY_SZ); memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8935c5696bce..185beb290580 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -96,7 +96,8 @@ static void check_if_tm_restore_required(struct task_struct *tsk) if (tsk == current && tsk->thread.regs && MSR_TM_ACTIVE(tsk->thread.regs->msr) && !test_thread_flag(TIF_RESTORE_TM)) { - tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr; + regs_set_return_msr(&tsk->thread.ckpt_regs, + tsk->thread.regs->msr); set_thread_flag(TIF_RESTORE_TM); } } @@ -161,7 +162,7 @@ static void __giveup_fpu(struct task_struct *tsk) msr &= ~(MSR_FP|MSR_FE0|MSR_FE1); if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_fpu(struct task_struct *tsk) @@ -244,7 +245,7 @@ static void __giveup_altivec(struct task_struct *tsk) msr &= ~MSR_VEC; if (cpu_has_feature(CPU_FTR_VSX)) msr &= ~MSR_VSX; - tsk->thread.regs->msr = msr; + regs_set_return_msr(tsk->thread.regs, msr); } void giveup_altivec(struct task_struct *tsk) @@ -559,7 +560,7 @@ void notrace restore_math(struct pt_regs *regs) msr_check_and_clear(new_msr); - regs->msr |= new_msr | fpexc_mode; + regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode); } } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1114,7 +1115,7 @@ void restore_tm_state(struct pt_regs *regs) #endif restore_math(regs); - regs->msr |= msr_diff; + regs_set_return_msr(regs, regs->msr | msr_diff); } #else /* !CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -1129,6 +1130,10 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_ALTIVEC)) t->vrsave = mfspr(SPRN_VRSAVE); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE)) + t->spefscr = mfspr(SPRN_SPEFSCR); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) t->dscr = mfspr(SPRN_DSCR); @@ -1159,6 +1164,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->vrsave != new_thread->vrsave) mtspr(SPRN_VRSAVE, new_thread->vrsave); #endif +#ifdef CONFIG_SPE + if (cpu_has_feature(CPU_FTR_SPE) && + old_thread->spefscr != new_thread->spefscr) + mtspr(SPRN_SPEFSCR, new_thread->spefscr); +#endif #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; @@ -1213,6 +1223,19 @@ struct task_struct *__switch_to(struct task_struct *prev, __flush_tlb_pending(batch); batch->active = 0; } + + /* + * On POWER9 the copy-paste buffer can only paste into + * foreign real addresses, so unprivileged processes can not + * see the data or use it in any way unless they have + * foreign real mappings. If the new process has the foreign + * real address mappings, we must issue a cp_abort to clear + * any state and prevent snooping, corruption or a covert + * channel. ISA v3.1 supports paste into local memory. + */ + if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) || + atomic_read(&new->mm->context.vas_windows))) + asm volatile(PPC_CP_ABORT); #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1248,43 +1271,48 @@ struct task_struct *__switch_to(struct task_struct *prev, } /* - * Call restore_sprs() before calling _switch(). If we move it after - * _switch() then we miss out on calling it for new tasks. The reason - * for this is we manually create a stack frame for new tasks that - * directly returns through ret_from_fork() or + * Call restore_sprs() and set_return_regs_changed() before calling + * _switch(). If we move it after _switch() then we miss out on calling + * it for new tasks. The reason for this is we manually create a stack + * frame for new tasks that directly returns through ret_from_fork() or * ret_from_kernel_thread(). See copy_thread() for details. */ restore_sprs(old_thread, new_thread); + set_return_regs_changed(); /* _switch changes stack (and regs) */ + #ifdef CONFIG_PPC32 kuap_assert_locked(); #endif last = _switch(old_thread, new_thread); + /* + * Nothing after _switch will be run for newly created tasks, + * because they switch directly to ret_from_fork/ret_from_kernel_thread + * etc. Code added here should have a comment explaining why that is + * okay. + */ + #ifdef CONFIG_PPC_BOOK3S_64 + /* + * This applies to a process that was context switched while inside + * arch_enter_lazy_mmu_mode(), to re-activate the batch that was + * deactivated above, before _switch(). This will never be the case + * for new tasks. + */ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } - if (current->thread.regs) { + /* + * Math facilities are masked out of the child MSR in copy_thread. + * A new task does not need to restore_math because it will + * demand fault them. + */ + if (current->thread.regs) restore_math(current->thread.regs); - - /* - * On POWER9 the copy-paste buffer can only paste into - * foreign real addresses, so unprivileged processes can not - * see the data or use it in any way unless they have - * foreign real mappings. If the new process has the foreign - * real address mappings, we must issue a cp_abort to clear - * any state and prevent snooping, corruption or a covert - * channel. ISA v3.1 supports paste into local memory. - */ - if (current->mm && - (cpu_has_feature(CPU_FTR_ARCH_31) || - atomic_read(¤t->mm->context.vas_windows))) - asm volatile(PPC_CP_ABORT); - } #endif /* CONFIG_PPC_BOOK3S_64 */ return last; @@ -1736,6 +1764,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_ALTIVEC p->thread.vr_save_area = NULL; #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + p->thread.kuap = KUAP_NONE; +#endif setup_ksp_vsid(p, sp); @@ -1838,13 +1869,14 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) } regs->gpr[2] = toc; } - regs->nip = entry; - regs->msr = MSR_USER64; + regs_set_return_ip(regs, entry); + regs_set_return_msr(regs, MSR_USER64); } else { - regs->nip = start; regs->gpr[2] = 0; - regs->msr = MSR_USER32; + regs_set_return_ip(regs, start); + regs_set_return_msr(regs, MSR_USER32); } + #endif #ifdef CONFIG_VSX current->thread.used_vsr = 0; @@ -1875,7 +1907,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - } EXPORT_SYMBOL(start_thread); @@ -1923,9 +1954,10 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val) if (val > PR_FP_EXC_PRECISE) return -EINVAL; tsk->thread.fpexc_mode = __pack_fe01(val); - if (regs != NULL && (regs->msr & MSR_FP) != 0) - regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) - | tsk->thread.fpexc_mode; + if (regs != NULL && (regs->msr & MSR_FP) != 0) { + regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode); + } return 0; } @@ -1971,9 +2003,9 @@ int set_endian(struct task_struct *tsk, unsigned int val) return -EINVAL; if (val == PR_ENDIAN_BIG) - regs->msr &= ~MSR_LE; + regs_set_return_msr(regs, regs->msr & ~MSR_LE); else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE) - regs->msr |= MSR_LE; + regs_set_return_msr(regs, regs->msr | MSR_LE); else return -EINVAL; @@ -2121,8 +2153,9 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack(struct task_struct *tsk, unsigned long *stack, - const char *loglvl) +void __no_sanitize_address show_stack(struct task_struct *tsk, + unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fbe9deebc8e1..f620e04dc9bf 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -758,7 +758,7 @@ void __init early_init_devtree(void *params) first_memblock_size = min_t(u64, first_memblock_size, memory_limit); setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ - memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); + memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. */ if (PHYSICAL_START > MEMORY_START) memblock_reserve(MEMORY_START, 0x8000); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 41ed7e33d897..a5bf355ce1d6 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -27,10 +27,12 @@ #include <linux/initrd.h> #include <linux/bitops.h> #include <linux/pgtable.h> +#include <linux/printk.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/page.h> #include <asm/processor.h> +#include <asm/interrupt.h> #include <asm/irq.h> #include <asm/io.h> #include <asm/smp.h> @@ -242,13 +244,31 @@ static int __init prom_strcmp(const char *cs, const char *ct) return 0; } -static char __init *prom_strcpy(char *dest, const char *src) +static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n) { - char *tmp = dest; + ssize_t rc; + size_t i; - while ((*dest++ = *src++) != '\0') - /* nothing */; - return tmp; + if (n == 0 || n > INT_MAX) + return -E2BIG; + + // Copy up to n bytes + for (i = 0; i < n && src[i] != '\0'; i++) + dest[i] = src[i]; + + rc = i; + + // If we copied all n then we have run out of space for the nul + if (rc == n) { + // Rewind by one character to ensure nul termination + i--; + rc = -E2BIG; + } + + for (; i < n; i++) + dest[i] = '\0'; + + return rc; } static int __init prom_strncmp(const char *cs, const char *ct, size_t count) @@ -701,13 +721,12 @@ static int __init prom_setprop(phandle node, const char *nodename, } /* We can't use the standard versions because of relocation headaches. */ -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) +#define prom_isxdigit(c) \ + (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F')) -#define isdigit(c) ('0' <= (c) && (c) <= '9') -#define islower(c) ('a' <= (c) && (c) <= 'z') -#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c)) +#define prom_isdigit(c) ('0' <= (c) && (c) <= '9') +#define prom_islower(c) ('a' <= (c) && (c) <= 'z') +#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) static unsigned long prom_strtoul(const char *cp, const char **endp) { @@ -716,14 +735,14 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) if (*cp == '0') { base = 8; cp++; - if (toupper(*cp) == 'X') { + if (prom_toupper(*cp) == 'X') { cp++; base = 16; } } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { + while (prom_isxdigit(*cp) && + (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) { result = result * base + value; cp++; } @@ -927,6 +946,10 @@ struct option_vector6 { u8 os_name; } __packed; +struct option_vector7 { + u8 os_id[256]; +} __packed; + struct ibm_arch_vec { struct { u32 mask, val; } pvrs[14]; @@ -949,6 +972,9 @@ struct ibm_arch_vec { u8 vec6_len; struct option_vector6 vec6; + + u8 vec7_len; + struct option_vector7 vec7; } __packed; static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { @@ -1095,6 +1121,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .secondary_pteg = 0, .os_name = OV6_LINUX, }, + + /* option vector 7: OS Identification */ + .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)), }; static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned; @@ -1323,6 +1352,8 @@ static void __init prom_check_platform_support(void) memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, sizeof(ibm_architecture_vec)); + prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256); + if (prop_len > 1) { int i; u8 vec[8]; @@ -1762,6 +1793,8 @@ static int prom_rtas_hcall(uint64_t args) asm volatile("sc 1\n" : "=r" (arg1) : "r" (arg1), "r" (arg2) :); + srr_regs_clobbered(); + return arg1; } @@ -2702,7 +2735,7 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - prom_strcpy(namep, "phandle"); + prom_strscpy_pad(namep, "phandle", sizeof("phandle")); mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ @@ -3210,54 +3243,6 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) #endif /* CONFIG_BLK_DEV_INITRD */ } -#ifdef CONFIG_PPC64 -#ifdef CONFIG_RELOCATABLE -static void reloc_toc(void) -{ -} - -static void unreloc_toc(void) -{ -} -#else -static void __reloc_toc(unsigned long offset, unsigned long nr_entries) -{ - unsigned long i; - unsigned long *toc_entry; - - /* Get the start of the TOC by using r2 directly. */ - asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry)); - - for (i = 0; i < nr_entries; i++) { - *toc_entry = *toc_entry + offset; - toc_entry++; - } -} - -static void reloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - __reloc_toc(offset, nr_entries); - - mb(); -} - -static void unreloc_toc(void) -{ - unsigned long offset = reloc_offset(); - unsigned long nr_entries = - (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long); - - mb(); - - __reloc_toc(-offset, nr_entries); -} -#endif -#endif - #ifdef CONFIG_PPC_SVM /* * Perform the Enter Secure Mode ultracall. @@ -3291,14 +3276,12 @@ static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt) * relocated it so the check will fail. Restore the original image by * relocating it back to the kernel virtual base address. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(KERNELBASE); + relocate(KERNELBASE); ret = enter_secure_mode(kbase, fdt); /* Relocate the kernel again. */ - if (IS_ENABLED(CONFIG_RELOCATABLE)) - relocate(kbase); + relocate(kbase); if (ret != U_SUCCESS) { prom_printf("Returned %d from switching to secure mode.\n", ret); @@ -3326,8 +3309,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 unsigned long offset = reloc_offset(); reloc_got2(offset); -#else - reloc_toc(); #endif /* @@ -3504,8 +3485,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #ifdef CONFIG_PPC32 reloc_got2(-offset); -#else - unreloc_toc(); #endif /* Move to secure memory if we're supposed to be secure guests. */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c index 3990c01ef8cf..399f5d94a3df 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-adv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_BT; task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -24,7 +24,7 @@ void user_enable_block_step(struct task_struct *task) if (regs != NULL) { task->thread.debug.dbcr0 &= ~DBCR0_IC; task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); } set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -50,7 +50,7 @@ void user_disable_single_step(struct task_struct *task) * All debug events were off..... */ task->thread.debug.dbcr0 &= ~DBCR0_IDM; - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); } } clear_tsk_thread_flag(task, TIF_SINGLESTEP); @@ -82,6 +82,7 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr, int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data) { + struct pt_regs *regs = task->thread.regs; #ifdef CONFIG_HAVE_HW_BREAKPOINT int ret; struct thread_struct *thread = &task->thread; @@ -112,7 +113,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W); if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0, task->thread.debug.dbcr1)) { - task->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); task->thread.debug.dbcr0 &= ~DBCR0_IDM; } return 0; @@ -132,7 +133,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l dbcr_dac(task) |= DBCR_DAC1R; if (data & 0x2UL) dbcr_dac(task) |= DBCR_DAC1W; - task->thread.regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return 0; } @@ -220,7 +221,7 @@ static long set_instruction_bp(struct task_struct *child, } out: child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot; } @@ -336,7 +337,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) return -ENOSPC; } child->thread.debug.dbcr0 |= DBCR0_IDM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return slot + 4; } @@ -430,7 +431,7 @@ static int set_dac_range(struct task_struct *child, child->thread.debug.dbcr2 |= DBCR2_DAC12MX; else /* PPC_BREAKPOINT_MODE_MASK */ child->thread.debug.dbcr2 |= DBCR2_DAC12MM; - child->thread.regs->msr |= MSR_DE; + regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE); return 5; } @@ -485,7 +486,8 @@ long ppc_del_hwdebug(struct task_struct *child, long data) if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0, child->thread.debug.dbcr1)) { child->thread.debug.dbcr0 &= ~DBCR0_IDM; - child->thread.regs->msr &= ~MSR_DE; + regs_set_return_msr(child->thread.regs, + child->thread.regs->msr & ~MSR_DE); } } return rc; diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c index aa36fcad36cd..a5dd7d2e2c9e 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c +++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c @@ -11,10 +11,8 @@ void user_enable_single_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_BE; - regs->msr |= MSR_SE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_BE) | MSR_SE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -22,10 +20,8 @@ void user_enable_block_step(struct task_struct *task) { struct pt_regs *regs = task->thread.regs; - if (regs != NULL) { - regs->msr &= ~MSR_SE; - regs->msr |= MSR_BE; - } + if (regs != NULL) + regs_set_return_msr(regs, (regs->msr & ~MSR_SE) | MSR_BE); set_tsk_thread_flag(task, TIF_SINGLESTEP); } @@ -34,7 +30,7 @@ void user_disable_single_step(struct task_struct *task) struct pt_regs *regs = task->thread.regs; if (regs != NULL) - regs->msr &= ~(MSR_SE | MSR_BE); + regs_set_return_msr(regs, regs->msr & ~(MSR_SE | MSR_BE)); clear_tsk_thread_flag(task, TIF_SINGLESTEP); } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 773bcc4ca843..b8be1d6668b5 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -113,8 +113,9 @@ static unsigned long get_user_msr(struct task_struct *task) static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr) { - task->thread.regs->msr &= ~MSR_DEBUGCHANGE; - task->thread.regs->msr |= msr & MSR_DEBUGCHANGE; + unsigned long newmsr = (task->thread.regs->msr & ~MSR_DEBUGCHANGE) | + (msr & MSR_DEBUGCHANGE); + regs_set_return_msr(task->thread.regs, newmsr); return 0; } diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index a28239b8b0c0..33c07c8af6c8 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -12,7 +12,7 @@ #define MAX_RTC_WAIT 5000 /* 5 sec */ -#define RTAS_CLOCK_BUSY (-2) + time64_t __init rtas_get_boot_time(void) { int ret[8]; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 6bada744402b..99f2cce635fb 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/syscalls.h> +#include <asm/interrupt.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/hvcall.h> @@ -46,6 +47,13 @@ /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); +static inline void do_enter_rtas(unsigned long args) +{ + enter_rtas(args); + + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED }; @@ -384,7 +392,7 @@ static char *__fetch_rtas_last_error(char *altbuf) save_args = rtas.args; rtas.args = err_args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); err_args = rtas.args; rtas.args = save_args; @@ -430,7 +438,7 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - enter_rtas(__pa(args)); + do_enter_rtas(__pa(args)); } void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) @@ -1138,7 +1146,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) flags = lock_rtas(); rtas.args = args; - enter_rtas(__pa(&rtas.args)); + do_enter_rtas(__pa(&rtas.args)); args = rtas.args; /* A -1 return code indicates that the last command couldn't diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index c17d1c9362b5..cc51fa52e783 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -300,9 +300,7 @@ static void stf_barrier_enable(bool enable) void setup_stf_barrier(void) { enum stf_barrier_type type; - bool enable, hv; - - hv = cpu_has_feature(CPU_FTR_HVMODE); + bool enable; /* Default to fallback in case fw-features are not available */ if (cpu_has_feature(CPU_FTR_ARCH_300)) @@ -315,8 +313,7 @@ void setup_stf_barrier(void) type = STF_BARRIER_NONE; enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || - (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + security_ftr_enabled(SEC_FTR_STF_BARRIER); if (type == STF_BARRIER_FALLBACK) { pr_info("stf-barrier: fallback barrier available\n"); @@ -439,8 +436,8 @@ static void update_branch_cache_flush(void) site2 = &patch__call_kvm_flush_link_stack_p9; // This controls the branch from guest_exit_cont to kvm_flush_link_stack if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) { - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); - patch_instruction_site(site2, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); + patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP())); } else { // Could use HW flush, but that could also flush count cache patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); @@ -450,11 +447,11 @@ static void update_branch_cache_flush(void) // Patch out the bcctr first, then nop the rest site = &patch__call_flush_branch_caches3; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches2; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); site = &patch__call_flush_branch_caches1; - patch_instruction_site(site, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(site, ppc_inst(PPC_RAW_NOP())); // This controls the branch from _switch to flush_branch_caches if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE && @@ -477,12 +474,12 @@ static void update_branch_cache_flush(void) // If we just need to flush the link stack, early return if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) { patch_instruction_site(&patch__flush_link_stack_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); // If we have flush instruction, early return } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) { patch_instruction_site(&patch__flush_count_cache_return, - ppc_inst(PPC_INST_BLR)); + ppc_inst(PPC_RAW_BLR())); } } } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 046fe21b5c3b..26328fd2990c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -92,8 +92,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); int dcache_bsize; int icache_bsize; -unsigned long klimit = (unsigned long) _end; - /* * This still seems to be needed... -- paulus */ @@ -931,7 +929,7 @@ void __init setup_arch(char **cmdline_p) init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; + init_mm.brk = (unsigned long)_end; mm_iommu_init(&init_mm); irqstack_early_init(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d7c1f92152af..7ec5c47fce0e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); */ notrace void __init machine_init(u64 dt_ptr) { - struct ppc_inst *addr = (struct ppc_inst *)patch_site_addr(&patch__memset_nocache); + u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); struct ppc_inst insn; /* Configure static keys first, now that we're relocated. */ @@ -85,7 +85,7 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP)); + patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP())); create_cond_branch(&insn, addr, branch_target(addr), 0x820000); patch_instruction(addr, insn); /* replace b by bne cr0 */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a35fbf4d0bce..1ff258f6c76c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -33,6 +33,7 @@ #include <linux/pgtable.h> #include <asm/debugfs.h> +#include <asm/kvm_guest.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh) * disable it by default. Book3S has a soft-nmi hardlockup detector based * on the decrementer interrupt, so it does not suffer from this problem. * - * It is likely to get false positives in VM guests, so disable it there - * by default too. + * It is likely to get false positives in KVM guests, so disable it there + * by default too. PowerVM will not stop or arbitrarily oversubscribe + * CPUs, but give a minimum regular allotment even with SPLPAR, so enable + * the detector for non-KVM guests, assume PowerVM. */ static int __init disable_hardlockup_detector(void) { #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF hardlockup_detector_disable(); #else - if (firmware_has_feature(FW_FEATURE_LPAR)) - hardlockup_detector_disable(); + if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (is_kvm_guest()) + hardlockup_detector_disable(); + } #endif return 0; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 9ded046edb0e..e600764a926c 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -214,7 +214,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, regs->gpr[0] = __NR_restart_syscall; else regs->gpr[3] = regs->orig_gpr3; - regs->nip -= 4; + regs_add_return_ip(regs, -4); regs->result = 0; } else { if (trap_is_scv(regs)) { @@ -322,16 +322,16 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * For signals taken in non-TM or suspended mode, we use the * normal/non-checkpointed stack pointer. */ - - unsigned long ret = tsk->thread.regs->gpr[1]; + struct pt_regs *regs = tsk->thread.regs; + unsigned long ret = regs->gpr[1]; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); - if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + if (MSR_TM_ACTIVE(regs->msr)) { preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); - if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) + if (MSR_TM_TRANSACTIONAL(regs->msr)) ret = tsk->thread.ckpt_regs.gpr[1]; /* @@ -341,7 +341,7 @@ static unsigned long get_tm_stackpointer(struct task_struct *tsk) * (tm_recheckpoint_new_task() would recheckpoint). Besides, we * enter the signal handler in non-transactional state. */ - tsk->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); preempt_enable(); } #endif diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 8f05ed0da292..0608581967f0 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -354,14 +354,8 @@ static void prepare_save_tm_user_regs(void) { WARN_ON(tm_suspend_disabled); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); -#endif -#ifdef CONFIG_SPE - if (current->thread.used_spe) - flush_spe_to_thread(current); -#endif } static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, @@ -379,7 +373,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user */ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed); -#ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { unsafe_copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, @@ -412,7 +405,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed); if (msr & MSR_FP) @@ -420,7 +412,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user else unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed); -#ifdef CONFIG_VSX /* * Copy VSR 0-31 upper half from thread_struct to local * buffer, then write that to userspace. Also set MSR_VSX in @@ -436,23 +427,6 @@ static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user msr |= MSR_VSX; } -#endif /* CONFIG_VSX */ -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in __unsafe_save_user_regs(). - */ - if (current->thread.used_spe) { - unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr, - ELF_NEVRREG * sizeof(u32), failed); - /* set MSR_SPE in the saved MSR value to indicate that - * frame->mc_vregs contains valid data */ - msr |= MSR_SPE; - } - - /* We always copy to/from spefscr */ - unsafe_put_user(current->thread.spefscr, - (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed); @@ -505,14 +479,14 @@ static long restore_user_regs(struct pt_regs *regs, /* if doing signal return, restore the previous little-endian mode */ if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); #ifdef CONFIG_ALTIVEC /* * Force the process to reload the altivec registers from * current->thread when it next does altivec instructions */ - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, @@ -534,7 +508,7 @@ static long restore_user_regs(struct pt_regs *regs, * Force the process to reload the VSX registers from * current->thread when it next does VSX instruction. */ - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -550,12 +524,12 @@ static long restore_user_regs(struct pt_regs *regs, * force the process to reload the FP registers from * current->thread when it next does FP instructions */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE /* force the process to reload the spe registers from current->thread when it next does spe instructions */ - regs->msr &= ~MSR_SPE; + regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, @@ -587,9 +561,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *tm_sr) { unsigned long msr, msr_hi; -#ifdef CONFIG_VSX int i; -#endif if (tm_suspend_disabled) return 1; @@ -608,10 +580,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed); /* Restore the previous little-endian mode */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); -#ifdef CONFIG_ALTIVEC - regs->msr &= ~MSR_VEC; + regs_set_return_msr(regs, regs->msr & ~MSR_VEC); if (msr & MSR_VEC) { /* restore altivec registers from the stack */ unsafe_copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, @@ -629,14 +600,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, (u32 __user *)&sr->mc_vregs[32], failed); if (cpu_has_feature(CPU_FTR_ALTIVEC)) mtspr(SPRN_VRSAVE, current->thread.ckvrsave); -#endif /* CONFIG_ALTIVEC */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed); -#ifdef CONFIG_VSX - regs->msr &= ~MSR_VSX; + regs_set_return_msr(regs, regs->msr & ~MSR_VSX); if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -649,24 +618,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } -#endif /* CONFIG_VSX */ - -#ifdef CONFIG_SPE - /* SPE regs are not checkpointed with TM, so this section is - * simply the same as in restore_user_regs(). - */ - regs->msr &= ~MSR_SPE; - if (msr & MSR_SPE) { - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); - current->thread.used_spe = true; - } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); - - /* Always get SPEFSCR back */ - unsafe_get_user(current->thread.spefscr, - (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); -#endif /* CONFIG_SPE */ user_read_access_end(); @@ -675,7 +626,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_restore_general_regs(regs, tm_sr, failed); -#ifdef CONFIG_ALTIVEC /* restore altivec registers from the stack */ if (msr & MSR_VEC) unsafe_copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, @@ -684,11 +634,9 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Always get VRSAVE back */ unsafe_get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32], failed); -#endif /* CONFIG_ALTIVEC */ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed); -#ifdef CONFIG_VSX if (msr & MSR_VSX) { /* * Restore altivec registers from the stack to a local @@ -697,7 +645,6 @@ static long restore_tm_user_regs(struct pt_regs *regs, unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed); current->thread.used_vsr = true; } -#endif /* CONFIG_VSX */ /* Get the top half of the MSR from the user context */ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed); @@ -725,7 +672,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, * * Pull in the MSR TM bits from the user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK); + regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK)); /* Now, recheckpoint. This loads up all of the checkpointed (older) * registers, including FP and V[S]Rs. After recheckpointing, the * transactional versions should be loaded. @@ -740,14 +687,12 @@ static long restore_tm_user_regs(struct pt_regs *regs, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(¤t->thread.fp_state); - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode)); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { load_vr_state(¤t->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } -#endif preempt_enable(); @@ -828,10 +773,8 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_rt_sigreturn, &mctx->mc_pad[0], - failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed); @@ -858,10 +801,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[4] = (unsigned long)&frame->info; regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long)frame; - regs->nip = (unsigned long) ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -926,9 +869,8 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32); } else { tramp = (unsigned long)mctx->mc_pad; - /* Set up the sigreturn trampoline: li r0,sigret; sc */ - unsafe_put_user(PPC_INST_ADDI + __NR_sigreturn, &mctx->mc_pad[0], failed); - unsafe_put_user(PPC_INST_SC, &mctx->mc_pad[1], failed); + unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed); + unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed); asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0])); } user_access_end(); @@ -947,10 +889,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long)ksig->ka.sa.sa_handler; + regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler); /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); + return 0; failed: @@ -1200,7 +1142,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * set, and recheckpoint was not called. This avoid * hitting a TM Bad thing at RFID */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); } /* Fall through, for non-TM restore */ #endif @@ -1289,7 +1231,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, affect the contents of these registers. After this point, failure is a problem, anyway, and it's very unlikely unless the user is really doing something wrong. */ - regs->msr = new_msr; + regs_set_return_msr(regs, new_msr); #ifdef CONFIG_PPC_ADV_DEBUG_REGS current->thread.debug.dbcr0 = new_dbcr0; #endif diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index f9e1f5428b9e..1831bba0582e 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -354,7 +354,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ /* get MSR separately, transfer the LE bit if doing signal return */ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out); if (sig) - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out); unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out); unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out); @@ -376,7 +376,7 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC unsafe_get_user(v_regs, &sc->v_regs, efault_out); @@ -468,7 +468,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, return -EINVAL; /* pull in MSR LE from user context */ - regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE)); /* The following non-GPR non-FPR non-VR state is also checkpointed: */ err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]); @@ -495,7 +495,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ - regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); + regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); #ifdef CONFIG_ALTIVEC err |= __get_user(v_regs, &sc->v_regs); @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr |= msr & MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK)); /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -583,7 +583,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, * to be de-scheduled with MSR[TS] set but without calling * tm_recheckpoint(). This can cause a bug. */ - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&tsk->thread); @@ -591,11 +591,11 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { load_fp_state(&tsk->thread.fp_state); - regs->msr |= (MSR_FP | tsk->thread.fpexc_mode); + regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode)); } if (msr & MSR_VEC) { load_vr_state(&tsk->thread.vr_state); - regs->msr |= MSR_VEC; + regs_set_return_msr(regs, regs->msr | MSR_VEC); } preempt_enable(); @@ -618,15 +618,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) int i; long err = 0; - /* bctrl # call the handler */ - err |= __put_user(PPC_INST_BCTRL, &tramp[0]); - /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */ - err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) | - (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]); - /* li r0, __NR_[rt_]sigreturn| */ - err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]); - /* sc */ - err |= __put_user(PPC_INST_SC, &tramp[3]); + /* Call the handler and pop the dummy stackframe*/ + err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]); + err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]); + + err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]); + err |= __put_user(PPC_RAW_SC(), &tramp[3]); /* Minimal traceback info */ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++) @@ -720,6 +717,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, /* This returns like rt_sigreturn */ set_thread_flag(TIF_RESTOREALL); + return 0; efault_out: @@ -786,7 +784,7 @@ SYSCALL_DEFINE0(rt_sigreturn) * the MSR[TS] that came from user context later, at * restore_tm_sigcontexts. */ - regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK); if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; @@ -818,7 +816,8 @@ SYSCALL_DEFINE0(rt_sigreturn) * MSR[TS] set, but without CPU in the proper state, * causing a TM bad thing. */ - current->thread.regs->msr &= ~MSR_TS_MASK; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr & ~MSR_TS_MASK); if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext))) goto badframe; @@ -832,6 +831,7 @@ SYSCALL_DEFINE0(rt_sigreturn) goto badframe; set_thread_flag(TIF_RESTOREALL); + return 0; badframe_block: @@ -911,12 +911,12 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, /* Set up to return from userspace. */ if (tsk->mm->context.vdso) { - regs->nip = VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64); + regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64)); } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) goto badframe; - regs->nip = (unsigned long) &frame->tramp[0]; + regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]); } /* Allocate a dummy caller frame for the signal handler. */ @@ -941,14 +941,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, } /* enter the signal handler in native-endian mode */ - regs->msr &= ~MSR_LE; - regs->msr |= (MSR_KERNEL & MSR_LE); + regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE)); regs->gpr[1] = newsp; regs->gpr[3] = ksig->sig; regs->result = 0; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo); - err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc); + regs->gpr[4] = (unsigned long)&frame->info; + regs->gpr[5] = (unsigned long)&frame->uc; regs->gpr[6] = (unsigned long) frame; } else { regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7ddc2d32c39e..447b78a87c8f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -619,6 +619,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs) /* * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -634,6 +636,15 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { hard_irq_disable(); + + /* + * Offlining CPUs in stop_this_cpu can result in scheduler warnings, + * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants + * to know other CPUs are offline before it breaks locks to flush + * printk buffers, in case we panic()ed while holding the lock. + */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -1541,6 +1552,10 @@ void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); + /* PPC64 calls setup_kup() in early_setup_secondary() */ + if (IS_ENABLED(CONFIG_PPC32)) + setup_kup(); + mmgrab(&init_mm); current->active_mm = &init_mm; diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1deb1bf331dd..2b0d04a1b7d2 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -23,8 +23,8 @@ #include <asm/paca.h> -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { unsigned long sp; @@ -61,8 +61,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, * * If the task is not 'current', the caller *must* ensure the task is inactive. */ -int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, - void *cookie, struct task_struct *task) +int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { unsigned long sp; unsigned long newsp; @@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs) static void raise_backtrace_ipi(cpumask_t *mask) { + struct paca_struct *p; unsigned int cpu; + u64 delay_us; for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { handle_backtrace_ipi(NULL); - else - smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); - } + continue; + } - for_each_cpu(cpu, mask) { - struct paca_struct *p = paca_ptrs[cpu]; + delay_us = 5 * USEC_PER_SEC; + + if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) { + // Now wait up to 5s for the other CPU to do its backtrace + while (cpumask_test_cpu(cpu, mask) && delay_us) { + udelay(1); + delay_us--; + } + + // Other CPU cleared itself from the mask + if (delay_us) + continue; + } + + p = paca_ptrs[cpu]; cpumask_clear_cpu(cpu, mask); diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index a552c9e68d7e..bf4ae0f0e36c 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -114,7 +114,8 @@ SYSCALL_DEFINE0(switch_endian) { struct thread_info *ti; - current->thread.regs->msr ^= MSR_LE; + regs_set_return_msr(current->thread.regs, + current->thread.regs->msr ^ MSR_LE); /* * Set TIF_RESTOREALL so that r3 isn't clobbered on return to diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 2e08640bb3b4..5ff0e55d0db1 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -843,14 +843,14 @@ static int register_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -858,7 +858,7 @@ static int register_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif @@ -940,14 +940,14 @@ static int unregister_cpu_online(unsigned int cpu) #ifdef HAS_PPC_PMC_IBM case PPC_PMC_IBM: attrs = ibm_common_attrs; - nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(ibm_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_IBM */ #ifdef HAS_PPC_PMC_G4 case PPC_PMC_G4: attrs = g4_common_attrs; - nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(g4_common_attrs); pmc_attrs = classic_pmc_attrs; break; #endif /* HAS_PPC_PMC_G4 */ @@ -955,7 +955,7 @@ static int unregister_cpu_online(unsigned int cpu) case PPC_PMC_PA6T: /* PA Semi starts counting at PMC0 */ attrs = pa6t_attrs; - nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute); + nattrs = ARRAY_SIZE(pa6t_attrs); pmc_attrs = NULL; break; #endif diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 6c31af7f4fa8..b9a047d92ec0 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -201,7 +201,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index da995c5fb97d..e45ce427bffb 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb) void notrace accumulate_stolen_time(void) { u64 sst, ust; - unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; - /* We are called early in the exception entry, before - * soft/hard_enabled are sync'ed to the expected state - * for the exception. We are hard disabled but the PACA - * needs to reflect that so various debug stuff doesn't - * complain - */ - irq_soft_mask_set(IRQS_DISABLED); - sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); acct->stime -= sst; acct->utime -= ust; acct->steal_time += ust + sst; - - irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ffe9537195aa..d89c5df4f206 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -49,7 +49,7 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) addr = ppc_function_entry((void *)addr); /* if (link) set op to 'bl' else 'b' */ - create_branch(&op, (struct ppc_inst *)ip, addr, link ? 1 : 0); + create_branch(&op, (u32 *)ip, addr, link ? 1 : 0); return op; } @@ -79,7 +79,7 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) } /* replace the text with the new text */ - if (patch_instruction((struct ppc_inst *)ip, new)) + if (patch_instruction((u32 *)ip, new)) return -EPERM; return 0; @@ -94,7 +94,7 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr) addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ - return create_branch(&op, (struct ppc_inst *)ip, addr, 0) == 0; + return create_branch(&op, (u32 *)ip, addr, 0) == 0; } static int is_bl_op(struct ppc_inst op) @@ -162,7 +162,7 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = ppc_inst(PPC_INST_NOP); + pop = ppc_inst(PPC_RAW_NOP()); if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); @@ -170,7 +170,7 @@ __ftrace_make_nop(struct module *mod, } /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) && + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { pr_err("Unexpected instruction %s around bl _mcount\n", ppc_inst_as_str(op)); @@ -203,12 +203,12 @@ __ftrace_make_nop(struct module *mod, } if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); + pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); return -EINVAL; } #endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((struct ppc_inst *)ip, pop)) { + if (patch_instruction((u32 *)ip, pop)) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -278,9 +278,9 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } - op = ppc_inst(PPC_INST_NOP); + op = ppc_inst(PPC_RAW_NOP()); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction((u32 *)ip, op)) return -EPERM; return 0; @@ -380,7 +380,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) return -1; } - if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { + if (patch_branch((u32 *)tramp, ptr, 0)) { pr_debug("REL24 out of range!\n"); return -1; } @@ -424,7 +424,7 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) } } - if (patch_instruction((struct ppc_inst *)ip, ppc_inst(PPC_INST_NOP))) { + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { pr_err("Patching NOP failed.\n"); return -EPERM; } @@ -446,7 +446,7 @@ int ftrace_make_nop(struct module *mod, if (test_24bit_addr(ip, addr)) { /* within range */ old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_INST_NOP); + new = ppc_inst(PPC_RAW_NOP()); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) return __ftrace_make_nop_kernel(rec, addr); @@ -510,7 +510,7 @@ static int expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ - if (!ppc_inst_equal(op0, ppc_inst(PPC_INST_NOP))) + if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) return 0; return 1; } @@ -589,14 +589,14 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; struct ppc_inst op; - unsigned long ip = rec->ip; + u32 *ip = (u32 *)rec->ip; /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) + if (copy_inst_from_kernel_nofault(&op, ip)) return -EFAULT; /* It should be pointing to a nop */ - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op)); return -EINVAL; } @@ -608,8 +608,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* create the branch to the trampoline */ - err = create_branch(&op, (struct ppc_inst *)ip, - rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -617,7 +616,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) pr_devel("write to %lx\n", rec->ip); - if (patch_instruction((struct ppc_inst *)ip, op)) + if (patch_instruction(ip, op)) return -EPERM; return 0; @@ -653,7 +652,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EFAULT; } - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) { + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); return -EINVAL; } @@ -684,7 +683,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) */ if (test_24bit_addr(ip, addr)) { /* within range */ - old = ppc_inst(PPC_INST_NOP); + old = ppc_inst(PPC_RAW_NOP()); new = ftrace_call_replace(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) @@ -762,7 +761,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, /* The new target may be within range */ if (test_24bit_addr(ip, addr)) { /* within range */ - if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -790,12 +789,12 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } /* Ensure branch is within 24 bits */ - if (create_branch(&op, (struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (create_branch(&op, (u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("Branch out of range\n"); return -EINVAL; } - if (patch_branch((struct ppc_inst *)ip, tramp, BRANCH_SET_LINK)) { + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -851,7 +850,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) struct ppc_inst old, new; int ret; - old = ppc_inst_read((struct ppc_inst *)&ftrace_call); + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); @@ -859,7 +858,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) /* Also update the regs callback function */ if (!ret) { ip = (unsigned long)(&ftrace_regs_call); - old = ppc_inst_read((struct ppc_inst *)&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); new = ftrace_call_replace(ip, (unsigned long)func, 1); ret = ftrace_modify_code(ip, old, new); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b4ab95c9e94a..dfbce527c98e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -67,6 +67,7 @@ #include <asm/kprobes.h> #include <asm/stacktrace.h> #include <asm/nmi.h> +#include <asm/disassemble.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -427,7 +428,7 @@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; nonrecoverable: - regs->msr &= ~MSR_RI; + regs_set_return_msr(regs, regs->msr & ~MSR_RI); #endif } DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) @@ -537,11 +538,11 @@ static inline int check_io_access(struct pt_regs *regs) * For the debug message, we look at the preceding * load or store. */ - if (*nip == PPC_INST_NOP) + if (*nip == PPC_RAW_NOP()) nip -= 2; - else if (*nip == PPC_INST_ISYNC) + else if (*nip == PPC_RAW_ISYNC()) --nip; - if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) { + if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) { unsigned int rb; --nip; @@ -549,8 +550,8 @@ static inline int check_io_access(struct pt_regs *regs) printk(KERN_DEBUG "%s bad port %lx at %p\n", (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } } @@ -586,8 +587,8 @@ static inline int check_io_access(struct pt_regs *regs) #define REASON_BOUNDARY SRR1_BOUNDARY #define single_stepping(regs) ((regs)->msr & MSR_SE) -#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) -#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE) +#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE)) +#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE)) #endif #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4) @@ -1031,7 +1032,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) #endif /* !__LITTLE_ENDIAN__ */ /* Go to next instruction */ - regs->nip += 4; + regs_add_return_ip(regs, 4); } #endif /* CONFIG_VSX */ @@ -1476,7 +1477,7 @@ static void do_program_check(struct pt_regs *regs) if (!(regs->msr & MSR_PR) && /* not user-mode */ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - regs->nip += 4; + regs_add_return_ip(regs, 4); return; } _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); @@ -1538,7 +1539,7 @@ static void do_program_check(struct pt_regs *regs) if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { switch (emulate_instruction(regs)) { case 0: - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); return; case -EFAULT: @@ -1566,7 +1567,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception) */ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { - regs->msr |= REASON_ILLEGAL; + regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL); do_program_check(regs); } @@ -1593,7 +1594,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) if (fixed == 1) { /* skip over emulated instruction */ - regs->nip += inst_length(reason); + regs_add_return_ip(regs, inst_length(reason)); emulate_single_step(regs); return; } @@ -1659,7 +1660,7 @@ static void tm_unavailable(struct pt_regs *regs) #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (user_mode(regs)) { current->thread.load_tm++; - regs->msr |= MSR_TM; + regs_set_return_msr(regs, regs->msr | MSR_TM); tm_enable(); tm_restore_sprs(¤t->thread); return; @@ -1751,7 +1752,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) pr_err("DSCR based mfspr emulation failed\n"); return; } - regs->nip += 4; + regs_add_return_ip(regs, 4); emulate_single_step(regs); } return; @@ -1948,7 +1949,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) */ if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM flag is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -1969,7 +1970,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) * instead of stopping here when hitting a BT */ if (debug_status & DBSR_BT) { - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable BT */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT); @@ -1980,7 +1981,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (user_mode(regs)) { current->thread.debug.dbcr0 &= ~DBCR0_BT; current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC; - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); return; } @@ -1994,7 +1995,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) if (debugger_sstep(regs)) return; } else if (debug_status & DBSR_IC) { /* Instruction complete */ - regs->msr &= ~MSR_DE; + regs_set_return_msr(regs, regs->msr & ~MSR_DE); /* Disable instruction completion */ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); @@ -2016,7 +2017,7 @@ DEFINE_INTERRUPT_HANDLER(DebugException) current->thread.debug.dbcr0 &= ~DBCR0_IC; if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0, current->thread.debug.dbcr1)) - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); else /* Make sure the IDM bit is off */ current->thread.debug.dbcr0 &= ~DBCR0_IDM; @@ -2044,7 +2045,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } @@ -2109,7 +2110,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) err = do_spe_mathemu(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } @@ -2140,10 +2141,10 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) giveup_spe(current); preempt_enable(); - regs->nip -= 4; + regs_add_return_ip(regs, -4); err = speround_handler(regs); if (err == 0) { - regs->nip += 4; /* skip emulated instruction */ + regs_add_return_ip(regs, 4); /* skip emulated instruction */ emulate_single_step(regs); return; } diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 9356b60d6030..8513aa49614e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,3 +296,42 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT + +#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) + +static u8 udbg_uart_in_isa300_rm(unsigned int reg) +{ + uint64_t msr = mfmsr(); + uint8_t c; + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); + return c; +} + +static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) +{ + uint64_t msr = mfmsr(); + + mtmsr(msr & ~(MSR_EE|MSR_DR)); + isync(); + eieio(); + __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); + mtmsr(msr); + isync(); +} + +void __init udbg_init_debug_microwatt(void) +{ + udbg_uart_in = udbg_uart_in_isa300_rm; + udbg_uart_out = udbg_uart_out_isa300_rm; + udbg_use_uart(); +} + +#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c index 186f69b11e94..c6975467d9ff 100644 --- a/arch/powerpc/kernel/uprobes.c +++ b/arch/powerpc/kernel/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, return -EINVAL; if (cpu_has_feature(CPU_FTR_ARCH_31) && - ppc_inst_prefixed(auprobe->insn) && + ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) && (addr & 0x3f) == 60) { pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n"); return -EINVAL; @@ -62,7 +62,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) autask->saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; - regs->nip = current->utask->xol_vaddr; + regs_set_return_ip(regs, current->utask->xol_vaddr); user_enable_single_step(current); return 0; @@ -119,7 +119,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * support doesn't exist and have to fix-up the next instruction * to be executed. */ - regs->nip = (unsigned long)ppc_inst_next((void *)utask->vaddr, &auprobe->insn); + regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn)); user_disable_single_step(current); return 0; @@ -182,7 +182,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) * emulate_step() returns 1 if the insn was successfully emulated. * For all other cases, we need to single-step in hardware. */ - ret = emulate_step(regs, ppc_inst_read(&auprobe->insn)); + ret = emulate_step(regs, ppc_inst_read(auprobe->insn)); if (ret > 0) return true; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index f5a52f444e36..fc120fac1910 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -73,6 +73,10 @@ _GLOBAL(load_up_altivec) addi r5,r4,THREAD /* Get THREAD */ oris r12,r12,MSR_VEC@h std r12,_MSR(r1) +#ifdef CONFIG_PPC_BOOK3S_64 + li r4,0 + stb r4,PACASRR_VALID(r13) +#endif #endif li r4,1 stb r4,THREAD_LOAD_VEC(r5) @@ -131,7 +135,9 @@ _GLOBAL(load_up_vsx) /* enable use of VSX after return */ oris r12,r12,MSR_VSX@h std r12,_MSR(r1) - b fast_interrupt_return + li r4,0 + stb r4,PACASRR_VALID(r13) + b fast_interrupt_return_srr #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 72fa3c00229a..40bdefe9caa7 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -9,6 +9,22 @@ #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define SOFT_MASK_TABLE(align) \ + . = ALIGN(align); \ + __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \ + __start___soft_mask_table = .; \ + KEEP(*(__soft_mask_table)) \ + __stop___soft_mask_table = .; \ + } + +#define RESTART_TABLE(align) \ + . = ALIGN(align); \ + __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \ + __start___restart_table = .; \ + KEEP(*(__restart_table)) \ + __stop___restart_table = .; \ + } + #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> @@ -124,6 +140,9 @@ SECTIONS RO_DATA(PAGE_SIZE) #ifdef CONFIG_PPC64 + SOFT_MASK_TABLE(8) + RESTART_TABLE(8) + . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { __start___stf_entry_barrier_fixup = .; diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index c9a8f4781a10..a165635fd214 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -24,6 +24,7 @@ #include <linux/kdebug.h> #include <linux/sched/debug.h> #include <linux/delay.h> +#include <linux/processor.h> #include <linux/smp.h> #include <asm/interrupt.h> diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 0196d0c211ac..10f997e6bb95 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -105,8 +105,8 @@ void crash_ipi_callback(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - int tries = 0; + volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + volatile int tries = 0; int (*old_handler)(struct pt_regs *regs); printk(KERN_EMERG "Sending IPI to other CPUs\n"); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index e8e7b2c530d1..4b3a8d80cfa3 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -353,9 +353,6 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu) preempt_enable(); } -/* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) - int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 260e860d53a2..1d1fcc290fca 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4626,6 +4626,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); + + srr_regs_clobbered(); + return r; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 71bcb0140461..6bc9425acb32 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -25,6 +25,7 @@ #include <asm/cputable.h> #include <asm/cacheflush.h> #include <linux/uaccess.h> +#include <asm/interrupt.h> #include <asm/io.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -1848,6 +1849,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu) /* Make sure we save the guest TAR/EBB/DSCR state */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + srr_regs_clobbered(); out: vcpu->mode = OUTSIDE_GUEST_MODE; return ret; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index cc1a8a0f311e..99a7c9132422 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -39,7 +39,7 @@ extra-$(CONFIG_PPC64) += crtsavres.o endif obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \ - memcpy_power7.o + memcpy_power7.o restart_table.o obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \ memcpy_64.o copy_mc_64.o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 870b30d9be2f..f9a3019e37b4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -18,8 +18,7 @@ #include <asm/setup.h> #include <asm/inst.h> -static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr, - struct ppc_inst *patch_addr) +static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) { if (!ppc_inst_prefixed(instr)) { u32 val = ppc_inst_val(instr); @@ -40,7 +39,7 @@ failed: return -EFAULT; } -int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int raw_patch_instruction(u32 *addr, struct ppc_inst instr) { return __patch_instruction(addr, instr, addr); } @@ -70,22 +69,16 @@ static int text_area_cpu_down(unsigned int cpu) } /* - * Run as a late init call. This allows all the boot time patching to be done - * simply by patching the code, and then we're called here prior to - * mark_rodata_ro(), which happens after all init calls are run. Although - * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge - * it as being preferable to a kernel that will crash later when someone tries - * to use patch_instruction(). + * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and + * we judge it as being preferable to a kernel that will crash later when + * someone tries to use patch_instruction(). */ -static int __init setup_text_poke_area(void) +void __init poking_init(void) { BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/text_poke:online", text_area_cpu_up, text_area_cpu_down)); - - return 0; } -late_initcall(setup_text_poke_area); /* * This can be called for kernel text or a module. @@ -148,10 +141,10 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { int err; - struct ppc_inst *patch_addr = NULL; + u32 *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -172,7 +165,7 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) goto out; } - patch_addr = (struct ppc_inst *)(text_poke_addr + (kaddr & ~PAGE_MASK)); + patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); __patch_instruction(addr, instr, patch_addr); @@ -187,14 +180,14 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, struct ppc_inst instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) +int patch_instruction(u32 *addr, struct ppc_inst instr) { /* Make sure we aren't patching a freed init section */ if (init_mem_is_free && init_section_contains(addr, 4)) { @@ -205,7 +198,7 @@ int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr) } NOKPROBE_SYMBOL(patch_instruction); -int patch_branch(struct ppc_inst *addr, unsigned long target, int flags) +int patch_branch(u32 *addr, unsigned long target, int flags) { struct ppc_inst instr; @@ -257,8 +250,7 @@ bool is_conditional_branch(struct ppc_inst instr) } NOKPROBE_SYMBOL(is_conditional_branch); -int create_branch(struct ppc_inst *instr, - const struct ppc_inst *addr, +int create_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -278,7 +270,7 @@ int create_branch(struct ppc_inst *instr, return 0; } -int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, +int create_cond_branch(struct ppc_inst *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -325,39 +317,39 @@ int instr_is_relative_link_branch(struct ppc_inst instr) return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } -static unsigned long branch_iform_target(const struct ppc_inst *instr) +static unsigned long branch_iform_target(const u32 *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0x3FFFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x2000000) imm -= 0x4000000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -static unsigned long branch_bform_target(const struct ppc_inst *instr) +static unsigned long branch_bform_target(const u32 *instr) { signed long imm; - imm = ppc_inst_val(*instr) & 0xFFFC; + imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC; /* If the top bit of the immediate value is set this is negative */ if (imm & 0x8000) imm -= 0x10000; - if ((ppc_inst_val(*instr) & BRANCH_ABSOLUTE) == 0) + if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0) imm += (unsigned long)instr; return (unsigned long)imm; } -unsigned long branch_target(const struct ppc_inst *instr) +unsigned long branch_target(const u32 *instr) { if (instr_is_branch_iform(ppc_inst_read(instr))) return branch_iform_target(instr); @@ -367,17 +359,7 @@ unsigned long branch_target(const struct ppc_inst *instr) return 0; } -int instr_is_branch_to_addr(const struct ppc_inst *instr, unsigned long addr) -{ - if (instr_is_branch_iform(ppc_inst_read(instr)) || - instr_is_branch_bform(ppc_inst_read(instr))) - return branch_target(instr) == addr; - - return 0; -} - -int translate_branch(struct ppc_inst *instr, const struct ppc_inst *dest, - const struct ppc_inst *src) +int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) { unsigned long target; target = branch_target(src); @@ -404,12 +386,21 @@ void __patch_exception(int exc, unsigned long addr) * instruction of the exception, not the first one */ - patch_branch((struct ppc_inst *)(ibase + (exc / 4) + 1), addr, 0); + patch_branch(ibase + (exc / 4) + 1, addr, 0); } #endif #ifdef CONFIG_CODE_PATCHING_SELFTEST +static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + static void __init test_trampoline(void) { asm ("nop;\n"); @@ -422,9 +413,9 @@ static void __init test_branch_iform(void) { int err; struct ppc_inst instr; - unsigned long addr; - - addr = (unsigned long)&instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; /* The simplest case, branch to self, no flags */ check(instr_is_branch_iform(ppc_inst(0x48000000))); @@ -445,63 +436,68 @@ static void __init test_branch_iform(void) check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); /* Absolute branch to 0x100 */ - instr = ppc_inst(0x48000103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute branch to 0x420fc */ - instr = ppc_inst(0x480420ff); - check(instr_is_branch_to_addr(&instr, 0x420fc)); + patch_instruction(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); /* Maximum positive relative branch, + 20MB - 4B */ - instr = ppc_inst(0x49fffffc); - check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC)); + patch_instruction(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); /* Smallest negative relative branch, - 4B */ - instr = ppc_inst(0x4bfffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative branch, - 32 MB */ - instr = ppc_inst(0x4a000000); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + patch_instruction(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Branch to self, with link */ - err = create_branch(&instr, &instr, addr, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100, with link */ - err = create_branch(&instr, &instr, addr - 0x100, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100, no link */ - err = create_branch(&instr, &instr, addr + 0x100, 0); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + err = create_branch(&instr, iptr, addr + 0x100, 0); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 MB */ - err = create_branch(&instr, &instr, addr - 0x2000000, BRANCH_SET_LINK); - check(instr_is_branch_to_addr(&instr, addr - 0x2000000)); + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); /* Out of range relative negative offset, - 32 MB + 4*/ - err = create_branch(&instr, &instr, addr - 0x2000004, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); check(err); /* Out of range relative positive offset, + 32 MB */ - err = create_branch(&instr, &instr, addr + 0x2000000, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); check(err); /* Unaligned target */ - err = create_branch(&instr, &instr, addr + 3, BRANCH_SET_LINK); + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); check(err); /* Check flags are masked correctly */ - err = create_branch(&instr, &instr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x48000000))); } static void __init test_create_function_call(void) { - struct ppc_inst *iptr; + u32 *iptr; unsigned long dest; struct ppc_inst instr; /* Check we can create a function call */ - iptr = (struct ppc_inst *)ppc_function_entry(test_trampoline); + iptr = (u32 *)ppc_function_entry(test_trampoline); dest = ppc_function_entry(test_create_function_call); create_branch(&instr, iptr, dest, BRANCH_SET_LINK); patch_instruction(iptr, instr); @@ -512,10 +508,11 @@ static void __init test_branch_bform(void) { int err; unsigned long addr; - struct ppc_inst *iptr, instr; + struct ppc_inst instr; + u32 tmp[2]; + u32 *iptr = tmp; unsigned int flags; - iptr = &instr; addr = (unsigned long)iptr; /* The simplest case, branch to self, no flags */ @@ -528,39 +525,43 @@ static void __init test_branch_bform(void) check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); /* Absolute conditional branch to 0x100 */ - instr = ppc_inst(0x43ff0103); - check(instr_is_branch_to_addr(&instr, 0x100)); + patch_instruction(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); /* Absolute conditional branch to 0x20fc */ - instr = ppc_inst(0x43ff20ff); - check(instr_is_branch_to_addr(&instr, 0x20fc)); + patch_instruction(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); /* Maximum positive relative conditional branch, + 32 KB - 4B */ - instr = ppc_inst(0x43ff7ffc); - check(instr_is_branch_to_addr(&instr, addr + 0x7FFC)); + patch_instruction(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); /* Smallest negative relative conditional branch, - 4B */ - instr = ppc_inst(0x43fffffc); - check(instr_is_branch_to_addr(&instr, addr - 4)); + patch_instruction(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); /* Largest negative relative conditional branch, - 32 KB */ - instr = ppc_inst(0x43ff8000); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* All condition code bits set & link */ flags = 0x3ff000 | BRANCH_SET_LINK; /* Branch to self */ err = create_cond_branch(&instr, iptr, addr, flags); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); /* Branch to self - 0x100 */ err = create_cond_branch(&instr, iptr, addr - 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); /* Branch to self + 0x100 */ err = create_cond_branch(&instr, iptr, addr + 0x100, flags); - check(instr_is_branch_to_addr(&instr, addr + 0x100)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); /* Maximum relative negative offset, - 32 KB */ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); - check(instr_is_branch_to_addr(&instr, addr - 0x8000)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); /* Out of range relative negative offset, - 32 KB + 4*/ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); @@ -576,7 +577,8 @@ static void __init test_branch_bform(void) /* Check flags are masked correctly */ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); - check(instr_is_branch_to_addr(&instr, addr)); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); } @@ -715,9 +717,9 @@ static void __init test_prefixed_patching(void) extern unsigned int code_patching_test1_expected[]; extern unsigned int end_code_patching_test1[]; - __patch_instruction((struct ppc_inst *)code_patching_test1, + __patch_instruction(code_patching_test1, ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), - (struct ppc_inst *)code_patching_test1); + code_patching_test1); check(!memcmp(code_patching_test1, code_patching_test1_expected, diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c index 407b992fb02f..e834079d2b5c 100644 --- a/arch/powerpc/lib/error-inject.c +++ b/arch/powerpc/lib/error-inject.c @@ -11,6 +11,6 @@ void override_function_with_return(struct pt_regs *regs) * function in the kernel/module, captured on a kprobe. We don't need * to worry about 32-bit userspace on a 64-bit kernel. */ - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); } NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index fe26f2fa0f3f..cda17bee5afe 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -17,6 +17,7 @@ #include <linux/stop_machine.h> #include <asm/cputable.h> #include <asm/code-patching.h> +#include <asm/interrupt.h> #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> @@ -33,26 +34,25 @@ struct fixup_entry { long alt_end_off; }; -static struct ppc_inst *calc_addr(struct fixup_entry *fcur, long offset) +static u32 *calc_addr(struct fixup_entry *fcur, long offset) { /* * We store the offset to the code as a negative offset from * the start of the alt_entry, to support the VDSO. This * routine converts that back into an actual address. */ - return (struct ppc_inst *)((unsigned long)fcur + offset); + return (u32 *)((unsigned long)fcur + offset); } -static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, - struct ppc_inst *alt_start, struct ppc_inst *alt_end) +static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { int err; struct ppc_inst instr; instr = ppc_inst_read(src); - if (instr_is_relative_branch(*src)) { - struct ppc_inst *target = (struct ppc_inst *)branch_target(src); + if (instr_is_relative_branch(ppc_inst_read(src))) { + u32 *target = (u32 *)branch_target(src); /* Branch within the section doesn't need translating */ if (target < alt_start || target > alt_end) { @@ -69,7 +69,7 @@ static int patch_alt_instruction(struct ppc_inst *src, struct ppc_inst *dest, static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) { - struct ppc_inst *start, *end, *alt_start, *alt_end, *src, *dest, nop; + u32 *start, *end, *alt_start, *alt_end, *src, *dest; start = calc_addr(fcur, fcur->start_off); end = calc_addr(fcur, fcur->end_off); @@ -91,9 +91,8 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) return 1; } - nop = ppc_inst(PPC_INST_NOP); - for (; dest < end; dest = ppc_inst_next(dest, &nop)) - raw_patch_instruction(dest, nop); + for (; dest < end; dest++) + raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP())); return 0; } @@ -128,21 +127,21 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_entry_barrier_fixup); end = PTRRELOC(&__stop___stf_entry_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } else if (types & STF_BARRIER_SYNC_ORI) { - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R10, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { @@ -152,14 +151,14 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types) // See comment in do_entry_flush_fixups() RE order of patching if (types & STF_BARRIER_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -180,32 +179,31 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) start = PTRRELOC(&__start___stf_exit_barrier_fixup); end = PTRRELOC(&__stop___stf_exit_barrier_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x60000000; /* nop */ - instrs[4] = 0x60000000; /* nop */ - instrs[5] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_NOP(); + instrs[4] = PPC_RAW_NOP(); + instrs[5] = PPC_RAW_NOP(); i = 0; if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ - instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0); } else { - instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ - instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13); + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1); } - instrs[i++] = 0x7c0004ac; /* hwsync */ - instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - if (cpu_has_feature(CPU_FTR_HVMODE)) { - instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ - } else { - instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ - } + instrs[i++] = PPC_RAW_SYNC(); + instrs[i++] = PPC_RAW_LD(_R13, _R13, 0); + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1); + else + instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2); } else if (types & STF_BARRIER_EIEIO) { - instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */ } for (i = 0; start < end; start++, i++) { @@ -213,12 +211,12 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); - patch_instruction((struct ppc_inst *)(dest + 4), ppc_inst(instrs[4])); - patch_instruction((struct ppc_inst *)(dest + 5), ppc_inst(instrs[5])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); + patch_instruction(dest + 4, ppc_inst(instrs[4])); + patch_instruction(dest + 5, ppc_inst(instrs[5])); } printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, (types == STF_BARRIER_NONE) ? "no" : @@ -228,6 +226,9 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static bool stf_exit_reentrant = false; +static bool rfi_exit_reentrant = false; + static int __do_stf_barrier_fixups(void *data) { enum stf_barrier_type *types = data; @@ -242,11 +243,27 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) { /* * The call to the fallback entry flush, and the fallback/sync-ori exit - * flush can not be safely patched in/out while other CPUs are executing - * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs - * spin in the stop machine core with interrupts hard disabled. + * flush can not be safely patched in/out while other CPUs are + * executing them. So call __do_stf_barrier_fixups() on one CPU while + * all other CPUs spin in the stop machine core with interrupts hard + * disabled. + * + * The branch to mark interrupt exits non-reentrant is enabled first, + * then stop_machine runs which will ensure all CPUs are out of the + * low level interrupt exit code before patching. After the patching, + * if allowed, then flip the branch to allow fast exits. */ + static_branch_enable(&interrupt_exit_not_reentrant); + stop_machine(__do_stf_barrier_fixups, &types, NULL); + + if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI)) + stf_exit_reentrant = false; + else + stf_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); } void do_uaccess_flush_fixups(enum l1d_flush_type types) @@ -258,35 +275,35 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___uaccess_flush_fixup); end = PTRRELOC(&__stop___uaccess_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ - instrs[3] = 0x4e800020; /* blr */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); + instrs[3] = PPC_RAW_BLR(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - instrs[3] = 0x60000000; /* nop */ + instrs[3] = PPC_RAW_NOP(); /* fallthrough to fallback flush */ } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest + 3, ppc_inst(instrs[3])); } printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, @@ -306,24 +323,24 @@ static int __do_entry_flush_fixups(void *data) long *start, *end; int i; - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); i = 0; if (types == L1D_FLUSH_FALLBACK) { - instrs[i++] = 0x7d4802a6; /* mflr r10 */ - instrs[i++] = 0x60000000; /* branch patched below */ - instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + instrs[i++] = PPC_RAW_MFLR(_R10); + instrs[i++] = PPC_RAW_NOP(); /* branch patched below */ + instrs[i++] = PPC_RAW_MTLR(_R10); } if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); /* * If we're patching in or out the fallback flush we need to be careful about the @@ -358,14 +375,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -377,14 +394,14 @@ static int __do_entry_flush_fixups(void *data) pr_devel("patching dest %lx\n", (unsigned long)dest); if (types == L1D_FLUSH_FALLBACK) { - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_branch((struct ppc_inst *)(dest + 1), + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_branch(dest + 1, (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK); } else { - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); } } @@ -412,8 +429,9 @@ void do_entry_flush_fixups(enum l1d_flush_type types) stop_machine(__do_entry_flush_fixups, &types, NULL); } -void do_rfi_flush_fixups(enum l1d_flush_type types) +static int __do_rfi_flush_fixups(void *data) { + enum l1d_flush_type types = *(enum l1d_flush_type *)data; unsigned int instrs[3], *dest; long *start, *end; int i; @@ -421,31 +439,31 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) start = PTRRELOC(&__start___rfi_flush_fixup); end = PTRRELOC(&__stop___rfi_flush_fixup); - instrs[0] = 0x60000000; /* nop */ - instrs[1] = 0x60000000; /* nop */ - instrs[2] = 0x60000000; /* nop */ + instrs[0] = PPC_RAW_NOP(); + instrs[1] = PPC_RAW_NOP(); + instrs[2] = PPC_RAW_NOP(); if (types & L1D_FLUSH_FALLBACK) /* b .+16 to fallback flush */ - instrs[0] = 0x48000010; + instrs[0] = PPC_INST_BRANCH | 16; i = 0; if (types & L1D_FLUSH_ORI) { - instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ - instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ + instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */ } if (types & L1D_FLUSH_MTTRIG) - instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); - patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction(dest, ppc_inst(instrs[0])); + patch_instruction(dest + 1, ppc_inst(instrs[1])); + patch_instruction(dest + 2, ppc_inst(instrs[2])); } printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, @@ -456,6 +474,29 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) : "ori type" : (types & L1D_FLUSH_MTTRIG) ? "mttrig type" : "unknown"); + + return 0; +} + +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + /* + * stop_machine gets all CPUs out of the interrupt exit handler same + * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run + * without stop_machine, so this could be achieved with a broadcast + * IPI instead, but this matches the stf sequence. + */ + static_branch_enable(&interrupt_exit_not_reentrant); + + stop_machine(__do_rfi_flush_fixups, &types, NULL); + + if (types & L1D_FLUSH_FALLBACK) + rfi_exit_reentrant = false; + else + rfi_exit_reentrant = true; + + if (stf_exit_reentrant && rfi_exit_reentrant) + static_branch_disable(&interrupt_exit_not_reentrant); } void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end) @@ -467,18 +508,18 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr = 0x60000000; /* nop */ + instr = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using ORI speculation barrier\n"); - instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */ } for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr)); + patch_instruction(dest, ppc_inst(instr)); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -508,21 +549,21 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ start = fixup_start; end = fixup_end; - instr[0] = PPC_INST_NOP; - instr[1] = PPC_INST_NOP; + instr[0] = PPC_RAW_NOP(); + instr[1] = PPC_RAW_NOP(); if (enable) { pr_info("barrier-nospec: using isync; sync as speculation barrier\n"); - instr[0] = PPC_INST_ISYNC; - instr[1] = PPC_INST_SYNC; + instr[0] = PPC_RAW_ISYNC(); + instr[1] = PPC_RAW_SYNC(); } for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; pr_devel("patching dest %lx\n", (unsigned long)dest); - patch_instruction((struct ppc_inst *)dest, ppc_inst(instr[0])); - patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instr[1])); + patch_instruction(dest, ppc_inst(instr[0])); + patch_instruction(dest + 1, ppc_inst(instr[1])); } printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); @@ -536,7 +577,7 @@ static void patch_btb_flush_section(long *curr) end = (void *)curr + *(curr + 1); for (; start < end; start++) { pr_devel("patching dest %lx\n", (unsigned long)start); - patch_instruction((struct ppc_inst *)start, ppc_inst(PPC_INST_NOP)); + patch_instruction(start, ppc_inst(PPC_RAW_NOP())); } } @@ -555,7 +596,7 @@ void do_btb_flush_fixups(void) void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; - struct ppc_inst *dest; + u32 *dest; if (!(value & CPU_FTR_LWSYNC)) return ; @@ -572,13 +613,14 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - struct ppc_inst inst, *src, *dest, *end; + struct ppc_inst inst; + u32 *src, *dest, *end; if (PHYSICAL_START == 0) return; - src = (struct ppc_inst *)(KERNELBASE + PHYSICAL_START); - dest = (struct ppc_inst *)KERNELBASE; + src = (u32 *)(KERNELBASE + PHYSICAL_START); + dest = (u32 *)KERNELBASE; end = (void *)src + (__end_interrupts - _stext); while (src < end) { diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c new file mode 100644 index 000000000000..bccb662c1b7b --- /dev/null +++ b/arch/powerpc/lib/restart_table.c @@ -0,0 +1,56 @@ +#include <asm/interrupt.h> +#include <asm/kprobes.h> + +struct soft_mask_table_entry { + unsigned long start; + unsigned long end; +}; + +struct restart_table_entry { + unsigned long start; + unsigned long end; + unsigned long fixup; +}; + +extern struct soft_mask_table_entry __start___soft_mask_table[]; +extern struct soft_mask_table_entry __stop___soft_mask_table[]; + +extern struct restart_table_entry __start___restart_table[]; +extern struct restart_table_entry __stop___restart_table[]; + +/* Given an address, look for it in the soft mask table */ +bool search_kernel_soft_mask_table(unsigned long addr) +{ + struct soft_mask_table_entry *smte = __start___soft_mask_table; + + while (smte < __stop___soft_mask_table) { + unsigned long start = smte->start; + unsigned long end = smte->end; + + if (addr >= start && addr < end) + return true; + + smte++; + } + return false; +} +NOKPROBE_SYMBOL(search_kernel_soft_mask_table); + +/* Given an address, look for it in the kernel exception table */ +unsigned long search_kernel_restart_table(unsigned long addr) +{ + struct restart_table_entry *rte = __start___restart_table; + + while (rte < __stop___restart_table) { + unsigned long start = rte->start; + unsigned long end = rte->end; + unsigned long fixup = rte->fixup; + + if (addr >= start && addr < end) + return fixup; + + rte++; + } + return 0; +} +NOKPROBE_SYMBOL(search_kernel_restart_table); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 45bda2520755..d8d5f901cee1 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1700,6 +1700,28 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->val = regs->ccr & imm; goto compute_done; + case 128: /* setb */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + /* + * 'ra' encodes the CR field number (bfa) in the top 3 bits. + * Since each CR field is 4 bits, + * we can simply mask off the bottom two bits (bfa * 4) + * to yield the first bit in the CR field. + */ + ra = ra & ~0x3; + /* 'val' stores bits of the CR field (bfa) */ + val = regs->ccr >> (CR0_SHIFT - ra); + /* checks if the LT bit of CR field (bfa) is set */ + if (val & 8) + op->val = -1; + /* checks if the GT bit of CR field (bfa) is set */ + else if (val & 4) + op->val = 1; + else + op->val = 0; + goto compute_done; + case 144: /* mtcrf */ op->type = COMPUTE + SETCC; imm = 0xf0000000UL; @@ -3203,7 +3225,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) default: WARN_ON_ONCE(1); } - regs->nip = next_pc; + regs_set_return_ip(regs, next_pc); } NOKPROBE_SYMBOL(emulate_update_regs); @@ -3541,7 +3563,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) /* can't step mtmsr[d] that would clear MSR_RI */ return -1; /* here op.val is the mask of bits to change */ - regs->msr = (regs->msr & ~op.val) | (val & op.val); + regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val)); goto instr_done; #ifdef CONFIG_PPC64 @@ -3554,7 +3576,7 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) if (IS_ENABLED(CONFIG_PPC_FAST_ENDIAN_SWITCH) && cpu_has_feature(CPU_FTR_REAL_LE) && regs->gpr[0] == 0x1ebe) { - regs->msr ^= MSR_LE; + regs_set_return_msr(regs, regs->msr ^ MSR_LE); goto instr_done; } regs->gpr[9] = regs->gpr[13]; @@ -3562,8 +3584,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->gpr[11] = regs->nip + 4; regs->gpr[12] = regs->msr & MSR_MASK; regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; + regs_set_return_ip(regs, (unsigned long) &system_call_common); + regs_set_return_msr(regs, MSR_KERNEL); return 1; #ifdef CONFIG_PPC_BOOK3S_64 @@ -3573,8 +3595,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) regs->gpr[11] = regs->nip + 4; regs->gpr[12] = regs->msr & MSR_MASK; regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_vectored_emulate; - regs->msr = MSR_KERNEL; + regs_set_return_ip(regs, (unsigned long) &system_call_vectored_emulate); + regs_set_return_msr(regs, MSR_KERNEL); return 1; #endif @@ -3585,7 +3607,8 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr) return 0; instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)); + regs_set_return_ip(regs, + truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type))); return 1; } NOKPROBE_SYMBOL(emulate_step); diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 783d1b85ecfe..8b4f6b3e96c4 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -53,6 +53,8 @@ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \ PPC_RAW_ADDI(t, a, i)) +#define TEST_SETB(t, bfa) ppc_inst(PPC_INST_SETB | ___PPC_RT(t) | ___PPC_RA((bfa & 0x7) << 2)) + static void __init init_pt_regs(struct pt_regs *regs) { @@ -824,8 +826,7 @@ static void __init test_plxvp_pstxvp(void) * XTp = 32xTX + 2xTp * let RA=3 R=0 D=d0||d1=0 R=0 Tp=1 TX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PLXVP(34, 0, 3, 0) >> 32, - PPC_RAW_PLXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0)); stepped = emulate_step(®s, instr); if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) { @@ -853,8 +854,7 @@ static void __init test_plxvp_pstxvp(void) * XSp = 32xSX + 2xSp * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1 */ - instr = ppc_inst_prefix(PPC_RAW_PSTXVP(34, 0, 3, 0) >> 32, - PPC_RAW_PSTXVP(34, 0, 3, 0) & 0xffffffff); + instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0)); stepped = emulate_step(®s, instr); @@ -922,7 +922,7 @@ static struct compute_test compute_tests[] = { .subtests = { { .descr = "R0 = LONG_MAX", - .instr = ppc_inst(PPC_INST_NOP), + .instr = ppc_inst(PPC_RAW_NOP()), .regs = { .gpr[0] = LONG_MAX, } @@ -930,6 +930,33 @@ static struct compute_test compute_tests[] = { } }, { + .mnemonic = "setb", + .cpu_feature = CPU_FTR_ARCH_300, + .subtests = { + { + .descr = "BFA = 1, CR = GT", + .instr = TEST_SETB(20, 1), + .regs = { + .ccr = 0x4000000, + } + }, + { + .descr = "BFA = 4, CR = LT", + .instr = TEST_SETB(20, 4), + .regs = { + .ccr = 0x8000, + } + }, + { + .descr = "BFA = 5, CR = EQ", + .instr = TEST_SETB(20, 5), + .regs = { + .ccr = 0x200, + } + } + } + }, + { .mnemonic = "add", .subtests = { { @@ -1582,6 +1609,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs, if (!regs || !ppc_inst_val(instr)) return -EINVAL; + /* This is not a return frame regs */ regs->nip = patch_site_addr(&patch__exec_instr); analysed = analyse_instr(&op, regs, instr); diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c index 327165f26ca6..36761bd00f38 100644 --- a/arch/powerpc/math-emu/math.c +++ b/arch/powerpc/math-emu/math.c @@ -453,7 +453,7 @@ do_mathemu(struct pt_regs *regs) break; } - regs->nip += 4; + regs_add_return_ip(regs, 4); return 0; illegal: diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c index 0a05e51964c1..39b84e7452e1 100644 --- a/arch/powerpc/math-emu/math_efp.c +++ b/arch/powerpc/math-emu/math_efp.c @@ -710,7 +710,7 @@ update_regs: illegal: if (have_e500_cpu_a005_erratum) { /* according to e500 cpu a005 erratum, reissue efp inst */ - regs->nip -= 4; + regs_add_return_ip(regs, -4); pr_debug("re-issue efp inst: %08lx\n", speinsn); return 0; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 2ffcf540f08b..eae4ec2988fc 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o \ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 7f0c8a78ba0c..15f4773643d2 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -10,3 +10,4 @@ obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o obj-$(CONFIG_PPC_KUEP) += kuep.o +obj-$(CONFIG_PPC_KUAP) += kuap.o diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index fb4233a5bdf7..6925ce998557 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -27,8 +27,10 @@ #include <asm/code-patching-asm.h> #ifdef CONFIG_PTE_64BIT +#define PTE_T_SIZE 8 #define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */ #else +#define PTE_T_SIZE 4 #define PTE_FLAGS_OFFSET 0 #endif @@ -488,7 +490,7 @@ _GLOBAL(flush_hash_pages) bne 2f ble cr1,19f addi r4,r4,0x1000 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r6,r6,-1 b 1b @@ -573,7 +575,7 @@ _GLOBAL(flush_hash_pages) 8: ble cr1,9f /* if all ptes checked */ 81: addi r6,r6,-1 - addi r5,r5,PTE_SIZE + addi r5,r5,PTE_T_SIZE addi r4,r4,0x1000 lwz r0,0(r5) /* check next pte */ cmpwi cr1,r6,1 diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c new file mode 100644 index 000000000000..0f920f09af57 --- /dev/null +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <asm/kup.h> +#include <asm/smp.h> + +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + +void kuap_lock_all_ool(void) +{ + kuap_lock_all(); +} +EXPORT_SYMBOL(kuap_lock_all_ool); + +void kuap_unlock_all_ool(void) +{ + kuap_unlock_all(); +} +EXPORT_SYMBOL(kuap_unlock_all_ool); + +void setup_kuap(bool disabled) +{ + if (!disabled) + kuap_lock_all_ool(); + + if (smp_processor_id() != boot_cpuid) + return; + + if (disabled) + static_branch_enable(&disable_kuap_key); + else + pr_info("Activating Kernel Userspace Access Protection\n"); +} diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c index 8ed1b8634839..c20733d6e02c 100644 --- a/arch/powerpc/mm/book3s32/kuep.c +++ b/arch/powerpc/mm/book3s32/kuep.c @@ -1,40 +1,20 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <asm/kup.h> -#include <asm/reg.h> -#include <asm/task_size_32.h> -#include <asm/mmu.h> +#include <asm/smp.h> -#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { \ - if (TASK_SIZE > ((n) << 28)) \ - mtsr(val1, (n) << 28); \ - if (TASK_SIZE > (((n) + 1) << 28)) \ - mtsr(val2, ((n) + 1) << 28); \ - val1 = (val1 + 0x222) & 0xf0ffffff; \ - val2 = (val2 + 0x222) & 0xf0ffffff; \ -} while (0) +struct static_key_false disable_kuep_key; -static __always_inline void kuep_update(u32 val) +void setup_kuep(bool disabled) { - int val1 = val; - int val2 = (val + 0x111) & 0xf0ffffff; + if (!disabled) + kuep_lock(); - KUEP_UPDATE_TWO_USER_SEGMENTS(0); - KUEP_UPDATE_TWO_USER_SEGMENTS(2); - KUEP_UPDATE_TWO_USER_SEGMENTS(4); - KUEP_UPDATE_TWO_USER_SEGMENTS(6); - KUEP_UPDATE_TWO_USER_SEGMENTS(8); - KUEP_UPDATE_TWO_USER_SEGMENTS(10); - KUEP_UPDATE_TWO_USER_SEGMENTS(12); - KUEP_UPDATE_TWO_USER_SEGMENTS(14); -} + if (smp_processor_id() != boot_cpuid) + return; -void kuep_lock(void) -{ - kuep_update(mfsr(0) | SR_NX); -} - -void kuep_unlock(void) -{ - kuep_update(mfsr(0) & ~SR_NX); + if (disabled) + static_branch_enable(&disable_kuep_key); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); } diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 159930351d9f..27061583a010 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -445,26 +445,6 @@ void __init print_system_hash_info(void) pr_info("Hash_mask = 0x%lx\n", Hash_mask); } -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - if (disabled) - pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - -#ifdef CONFIG_PPC_KUAP -void __init setup_kuap(bool disabled) -{ - pr_info("Activating Kernel Userspace Access Protection\n"); - - if (disabled) - pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); -} -#endif - void __init early_init_mmu(void) { } diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index 218996e40a8e..e2708e387dc3 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -24,6 +24,12 @@ #include <asm/mmu_context.h> /* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +void *abatron_pteptrs[2]; + +/* * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs * (virtual segment identifiers) for each context. Although the * hardware supports 24-bit VSIDs, and thus >1 million contexts, @@ -39,19 +45,6 @@ #define LAST_CONTEXT 32767 #define FIRST_CONTEXT 1 -/* - * This function defines the mapping from contexts to VSIDs (virtual - * segment IDs). We use a skew on both the context and the high 4 bits - * of the 32-bit virtual address (the "effective segment ID") in order - * to spread out the entries in the MMU hash table. Note, if this - * function is changed then arch/ppc/mm/hashtable.S will have to be - * changed to correspond. - * - * - * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \ - * & 0xffffff) - */ - static unsigned long next_mmu_context; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; @@ -111,3 +104,32 @@ void __init mmu_context_init(void) context_map[0] = (1 << FIRST_CONTEXT) - 1; next_mmu_context = FIRST_CONTEXT; } + +void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +{ + long id = next->context.id; + unsigned long val; + + if (id < 0) + panic("mm_struct %p has no context ID", next); + + isync(); + + val = CTX_TO_VSID(id, 0); + if (!kuep_is_disabled()) + val |= SR_NX; + if (!kuap_is_disabled()) + val |= SR_KS; + + update_user_segments(val); + + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; + + if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) + mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff); + + mb(); /* sync */ + isync(); +} +EXPORT_SYMBOL(switch_mmu_context); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 96d9aa164007..ac5720371c0d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); -DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) +DECLARE_INTERRUPT_HANDLER(__do_hash_fault); +DEFINE_INTERRUPT_HANDLER(__do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) unsigned int region_id; long err; + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { + hash__do_page_fault(regs); + return; + } + region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; @@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) bad_page_fault(regs, SIGBUS); } err = 0; - } - return err; + } else if (err) { + hash__do_page_fault(regs); + } } /* @@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) */ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { - unsigned long dsisr = regs->dsisr; - - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) { - hash__do_page_fault(regs); - return 0; - } - /* * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then * don't call hash_page, just fail the fault. This is required to @@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) return 0; } - if (__do_hash_fault(regs)) - hash__do_page_fault(regs); + __do_hash_fault(regs); return 0; } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 318ec4f33661..ae12fb5559cb 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -354,22 +354,30 @@ static inline void fixup_tlbie_lpid(unsigned long lpid) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; asm volatile("ptesync": : :"memory"); - /* - * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, - * also flush the entire Page Walk Cache. - */ - __tlbiel_pid(pid, 0, ric); + switch (ric) { + case RIC_FLUSH_PWC: - /* For PWC, only one flush is needed */ - if (ric == RIC_FLUSH_PWC) { + /* For PWC, only one flush is needed */ + __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); ppc_after_tlbiel_barrier(); return; + case RIC_FLUSH_TLB: + __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_ALL: + default: + /* + * Flush the first set of the TLB, and if + * we're doing a RIC_FLUSH_ALL, also flush + * the entire Page Walk Cache. + */ + __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); } if (!cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -1290,7 +1298,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) } } -static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, +static void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize, bool also_pwc) { diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c index 743e11384dea..9d13143b8be4 100644 --- a/arch/powerpc/mm/ioremap_32.c +++ b/arch/powerpc/mm/ioremap_32.c @@ -70,10 +70,10 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call */ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller); - err = early_ioremap_range(ioremap_bot - size, p, size, prot); + err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot); if (err) return NULL; - ioremap_bot -= size; + ioremap_bot -= size + PAGE_SIZE; return (void __iomem *)ioremap_bot + offset; } diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index ba5cbb0d66bd..3acece00b33e 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -38,7 +38,7 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, return NULL; ret = (void __iomem *)ioremap_bot + offset; - ioremap_bot += size; + ioremap_bot += size + PAGE_SIZE; return ret; } diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index a3c30a884076..aad7c47e0030 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -12,7 +12,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) return is_kernel_addr((unsigned long)unsafe_src); } -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) +int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) { unsigned int val, suffix; int err; @@ -21,7 +21,7 @@ int copy_inst_from_kernel_nofault(struct ppc_inst *inst, struct ppc_inst *src) if (err) return err; if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c5e520c6f13b..ad198b439222 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -28,6 +28,9 @@ unsigned long long memory_limit; bool init_mem_is_free; +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -299,6 +302,10 @@ void __init mem_init(void) ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", VMALLOC_START, VMALLOC_END); +#ifdef MODULES_VADDR + pr_info(" * 0x%08lx..0x%08lx : modules\n", + MODULES_VADDR, MODULES_END); +#endif #endif /* CONFIG_PPC32 */ } diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index 3d6ae7c72412..e079f26b267e 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -25,6 +25,7 @@ #include <asm/page.h> #include <asm/cacheflush.h> #include <asm/code-patching.h> +#include <asm/smp.h> #include <mm/mmu_decl.h> @@ -239,3 +240,19 @@ void __init mmu_init_secondary(int cpu) } } #endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) +{ + if (smp_processor_id() != boot_cpuid) + return; + + if (disabled) + patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); + else + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (IS_ENABLED(CONFIG_PPC_47x) && disabled) + patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); +} +#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 71bfdbedacee..60780e089118 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -212,37 +212,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } -/* - * Set up to use a given MMU context. - * id is context number, pgd is PGD pointer. - * - * We place the physical address of the new task page directory loaded - * into the MMU base register, and set the ASID compare register with - * the new "context." - */ -void set_context(unsigned long id, pgd_t *pgd) -{ - s16 offset = (s16)(__pa(swapper_pg_dir)); - - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is passed as second argument. - */ - if (IS_ENABLED(CONFIG_BDI_SWITCH)) - abatron_pteptrs[1] = pgd; - - /* Register M_TWB will contain base address of level 1 table minus the - * lower part of the kernel PGDIR base address, so that all accesses to - * level 1 table are done relative to lower part of kernel PGDIR base - * address. - */ - mtspr(SPRN_M_TWB, __pa(pgd) - offset); - - /* Update context */ - mtspr(SPRN_M_CASID, id - 1); - /* sync */ - mb(); -} - #ifdef CONFIG_PPC_KUEP void __init setup_kuep(bool disabled) { @@ -256,12 +225,17 @@ void __init setup_kuep(bool disabled) #endif #ifdef CONFIG_PPC_KUAP +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + void __init setup_kuap(bool disabled) { - pr_info("Activating Kernel Userspace Access Protection\n"); + if (disabled) { + static_branch_enable(&disable_kuap_key); + return; + } - if (disabled) - pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + pr_info("Activating Kernel Userspace Access Protection\n"); mtspr(SPRN_MD_AP, MD_APG_KUAP); } diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index aac81c9f84a5..44b2b5e7cabe 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -21,21 +21,6 @@ * also clear mm->cpu_vm_mask bits when processes are migrated */ -//#define DEBUG_MAP_CONSISTENCY -//#define DEBUG_CLAMP_LAST_CONTEXT 31 -//#define DEBUG_HARDER - -/* We don't use DEBUG because it tends to be compiled in always nowadays - * and this would generate way too much output - */ -#ifdef DEBUG_HARDER -#define pr_hard(args...) printk(KERN_DEBUG args) -#define pr_hardcont(args...) printk(KERN_CONT args) -#else -#define pr_hard(args...) do { } while(0) -#define pr_hardcont(args...) do { } while(0) -#endif - #include <linux/kernel.h> #include <linux/mm.h> #include <linux/init.h> @@ -47,10 +32,17 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> +#include <asm/smp.h> #include <mm/mmu_decl.h> /* + * Room for two PTE table pointers, usually the kernel and current user + * pointer to their respective root page table (pgdir). + */ +void *abatron_pteptrs[2]; + +/* * The MPC8xx has only 16 contexts. We rotate through them on each task switch. * A better way would be to keep track of tasks that own contexts, and implement * an LRU usage. That way very active tasks don't always have to pay the TLB @@ -68,9 +60,7 @@ * -- BenH */ #define FIRST_CONTEXT 1 -#ifdef DEBUG_CLAMP_LAST_CONTEXT -#define LAST_CONTEXT DEBUG_CLAMP_LAST_CONTEXT -#elif defined(CONFIG_PPC_8xx) +#if defined(CONFIG_PPC_8xx) #define LAST_CONTEXT 16 #elif defined(CONFIG_PPC_47x) #define LAST_CONTEXT 65535 @@ -80,9 +70,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; -#ifdef CONFIG_SMP static unsigned long *stale_map[NR_CPUS]; -#endif static struct mm_struct **context_mm; static DEFINE_RAW_SPINLOCK(context_lock); @@ -105,7 +93,6 @@ static DEFINE_RAW_SPINLOCK(context_lock); * the stale map as we can just flush the local CPU * -- benh */ -#ifdef CONFIG_SMP static unsigned int steal_context_smp(unsigned int id) { struct mm_struct *mm; @@ -127,7 +114,6 @@ static unsigned int steal_context_smp(unsigned int id) id = FIRST_CONTEXT; continue; } - pr_hardcont(" | steal %d from 0x%p", id, mm); /* Mark this mm has having no context anymore */ mm->context.id = MMU_NO_CONTEXT; @@ -158,34 +144,25 @@ static unsigned int steal_context_smp(unsigned int id) /* This will cause the caller to try again */ return MMU_NO_CONTEXT; } -#endif /* CONFIG_SMP */ static unsigned int steal_all_contexts(void) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif unsigned int id; for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Mark this mm as having no context anymore */ mm->context.id = MMU_NO_CONTEXT; if (id != FIRST_CONTEXT) { context_mm[id] = NULL; __clear_bit(id, context_map); -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif } -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); } /* Flush the TLB for all contexts (not to be used on SMP) */ @@ -204,15 +181,11 @@ static unsigned int steal_all_contexts(void) static unsigned int steal_context_up(unsigned int id) { struct mm_struct *mm; -#ifdef CONFIG_SMP int cpu = smp_processor_id(); -#endif /* Pick up the victim mm */ mm = context_mm[id]; - pr_hardcont(" | steal %d from 0x%p", id, mm); - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); @@ -220,81 +193,64 @@ static unsigned int steal_context_up(unsigned int id) mm->context.id = MMU_NO_CONTEXT; /* XXX This clear should ultimately be part of local_flush_tlb_mm */ -#ifdef CONFIG_SMP - __clear_bit(id, stale_map[cpu]); -#endif + if (IS_ENABLED(CONFIG_SMP)) + __clear_bit(id, stale_map[cpu]); return id; } -#ifdef DEBUG_MAP_CONSISTENCY -static void context_check_map(void) +static void set_context(unsigned long id, pgd_t *pgd) { - unsigned int id, nrf, nact; + if (IS_ENABLED(CONFIG_PPC_8xx)) { + s16 offset = (s16)(__pa(swapper_pg_dir)); + + /* + * Register M_TWB will contain base address of level 1 table minus the + * lower part of the kernel PGDIR base address, so that all accesses to + * level 1 table are done relative to lower part of kernel PGDIR base + * address. + */ + mtspr(SPRN_M_TWB, __pa(pgd) - offset); - nrf = nact = 0; - for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) { - int used = test_bit(id, context_map); - if (!used) - nrf++; - if (used != (context_mm[id] != NULL)) - pr_err("MMU: Context %d is %s and MM is %p !\n", - id, used ? "used" : "free", context_mm[id]); - if (context_mm[id] != NULL) - nact += context_mm[id]->context.active; - } - if (nrf != nr_free_contexts) { - pr_err("MMU: Free context count out of sync ! (%d vs %d)\n", - nr_free_contexts, nrf); - nr_free_contexts = nrf; + /* Update context */ + mtspr(SPRN_M_CASID, id - 1); + + /* sync */ + mb(); + } else { + if (IS_ENABLED(CONFIG_40x)) + mb(); /* sync */ + + mtspr(SPRN_PID, id); + isync(); } - if (nact > num_online_cpus()) - pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n", - nact, num_online_cpus()); - if (FIRST_CONTEXT > 0 && !test_bit(0, context_map)) - pr_err("MMU: Context 0 has been freed !!!\n"); } -#else -static void context_check_map(void) { } -#endif void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { unsigned int id; -#ifdef CONFIG_SMP unsigned int i, cpu = smp_processor_id(); -#endif unsigned long *map; /* No lockless fast path .. yet */ raw_spin_lock(&context_lock); - pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", - cpu, next, next->context.active, next->context.id); - -#ifdef CONFIG_SMP - /* Mark us active and the previous one not anymore */ - next->context.active++; - if (prev) { - pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active); - WARN_ON(prev->context.active < 1); - prev->context.active--; + if (IS_ENABLED(CONFIG_SMP)) { + /* Mark us active and the previous one not anymore */ + next->context.active++; + if (prev) { + WARN_ON(prev->context.active < 1); + prev->context.active--; + } } again: -#endif /* CONFIG_SMP */ /* If we already have a valid assigned context, skip all that */ id = next->context.id; - if (likely(id != MMU_NO_CONTEXT)) { -#ifdef DEBUG_MAP_CONSISTENCY - if (context_mm[id] != next) - pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n", - next, id, id, context_mm[id]); -#endif + if (likely(id != MMU_NO_CONTEXT)) goto ctxt_ok; - } /* We really don't have a context, let's try to acquire one */ id = next_context; @@ -304,14 +260,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, /* No more free contexts, let's try to steal one */ if (nr_free_contexts == 0) { -#ifdef CONFIG_SMP if (num_online_cpus() > 1) { id = steal_context_smp(id); if (id == MMU_NO_CONTEXT) goto again; goto stolen; } -#endif /* CONFIG_SMP */ if (IS_ENABLED(CONFIG_PPC_8xx)) id = steal_all_contexts(); else @@ -330,20 +284,13 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, next_context = id + 1; context_mm[id] = next; next->context.id = id; - pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts); - context_check_map(); ctxt_ok: /* If that context got marked stale on this CPU, then flush the * local TLB for it and unmark it before we use it */ -#ifdef CONFIG_SMP - if (test_bit(id, stale_map[cpu])) { - pr_hardcont(" | stale flush %d [%d..%d]", - id, cpu_first_thread_sibling(cpu), - cpu_last_thread_sibling(cpu)); - + if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) { local_flush_tlb_mm(next); /* XXX This clear should ultimately be part of local_flush_tlb_mm */ @@ -353,10 +300,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, __clear_bit(id, stale_map[i]); } } -#endif /* Flick the MMU and release lock */ - pr_hardcont(" -> %d\n", id); + if (IS_ENABLED(CONFIG_BDI_SWITCH)) + abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); raw_spin_unlock(&context_lock); } @@ -366,8 +313,6 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, */ int init_new_context(struct task_struct *t, struct mm_struct *mm) { - pr_hard("initing context for mm @%p\n", mm); - /* * We have MMU_NO_CONTEXT set to be ~0. Hence check * explicitly against context.id == 0. This ensures that we properly @@ -401,16 +346,12 @@ void destroy_context(struct mm_struct *mm) if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); mm->context.id = MMU_NO_CONTEXT; -#ifdef DEBUG_MAP_CONSISTENCY - mm->context.active = 0; -#endif context_mm[id] = NULL; nr_free_contexts++; } raw_spin_unlock_irqrestore(&context_lock, flags); } -#ifdef CONFIG_SMP static int mmu_ctx_cpu_prepare(unsigned int cpu) { /* We don't touch CPU 0 map, it's allocated at aboot and kept @@ -419,7 +360,6 @@ static int mmu_ctx_cpu_prepare(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); return 0; } @@ -430,7 +370,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) if (cpu == boot_cpuid) return 0; - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); kfree(stale_map[cpu]); stale_map[cpu] = NULL; @@ -440,8 +379,6 @@ static int mmu_ctx_cpu_dead(unsigned int cpu) return 0; } -#endif /* CONFIG_SMP */ - /* * Initialize the context management stuff. */ @@ -465,16 +402,16 @@ void __init mmu_context_init(void) if (!context_mm) panic("%s: Failed to allocate %zu bytes\n", __func__, sizeof(void *) * (LAST_CONTEXT + 1)); -#ifdef CONFIG_SMP - stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); - if (!stale_map[boot_cpuid]) - panic("%s: Failed to allocate %zu bytes\n", __func__, - CTX_MAP_SIZE); - - cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, - "powerpc/mmu/ctx:prepare", - mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); -#endif + if (IS_ENABLED(CONFIG_SMP)) { + stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES); + if (!stale_map[boot_cpuid]) + panic("%s: Failed to allocate %zu bytes\n", __func__, + CTX_MAP_SIZE); + + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); + } printk(KERN_INFO "MMU: Allocated %zu bytes of context maps for %d contexts\n", diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 68797e072f55..4613bf8e9aae 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -360,19 +360,6 @@ _GLOBAL(_tlbivax_bcast) sync wrtee r10 blr - -_GLOBAL(set_context) -#ifdef CONFIG_BDI_SWITCH - /* Context switch the PTE pointer for the Abatron BDI2000. - * The PGDIR is the second parameter. - */ - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r4, 0x4(r5) -#endif - mtspr SPRN_PID,r3 - isync /* Force context change */ - blr #else #error Unsupported processor type ! #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c new file mode 100644 index 000000000000..0876216ceee6 --- /dev/null +++ b/arch/powerpc/mm/pageattr.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * MMU-generic set_memory implementation for powerpc + * + * Copyright 2019-2021, IBM Corporation. + */ + +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/set_memory.h> + +#include <asm/mmu.h> +#include <asm/page.h> +#include <asm/pgtable.h> + + +/* + * Updates the attributes of a page in three steps: + * + * 1. invalidate the page table entry + * 2. flush the TLB + * 3. install the new entry with the updated attributes + * + * Invalidating the pte means there are situations where this will not work + * when in theory it should. + * For example: + * - removing write from page whilst it is being executed + * - setting a page read-only whilst it is being read by another CPU + * + */ +static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + long action = (long)data; + pte_t pte; + + spin_lock(&init_mm.page_table_lock); + + /* invalidate the PTE so it's safe to modify */ + pte = ptep_get_and_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + /* modify the PTE bits as desired, then apply */ + switch (action) { + case SET_MEMORY_RO: + pte = pte_wrprotect(pte); + break; + case SET_MEMORY_RW: + pte = pte_mkwrite(pte_mkdirty(pte)); + break; + case SET_MEMORY_NX: + pte = pte_exprotect(pte); + break; + case SET_MEMORY_X: + pte = pte_mkexec(pte); + break; + default: + WARN_ON_ONCE(1); + break; + } + + set_pte_at(&init_mm, addr, ptep, pte); + + /* See ptesync comment in radix__set_pte_at() */ + if (radix_enabled()) + asm volatile("ptesync": : :"memory"); + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int change_memory_attr(unsigned long addr, int numpages, long action) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long size = numpages * PAGE_SIZE; + + if (!numpages) + return 0; + + if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) && + is_vm_area_hugepages((void *)addr))) + return -EINVAL; + +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * On hash, the linear mapping is not in the Linux page table so + * apply_to_existing_page_range() will have no effect. If in the future + * the set_memory_* functions are used on the linear map this will need + * to be updated. + */ + if (!radix_enabled()) { + int region = get_region_id(addr); + + if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID)) + return -EINVAL; + } +#endif + + return apply_to_existing_page_range(&init_mm, start, size, + change_page_attr, (void *)action); +} + +/* + * Set the attributes of a page: + * + * This function is used by PPC32 at the end of init to set final kernel memory + * protection. It includes changing the maping of the page it is executing from + * and data pages it is using. + */ +static int set_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + pgprot_t prot = __pgprot((unsigned long)data); + + spin_lock(&init_mm.page_table_lock); + + set_pte_at(&init_mm, addr, ptep, pte_modify(*ptep, prot)); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +int set_memory_attr(unsigned long addr, int numpages, pgprot_t prot) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long sz = numpages * PAGE_SIZE; + + if (numpages <= 0) + return 0; + + return apply_to_existing_page_range(&init_mm, start, sz, set_page_attr, + (void *)pgprot_val(prot)); +} diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 354611940118..cd16b407f47e 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -28,6 +28,14 @@ #include <asm/hugetlb.h> #include <asm/pte-walk.h> +#ifdef CONFIG_PPC64 +#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD) +#else +#define PGD_ALIGN PAGE_SIZE +#endif + +pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN); + static inline int is_exec_fault(void) { return current->thread.regs && TRAP(current->thread.regs) == 0x400; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e0ec67a16887..dcf5ecca19d9 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -23,6 +23,7 @@ #include <linux/highmem.h> #include <linux/memblock.h> #include <linux/slab.h> +#include <linux/set_memory.h> #include <asm/pgalloc.h> #include <asm/fixmap.h> @@ -132,64 +133,20 @@ void __init mapin_ram(void) } } -static int __change_page_attr_noflush(struct page *page, pgprot_t prot) -{ - pte_t *kpte; - unsigned long address; - - BUG_ON(PageHighMem(page)); - address = (unsigned long)page_address(page); - - if (v_block_mapped(address)) - return 0; - kpte = virt_to_kpte(address); - if (!kpte) - return -EINVAL; - __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - - return 0; -} - -/* - * Change the page attributes of an page in the linear mapping. - * - * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY - */ -static int change_page_attr(struct page *page, int numpages, pgprot_t prot) -{ - int i, err = 0; - unsigned long flags; - struct page *start = page; - - local_irq_save(flags); - for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr_noflush(page, prot); - if (err) - break; - } - wmb(); - local_irq_restore(flags); - flush_tlb_kernel_range((unsigned long)page_address(start), - (unsigned long)page_address(page)); - return err; -} - void mark_initmem_nx(void) { - struct page *page = virt_to_page(_sinittext); unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else - change_page_attr(page, numpages, PAGE_KERNEL); + set_memory_attr((unsigned long)_sinittext, numpages, PAGE_KERNEL); } #ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void) { - struct page *page; unsigned long numpages; if (v_block_mapped((unsigned long)_stext + 1)) { @@ -198,20 +155,18 @@ void mark_rodata_ro(void) return; } - page = virt_to_page(_stext); numpages = PFN_UP((unsigned long)_etext) - PFN_DOWN((unsigned long)_stext); - change_page_attr(page, numpages, PAGE_KERNEL_ROX); + set_memory_attr((unsigned long)_stext, numpages, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ - page = virt_to_page(__start_rodata); numpages = PFN_UP((unsigned long)__init_begin) - PFN_DOWN((unsigned long)__start_rodata); - change_page_attr(page, numpages, PAGE_KERNEL_RO); + set_memory_attr((unsigned long)__start_rodata, numpages, PAGE_KERNEL_RO); // mark_initmem_nx() should have already run by now ptdump_check_wx(); @@ -221,9 +176,14 @@ void mark_rodata_ro(void) #ifdef CONFIG_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { + unsigned long addr = (unsigned long)page_address(page); + if (PageHighMem(page)) return; - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + if (enable) + set_memory_attr(addr, numpages, PAGE_KERNEL); + else + set_memory_attr(addr, numpages, __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index aca354fb670b..5062c58b1e5b 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -58,8 +58,6 @@ struct pg_state { const struct addr_marker *marker; unsigned long start_address; unsigned long start_pa; - unsigned long last_pa; - unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -163,8 +161,6 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info static void dump_addr(struct pg_state *st, unsigned long addr) { - unsigned long delta; - #ifdef CONFIG_PPC64 #define REG "0x%016lx" #else @@ -172,14 +168,8 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { - pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = st->page_size >> 10; - } else { - pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); - delta = (addr - st->start_address) >> 10; - } - pt_dump_size(st->seq, delta); + pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); + pt_dump_size(st->seq, (addr - st->start_address) >> 10); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -208,7 +198,6 @@ static void note_page_update_state(struct pg_state *st, unsigned long addr, st->current_flags = flag; st->start_address = addr; st->start_pa = pa; - st->page_size = page_size; while (addr >= st->marker[1].start_address) { st->marker++; @@ -220,7 +209,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; - u64 pa = val & PTE_RPN_MASK; /* At first no level is set */ if (!st->level) { @@ -232,12 +220,9 @@ static void note_page(struct pg_state *st, unsigned long addr, * - we change levels in the tree. * - the address is in a different section of memory and is thus * used for a different purpose, regardless of the flags. - * - the pa of this page is not adjacent to the last inspected page */ } else if (flag != st->current_flags || level != st->level || - addr >= st->marker[1].start_address || - (pa != st->last_pa + st->page_size && - (pa != st->start_pa || st->start_pa != st->last_pa))) { + addr >= st->marker[1].start_address) { /* Check the PTE flags */ if (st->current_flags) { @@ -259,7 +244,6 @@ static void note_page(struct pg_state *st, unsigned long addr, */ note_page_update_state(st, addr, level, val, page_size); } - st->last_pa = pa; } static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 798ac4350a82..53aefee3fe70 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -237,6 +237,7 @@ skip_codegen_passes: fp->jited_len = alloclen; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_jit_binary_lock_ro(bpf_hdr); if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, addrs); out_addrs: @@ -257,15 +258,3 @@ out: return fp; } - -/* Overriding bpf_jit_free() as we don't set images read-only. */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *bpf_hdr = (void *)addr; - - if (fp->jited) - bpf_jit_binary_free(bpf_hdr); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index bbb16099e8c7..cbe5b399ed86 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -108,20 +108,20 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), __REG_R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); - EMIT(PPC_RAW_STWU(__REG_R1, __REG_R1, -BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* * Initialize tail_call_cnt in stack frame if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call. */ - if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - } else { + if (ctx->seen & SEEN_TAILCALL) + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + else EMIT(PPC_RAW_NOP()); - } #define BPF_TAILCALL_PROLOGUE_SIZE 16 @@ -130,30 +130,30 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MFLR(__REG_R0)); + EMIT(PPC_RAW_MFLR(_R0)); /* * Back up non-volatile regs -- registers r18-r31 */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_STW(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, BPF_PPC_STACKFRAME(ctx)) + 12); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), __REG_R1, + EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); } static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) @@ -163,24 +163,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) - EMIT(PPC_RAW_LWZ(i, __REG_R1, bpf_jit_stack_offsetof(ctx, i))); + EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); } void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDI(__REG_R1, __REG_R1, BPF_PPC_STACKFRAME(ctx))); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); EMIT(PPC_RAW_BLR()); } @@ -193,10 +193,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun PPC_BL_ABS(func); } else { /* Load function address into r0 */ - EMIT(PPC_RAW_LIS(__REG_R0, IMM_H(func))); - EMIT(PPC_RAW_ORI(__REG_R0, __REG_R0, IMM_L(func))); - EMIT(PPC_RAW_MTLR(__REG_R0)); - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); + EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func))); + EMIT(PPC_RAW_MTCTR(_R0)); + EMIT(PPC_RAW_BCTRL()); } } @@ -215,47 +215,47 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(__REG_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); - EMIT(PPC_RAW_CMPLW(b2p_index, __REG_R0)); - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); + EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); PPC_BCC(COND_GE, out); /* * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R0, MAX_TAIL_CALL_CNT)); + EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ - EMIT(PPC_RAW_ADDIC(__REG_R0, __REG_R0, 1)); + EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); PPC_BCC(COND_GT, out); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_RLWINM(__REG_R3, b2p_index, 2, 0, 29)); - EMIT(PPC_RAW_ADD(__REG_R3, __REG_R3, b2p_bpf_array)); - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(__REG_R0, __REG_R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); + EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); + EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLWI(__REG_R3, 0)); + EMIT(PPC_RAW_CMPLWI(_R3, 0)); PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - EMIT(PPC_RAW_LWZ(__REG_R3, __REG_R3, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(__REG_R0, __REG_R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(__REG_R3, __REG_R3, BPF_TAILCALL_PROLOGUE_SIZE)); + EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(__REG_R0)); + EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(__REG_R3)); + EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(__REG_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -352,8 +352,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDIC(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_ADDC(dst_reg, dst_reg, _R0)); } if (imm >= 0) EMIT(PPC_RAW_ADDZE(dst_reg_h, dst_reg_h)); @@ -362,11 +362,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_MULW(__REG_R0, dst_reg, src_reg_h)); + EMIT(PPC_RAW_MULW(_R0, dst_reg, src_reg_h)); EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_MULHWU(tmp_reg, dst_reg, src_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg)); break; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -376,8 +376,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_MULI(dst_reg, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ @@ -398,17 +398,17 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_MULW(dst_reg_h, dst_reg_h, tmp_reg)); if (imm < 0) EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, dst_reg)); - EMIT(PPC_RAW_MULHWU(__REG_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULHWU(_R0, dst_reg, tmp_reg)); EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0)); break; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); break; case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, src_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(_R0, src_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ return -EOPNOTSUPP; @@ -420,8 +420,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm == 1) break; - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, _R0)); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ if (!imm) @@ -430,9 +430,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (!is_power_of_2((u32)imm)) { bpf_set_seen_register(ctx, tmp_reg); PPC_LI32(tmp_reg, imm); - EMIT(PPC_RAW_DIVWU(__REG_R0, dst_reg, tmp_reg)); - EMIT(PPC_RAW_MULW(__REG_R0, tmp_reg, __REG_R0)); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_DIVWU(_R0, dst_reg, tmp_reg)); + EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, _R0)); break; } if (imm == 1) @@ -503,8 +503,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - fls(imm), 32 - ffs(imm))); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, _R0)); } break; case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ @@ -555,12 +555,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SLW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SRW(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_SRW(_R0, dst_reg, _R0)); EMIT(PPC_RAW_SLW(tmp_reg, dst_reg, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0)); EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg)); break; @@ -591,12 +591,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); EMIT(PPC_RAW_SRW(dst_reg_h, dst_reg_h, src_reg)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; @@ -627,15 +627,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ bpf_set_seen_register(ctx, tmp_reg); - EMIT(PPC_RAW_SUBFIC(__REG_R0, src_reg, 32)); + EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32)); EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_SLW(__REG_R0, dst_reg_h, __REG_R0)); + EMIT(PPC_RAW_SLW(_R0, dst_reg_h, _R0)); EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32)); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, __REG_R0)); - EMIT(PPC_RAW_RLWINM(__REG_R0, tmp_reg, 0, 26, 26)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0)); + EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26)); EMIT(PPC_RAW_SRAW(tmp_reg, dst_reg_h, tmp_reg)); EMIT(PPC_RAW_SRAW(dst_reg_h, dst_reg_h, src_reg)); - EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, __REG_R0)); + EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0)); EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg)); break; case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */ @@ -702,24 +702,24 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); break; case 64: bpf_set_seen_register(ctx, tmp_reg); EMIT(PPC_RAW_RLWINM(tmp_reg, dst_reg, 8, 0, 31)); - EMIT(PPC_RAW_RLWINM(__REG_R0, dst_reg_h, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(_R0, dst_reg_h, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 0, 7)); - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ EMIT(PPC_RAW_RLWIMI(tmp_reg, dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_RLWIMI(__REG_R0, dst_reg_h, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, __REG_R0)); + EMIT(PPC_RAW_RLWIMI(_R0, dst_reg_h, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, _R0)); EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg)); break; } @@ -744,32 +744,32 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STB(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STB(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STH(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STH(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off)); EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4)); break; case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off + 4)); - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_STW(__REG_R0, dst_reg, off)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4)); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_STW(_R0, dst_reg, off)); break; /* @@ -780,11 +780,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); /* load value from memory into r0 */ - EMIT(PPC_RAW_LWARX(__REG_R0, tmp_reg, dst_reg, 0)); + EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(__REG_R0, __REG_R0, src_reg)); + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); /* store result back */ - EMIT(PPC_RAW_STWCX(__REG_R0, tmp_reg, dst_reg)); + EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); break; @@ -852,14 +852,14 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * return ret; if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, __REG_R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), __REG_R1, 12)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); } bpf_jit_emit_func_call_rel(image, ctx, func_addr); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, __REG_R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), __REG_R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); break; /* @@ -967,12 +967,12 @@ cond_branch: EMIT(PPC_RAW_CMPW(dst_reg, src_reg)); break; case BPF_JMP | BPF_JSET | BPF_X: - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg_h, src_reg_h)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP32 | BPF_JSET | BPF_X: { - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, src_reg)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg)); break; case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JEQ | BPF_K: @@ -990,11 +990,11 @@ cond_branch: EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { /* sign-extending load ... but unsigned comparison */ - PPC_EX32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg_h, __REG_R0)); - PPC_LI32(__REG_R0, imm); + PPC_EX32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0)); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JNE | BPF_K: @@ -1006,8 +1006,8 @@ cond_branch: if (imm >= 0 && imm < 65536) { EMIT(PPC_RAW_CMPLWI(dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; } @@ -1022,9 +1022,9 @@ cond_branch: } else { /* sign-extending load */ EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0)); - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); - EMIT(PPC_RAW_CMPLW(dst_reg, __REG_R0)); + EMIT(PPC_RAW_CMPLW(dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSGT | BPF_K: @@ -1039,32 +1039,32 @@ cond_branch: EMIT(PPC_RAW_CMPWI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_CMPW(dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_CMPW(dst_reg, _R0)); } break; case BPF_JMP | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); + PPC_LI32(_R0, imm); if (imm < 0) { EMIT(PPC_RAW_CMPWI(dst_reg_h, 0)); PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4); } - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; case BPF_JMP32 | BPF_JSET | BPF_K: /* andi does not sign-extend the immediate */ if (imm >= -32768 && imm < 32768) { /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(__REG_R0, dst_reg, imm)); + EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm)); } else { - PPC_LI32(__REG_R0, imm); - EMIT(PPC_RAW_AND_DOT(__REG_R0, dst_reg, __REG_R0)); + PPC_LI32(_R0, imm); + EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0)); } break; } diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 57a8c1153851..5cad5b5a7e97 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -94,7 +94,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * save/restore LR unless we call other functions */ if (ctx->seen & SEEN_FUNC) { - EMIT(PPC_INST_MFLR | __PPC_RT(R0)); + EMIT(PPC_RAW_MFLR(_R0)); PPC_BPF_STL(0, 1, PPC_LR_STKOFF); } @@ -153,8 +153,8 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, PPC_LI64(b2p[TMP_REG_2], func); /* Load actual entry point from function descriptor */ PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to LR */ - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); + /* ... and move it to CTR */ + EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a @@ -165,9 +165,9 @@ static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, #else /* We can clobber r12 */ PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTLR(12)); + EMIT(PPC_RAW_MTCTR(12)); #endif - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_BCTRL()); } void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) @@ -202,8 +202,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun PPC_BPF_LL(12, 12, 0); #endif - EMIT(PPC_RAW_MTLR(12)); - EMIT(PPC_RAW_BLRL()); + EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_BCTRL()); } static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index c02854dea2b2..2f46e31c7612 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o -ifdef CONFIG_COMPAT -obj-$(CONFIG_PERF_EVENTS) += callchain_32.o -endif +obj-y += callchain.o callchain_$(BITS).o perf_regs.o +obj-$(CONFIG_COMPAT) += callchain_32.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 6c028ee513c0..082f6d0308a4 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -40,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -void +void __no_sanitize_address perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { unsigned long sp, next_sp; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 51622411a7cc..bb0ee716de91 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -460,7 +460,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - return branch_target((struct ppc_inst *)&instr); + return branch_target(&instr); } /* Userspace: need copy instruction here then translate it */ @@ -468,7 +468,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) sizeof(instr))) return 0; - target = branch_target((struct ppc_inst *)&instr); + target = branch_target(&instr); if ((!target) || (instr & BRANCH_ABSOLUTE)) return target; diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index eb8a6aaf4cc1..695975227e60 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -14,45 +14,119 @@ * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ pmc ] [unit ] [ ] m [ pmcxsel ] - * | | - * | *- mark - * | - * | - * *- combine - * - * Below uses IBM bit numbering. - * - * MMCR1[x:y] = unit (PMCxUNIT) - * MMCR1[24] = pmc1combine[0] - * MMCR1[25] = pmc1combine[1] - * MMCR1[26] = pmc2combine[0] - * MMCR1[27] = pmc2combine[1] - * MMCR1[28] = pmc3combine[0] - * MMCR1[29] = pmc3combine[1] - * MMCR1[30] = pmc4combine[0] - * MMCR1[31] = pmc4combine[1] - * + * [ pmc ] [ pmcxsel ] */ /* - * Some power9 event codes. + * Event codes defined in ISA v3.0B */ #define EVENT(_name, _code) _name = _code, enum { -EVENT(PM_CYC, 0x0001e) -EVENT(PM_INST_CMPL, 0x00002) + /* Cycles, alternate code */ + EVENT(PM_CYC_ALT, 0x100f0) + /* One or more instructions completed in a cycle */ + EVENT(PM_CYC_INST_CMPL, 0x100f2) + /* Floating-point instruction completed */ + EVENT(PM_FLOP_CMPL, 0x100f4) + /* Instruction ERAT/L1-TLB miss */ + EVENT(PM_L1_ITLB_MISS, 0x100f6) + /* All instructions completed and none available */ + EVENT(PM_NO_INST_AVAIL, 0x100f8) + /* A load-type instruction completed (ISA v3.0+) */ + EVENT(PM_LD_CMPL, 0x100fc) + /* Instruction completed, alternate code (ISA v3.0+) */ + EVENT(PM_INST_CMPL_ALT, 0x100fe) + /* A store-type instruction completed */ + EVENT(PM_ST_CMPL, 0x200f0) + /* Instruction Dispatched */ + EVENT(PM_INST_DISP, 0x200f2) + /* Run_cycles */ + EVENT(PM_RUN_CYC, 0x200f4) + /* Data ERAT/L1-TLB miss/reload */ + EVENT(PM_L1_DTLB_RELOAD, 0x200f6) + /* Taken branch completed */ + EVENT(PM_BR_TAKEN_CMPL, 0x200fa) + /* Demand iCache Miss */ + EVENT(PM_L1_ICACHE_MISS, 0x200fc) + /* L1 Dcache reload from memory */ + EVENT(PM_L1_RELOAD_FROM_MEM, 0x200fe) + /* L1 Dcache store miss */ + EVENT(PM_ST_MISS_L1, 0x300f0) + /* Alternate code for PM_INST_DISP */ + EVENT(PM_INST_DISP_ALT, 0x300f2) + /* Branch direction or target mispredicted */ + EVENT(PM_BR_MISPREDICT, 0x300f6) + /* Data TLB miss/reload */ + EVENT(PM_DTLB_MISS, 0x300fc) + /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ + EVENT(PM_DATA_FROM_L3MISS, 0x300fe) + /* L1 Dcache load miss */ + EVENT(PM_LD_MISS_L1, 0x400f0) + /* Cycle when instruction(s) dispatched */ + EVENT(PM_CYC_INST_DISP, 0x400f2) + /* Branch or branch target mispredicted */ + EVENT(PM_BR_MPRED_CMPL, 0x400f6) + /* Instructions completed with run latch set */ + EVENT(PM_RUN_INST_CMPL, 0x400fa) + /* Instruction TLB miss/reload */ + EVENT(PM_ITLB_MISS, 0x400fc) + /* Load data not cached */ + EVENT(PM_LD_NOT_CACHED, 0x400fe) + /* Instructions */ + EVENT(PM_INST_CMPL, 0x500fa) + /* Cycles */ + EVENT(PM_CYC, 0x600f4) }; #undef EVENT +/* Table of alternatives, sorted in increasing order of column 0 */ +/* Note that in each row, column 0 must be the smallest */ +static const unsigned int generic_event_alternatives[][MAX_ALT] = { + { PM_CYC_ALT, PM_CYC }, + { PM_INST_CMPL_ALT, PM_INST_CMPL }, + { PM_INST_DISP, PM_INST_DISP_ALT }, +}; + +static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(generic_event_alternatives), flags, + generic_event_alternatives); + + return num_alt; +} + GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_NO_INST_AVAIL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); static struct attribute *generic_compat_events_attr[] = { GENERIC_EVENT_PTR(PM_CYC), GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_NO_INST_AVAIL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), NULL }; @@ -63,17 +137,11 @@ static struct attribute_group generic_compat_pmu_events_group = { PMU_FORMAT_ATTR(event, "config:0-19"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); -PMU_FORMAT_ATTR(mark, "config:8"); -PMU_FORMAT_ATTR(combine, "config:10-11"); -PMU_FORMAT_ATTR(unit, "config:12-15"); PMU_FORMAT_ATTR(pmc, "config:16-19"); static struct attribute *generic_compat_pmu_format_attr[] = { &format_attr_event.attr, &format_attr_pmcxsel.attr, - &format_attr_mark.attr, - &format_attr_combine.attr, - &format_attr_unit.attr, &format_attr_pmc.attr, NULL, }; @@ -92,6 +160,9 @@ static const struct attribute_group *generic_compat_pmu_attr_groups[] = { static int compat_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_NO_INST_AVAIL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -105,11 +176,11 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_LD_MISS_L1, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_ST_MISS_L1, }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -119,7 +190,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1I) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -133,7 +204,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(LL) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -147,7 +218,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_DTLB_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -161,7 +232,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_ITLB_MISS, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -175,7 +246,7 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(BPU) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, + [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -204,13 +275,30 @@ static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { #undef C +/* + * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for + * PM_INST_CMPL and PM_CYC. + */ +static int generic_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[], u32 flags) +{ + int ret; + + ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags); + if (!ret) + mmcr->mmcr0 |= MMCR0_C56RUN; + return ret; +} + static struct power_pmu generic_compat_pmu = { .name = "GENERIC_COMPAT", .n_counter = MAX_PMU_COUNTERS, .add_fields = ISA207_ADD_FIELDS, .test_adder = ISA207_TEST_ADDER, - .compute_mmcr = isa207_compute_mmcr, + .compute_mmcr = generic_compute_mmcr, .get_constraint = isa207_get_constraint, + .get_alternatives = generic_get_alternatives, .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(compat_generic_events), @@ -223,6 +311,16 @@ int init_generic_compat_pmu(void) { int rc = 0; + /* + * From ISA v2.07 on, PMU features are architected; + * we require >= v3.0 because (a) that has PM_LD_CMPL and + * PM_INST_CMPL_ALT, which v2.07 doesn't have, and + * (b) we don't expect any non-IBM Power ISA + * implementations that conform to v2.07 but not v3.0. + */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + rc = register_power_pmu(&generic_compat_pmu); if (rc) return rc; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 8c0d324f657e..3823df235f25 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -582,6 +582,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd, if (ret) break; /* fall through and return the timeout */ + fallthrough; case WDIOC_GETTIMEOUT: /* we need to round here as to avoid e.g. the following diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c index 87f524e4b09c..8a7e55acf090 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c @@ -73,7 +73,7 @@ smp_86xx_kick_cpu(int nr) /* Setup fake reset vector to call __secondary_start_mpc86xx. */ target = (unsigned long) __secondary_start_mpc86xx; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Kick that CPU */ smp_86xx_release_core(nr); @@ -83,7 +83,7 @@ smp_86xx_kick_cpu(int nr) mdelay(1); /* Restore the exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 7a5e8f4541e3..e02d29a9d12f 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -20,6 +20,8 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +source "arch/powerpc/platforms/book3s/Kconfig" +source "arch/powerpc/platforms/microwatt/Kconfig" config KVM_GUEST bool "KVM Guest support" @@ -49,6 +51,7 @@ config PPC_NATIVE config PPC_OF_BOOT_TRAMPOLINE bool "Support booting from Open Firmware or yaboot" depends on PPC_BOOK3S_32 || PPC64 + select RELOCATABLE if PPC64 default y help Support from booting from Open Firmware or yaboot using an diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7d271de8fcbd..0e1bb1cedd13 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -61,6 +61,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT + select PPC_HAVE_KUEP endchoice @@ -390,7 +391,7 @@ config PPC_HAVE_KUEP config PPC_KUEP bool "Kernel Userspace Execution Prevention" depends on PPC_HAVE_KUEP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Execution Prevention (KUEP) @@ -402,7 +403,7 @@ config PPC_HAVE_KUAP config PPC_KUAP bool "Kernel Userspace Access Protection" depends on PPC_HAVE_KUAP - default y if !PPC_BOOK3S_32 + default y help Enable support for Kernel Userspace Access Protection (KUAP) @@ -425,10 +426,6 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S -config PPC_MMU_NOHASH_32 - def_bool y - depends on PPC_MMU_NOHASH && PPC32 - config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E @@ -477,9 +474,9 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-8192)" - range 2 8192 - depends on SMP + int "Maximum number of CPUs (2-8192)" if SMP + range 2 8192 if SMP + default "1" if !SMP default "32" if PPC64 default "4" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 143d4417f6cc..94470fb27c99 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,3 +22,5 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ +obj-$(CONFIG_PPC_BOOK3S) += book3s/ +obj-$(CONFIG_PPC_MICROWATT) += microwatt/ diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig new file mode 100644 index 000000000000..34c931592ef0 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_VAS + bool "IBM Virtual Accelerator Switchboard (VAS)" + depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES + default y + help + This enables support for IBM Virtual Accelerator Switchboard (VAS). + + VAS devices are found in POWER9-based and later systems, they + provide access to accelerator coprocessors such as NX-GZIP and + NX-842. This config allows the kernel to use NX-842 accelerators, + and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV + and POWER10 PowerVM platforms. + + If unsure, say "N". diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile new file mode 100644 index 000000000000..e790f1910f61 --- /dev/null +++ b/arch/powerpc/platforms/book3s/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_PPC_VAS) += vas-api.o diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c new file mode 100644 index 000000000000..30172e52e16b --- /dev/null +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -0,0 +1,493 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * VAS user space API for its accelerators (Only NX-GZIP is supported now) + * Copyright (C) 2019 Haren Myneni, IBM Corp + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kthread.h> +#include <linux/sched/signal.h> +#include <linux/mmu_context.h> +#include <linux/io.h> +#include <asm/vas.h> +#include <uapi/asm/vas-api.h> + +/* + * The driver creates the device node that can be used as follows: + * For NX-GZIP + * + * fd = open("/dev/crypto/nx-gzip", O_RDWR); + * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); + * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). + * vas_copy(&crb, 0, 1); + * vas_paste(paste_addr, 0, 1); + * close(fd) or exit process to close window. + * + * where "vas_copy" and "vas_paste" are defined in copy-paste.h. + * copy/paste returns to the user space directly. So refer NX hardware + * documententation for exact copy/paste usage and completion / error + * conditions. + */ + +/* + * Wrapper object for the nx-gzip device - there is just one instance of + * this node for the whole system. + */ +static struct coproc_dev { + struct cdev cdev; + struct device *device; + char *name; + dev_t devt; + struct class *class; + enum vas_cop_type cop_type; + const struct vas_user_win_ops *vops; +} coproc_device; + +struct coproc_instance { + struct coproc_dev *coproc; + struct vas_window *txwin; +}; + +static char *coproc_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); +} + +/* + * Take reference to pid and mm + */ +int get_vas_user_win_ref(struct vas_user_win_ref *task_ref) +{ + /* + * Window opened by a child thread may not be closed when + * it exits. So take reference to its pid and release it + * when the window is free by parent thread. + * Acquire a reference to the task's pid to make sure + * pid will not be re-used - needed only for multithread + * applications. + */ + task_ref->pid = get_task_pid(current, PIDTYPE_PID); + /* + * Acquire a reference to the task's mm. + */ + task_ref->mm = get_task_mm(current); + if (!task_ref->mm) { + put_pid(task_ref->pid); + pr_err("VAS: pid(%d): mm_struct is not found\n", + current->pid); + return -EPERM; + } + + mmgrab(task_ref->mm); + mmput(task_ref->mm); + /* + * Process closes window during exit. In the case of + * multithread application, the child thread can open + * window and can exit without closing it. So takes tgid + * reference until window closed to make sure tgid is not + * reused. + */ + task_ref->tgid = find_get_pid(task_tgid_vnr(current)); + + return 0; +} + +/* + * Successful return must release the task reference with + * put_task_struct + */ +static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, + struct task_struct **tskp, struct pid **pidp) +{ + struct task_struct *tsk; + struct pid *pid; + + pid = task_ref->pid; + tsk = get_pid_task(pid, PIDTYPE_PID); + if (!tsk) { + pid = task_ref->tgid; + tsk = get_pid_task(pid, PIDTYPE_PID); + /* + * Parent thread (tgid) will be closing window when it + * exits. So should not get here. + */ + if (WARN_ON_ONCE(!tsk)) + return false; + } + + /* Return if the task is exiting. */ + if (tsk->flags & PF_EXITING) { + put_task_struct(tsk); + return false; + } + + *tskp = tsk; + *pidp = pid; + + return true; +} + +/* + * Update the CSB to indicate a translation error. + * + * User space will be polling on CSB after the request is issued. + * If NX can handle the request without any issues, it updates CSB. + * Whereas if NX encounters page fault, the kernel will handle the + * fault and update CSB with translation error. + * + * If we are unable to update the CSB means copy_to_user failed due to + * invalid csb_addr, send a signal to the process. + */ +void vas_update_csb(struct coprocessor_request_block *crb, + struct vas_user_win_ref *task_ref) +{ + struct coprocessor_status_block csb; + struct kernel_siginfo info; + struct task_struct *tsk; + void __user *csb_addr; + struct pid *pid; + int rc; + + /* + * NX user space windows can not be opened for task->mm=NULL + * and faults will not be generated for kernel requests. + */ + if (WARN_ON_ONCE(!task_ref->mm)) + return; + + csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); + + memset(&csb, 0, sizeof(csb)); + csb.cc = CSB_CC_FAULT_ADDRESS; + csb.ce = CSB_CE_TERMINATION; + csb.cs = 0; + csb.count = 0; + + /* + * NX operates and returns in BE format as defined CRB struct. + * So saves fault_storage_addr in BE as NX pastes in FIFO and + * expects user space to convert to CPU format. + */ + csb.address = crb->stamp.nx.fault_storage_addr; + csb.flags = 0; + + /* + * Process closes send window after all pending NX requests are + * completed. In multi-thread applications, a child thread can + * open a window and can exit without closing it. May be some + * requests are pending or this window can be used by other + * threads later. We should handle faults if NX encounters + * pages faults on these requests. Update CSB with translation + * error and fault address. If csb_addr passed by user space is + * invalid, send SEGV signal to pid saved in window. If the + * child thread is not running, send the signal to tgid. + * Parent thread (tgid) will close this window upon its exit. + * + * pid and mm references are taken when window is opened by + * process (pid). So tgid is used only when child thread opens + * a window and exits without closing it. + */ + + if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) + return; + + kthread_use_mm(task_ref->mm); + rc = copy_to_user(csb_addr, &csb, sizeof(csb)); + /* + * User space polls on csb.flags (first byte). So add barrier + * then copy first byte with csb flags update. + */ + if (!rc) { + csb.flags = CSB_V; + /* Make sure update to csb.flags is visible now */ + smp_mb(); + rc = copy_to_user(csb_addr, &csb, sizeof(u8)); + } + kthread_unuse_mm(task_ref->mm); + put_task_struct(tsk); + + /* Success */ + if (!rc) + return; + + + pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", + csb_addr, pid_vnr(pid)); + + clear_siginfo(&info); + info.si_signo = SIGSEGV; + info.si_errno = EFAULT; + info.si_code = SEGV_MAPERR; + info.si_addr = csb_addr; + /* + * process will be polling on csb.flags after request is sent to + * NX. So generally CSB update should not fail except when an + * application passes invalid csb_addr. So an error message will + * be displayed and leave it to user space whether to ignore or + * handle this signal. + */ + rcu_read_lock(); + rc = kill_pid_info(SIGSEGV, &info, pid); + rcu_read_unlock(); + + pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, + pid_vnr(pid), rc); +} + +void vas_dump_crb(struct coprocessor_request_block *crb) +{ + struct data_descriptor_entry *dde; + struct nx_fault_stamp *nx; + + dde = &crb->source; + pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + dde = &crb->target; + pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", + be64_to_cpu(dde->address), be32_to_cpu(dde->length), + dde->count, dde->index, dde->flags); + + nx = &crb->stamp.nx; + pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", + be32_to_cpu(nx->pswid), + be64_to_cpu(crb->stamp.nx.fault_storage_addr), + nx->flags, nx->fault_status); +} + +static int coproc_open(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst; + + cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); + if (!cp_inst) + return -ENOMEM; + + cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, + cdev); + fp->private_data = cp_inst; + + return 0; +} + +static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) +{ + void __user *uptr = (void __user *)arg; + struct vas_tx_win_open_attr uattr; + struct coproc_instance *cp_inst; + struct vas_window *txwin; + int rc; + + cp_inst = fp->private_data; + + /* + * One window for file descriptor + */ + if (cp_inst->txwin) + return -EEXIST; + + rc = copy_from_user(&uattr, uptr, sizeof(uattr)); + if (rc) { + pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); + return -EFAULT; + } + + if (uattr.version != 1) { + pr_err("Invalid window open API version\n"); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->open_win) { + pr_err("VAS API is not registered\n"); + return -EACCES; + } + + txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags, + cp_inst->coproc->cop_type); + if (IS_ERR(txwin)) { + pr_err("%s() VAS window open failed, %ld\n", __func__, + PTR_ERR(txwin)); + return PTR_ERR(txwin); + } + + cp_inst->txwin = txwin; + + return 0; +} + +static int coproc_release(struct inode *inode, struct file *fp) +{ + struct coproc_instance *cp_inst = fp->private_data; + int rc; + + if (cp_inst->txwin) { + if (cp_inst->coproc->vops && + cp_inst->coproc->vops->close_win) { + rc = cp_inst->coproc->vops->close_win(cp_inst->txwin); + if (rc) + return rc; + } + cp_inst->txwin = NULL; + } + + kfree(cp_inst); + fp->private_data = NULL; + + /* + * We don't know here if user has other receive windows + * open, so we can't really call clear_thread_tidr(). + * So, once the process calls set_thread_tidr(), the + * TIDR value sticks around until process exits, resulting + * in an extra copy in restore_sprs(). + */ + + return 0; +} + +static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) +{ + struct coproc_instance *cp_inst = fp->private_data; + struct vas_window *txwin; + unsigned long pfn; + u64 paste_addr; + pgprot_t prot; + int rc; + + txwin = cp_inst->txwin; + + if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { + pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, + (vma->vm_end - vma->vm_start), PAGE_SIZE); + return -EINVAL; + } + + /* Ensure instance has an open send window */ + if (!txwin) { + pr_err("%s(): No send window open?\n", __func__); + return -EINVAL; + } + + if (!cp_inst->coproc->vops && !cp_inst->coproc->vops->paste_addr) { + pr_err("%s(): VAS API is not registered\n", __func__); + return -EACCES; + } + + paste_addr = cp_inst->coproc->vops->paste_addr(txwin); + if (!paste_addr) { + pr_err("%s(): Window paste address failed\n", __func__); + return -EINVAL; + } + + pfn = paste_addr >> PAGE_SHIFT; + + /* flags, page_prot from cxl_mmap(), except we want cachable */ + vma->vm_flags |= VM_IO | VM_PFNMAP; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + + prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); + + rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, prot); + + pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, + paste_addr, vma->vm_start, rc); + + return rc; +} + +static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VAS_TX_WIN_OPEN: + return coproc_ioc_tx_win_open(fp, arg); + default: + return -EINVAL; + } +} + +static struct file_operations coproc_fops = { + .open = coproc_open, + .release = coproc_release, + .mmap = coproc_mmap, + .unlocked_ioctl = coproc_ioctl, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, + const char *name, + const struct vas_user_win_ops *vops) +{ + int rc = -EINVAL; + dev_t devno; + + rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); + if (rc) { + pr_err("Unable to allocate coproc major number: %i\n", rc); + return rc; + } + + pr_devel("%s device allocated, dev [%i,%i]\n", name, + MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); + + coproc_device.class = class_create(mod, name); + if (IS_ERR(coproc_device.class)) { + rc = PTR_ERR(coproc_device.class); + pr_err("Unable to create %s class %d\n", name, rc); + goto err_class; + } + coproc_device.class->devnode = coproc_devnode; + coproc_device.cop_type = cop_type; + coproc_device.vops = vops; + + coproc_fops.owner = mod; + cdev_init(&coproc_device.cdev, &coproc_fops); + + devno = MKDEV(MAJOR(coproc_device.devt), 0); + rc = cdev_add(&coproc_device.cdev, devno, 1); + if (rc) { + pr_err("cdev_add() failed %d\n", rc); + goto err_cdev; + } + + coproc_device.device = device_create(coproc_device.class, NULL, + devno, NULL, name, MINOR(devno)); + if (IS_ERR(coproc_device.device)) { + rc = PTR_ERR(coproc_device.device); + pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); + goto err; + } + + pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), + MINOR(devno)); + + return 0; + +err: + cdev_del(&coproc_device.cdev); +err_cdev: + class_destroy(coproc_device.class); +err_class: + unregister_chrdev_region(coproc_device.devt, 1); + return rc; +} + +void vas_unregister_coproc_api(void) +{ + dev_t devno; + + cdev_del(&coproc_device.cdev); + devno = MKDEV(MAJOR(coproc_device.devt), 0); + device_destroy(coproc_device.class, devno); + + class_destroy(coproc_device.class); + unregister_chrdev_region(coproc_device.devt, 1); +} diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index 93ea41680f54..a1c293f42a1f 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -25,10 +25,9 @@ struct spiderpci_iowa_private { static void spiderpci_io_flush(struct iowa_bus *bus) { struct spiderpci_iowa_private *priv; - u32 val; priv = bus->private; - val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); iosync(); } diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index d56b4e3241cd..b41e81b22fdc 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -1657,14 +1657,13 @@ static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu) static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_problem __iomem *prob = spu->problem; - u32 dummy = 0; /* Restore, Step 66: * If CSA.MB_Stat[P]=0 (mailbox empty) then * read from the PPU_MB register. */ if ((csa->prob.mb_stat_R & 0xFF) == 0) { - dummy = in_be32(&prob->pu_mb_R); + in_be32(&prob->pu_mb_R); eieio(); } } @@ -1672,14 +1671,13 @@ static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu) static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; - u64 dummy = 0UL; /* Restore, Step 66: * If CSA.MB_Stat[I]=0 (mailbox empty) then * read from the PPUINT_MB register. */ if ((csa->prob.mb_stat_R & 0xFF0000) == 0) { - dummy = in_be64(&priv2->puint_mb_R); + in_be64(&priv2->puint_mb_R); eieio(); spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR); eieio(); diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 53065d564161..85521b3e7098 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -251,8 +251,8 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 5565647dc879..d8da6a483e59 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -173,8 +173,8 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) /* Are we prepared to handle this fault */ if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } return 0; diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig new file mode 100644 index 000000000000..8f6a81978461 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +config PPC_MICROWATT + depends on PPC_BOOK3S_64 && !SMP + bool "Microwatt SoC platform" + select PPC_XICS + select PPC_ICS_NATIVE + select PPC_ICP_NATIVE + select PPC_NATIVE + select PPC_UDBG_16550 + select ARCH_RANDOM + help + This option enables support for FPGA-based Microwatt implementations. + diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile new file mode 100644 index 000000000000..116d6d3ad3f0 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/Makefile @@ -0,0 +1 @@ +obj-y += setup.o rng.o diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c new file mode 100644 index 000000000000..3d8ee6eb7dad --- /dev/null +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/powerpc/platforms/powernv/rng.c, which is: + * Copyright 2013, Michael Ellerman, IBM Corporation. + */ + +#define pr_fmt(fmt) "microwatt-rng: " fmt + +#include <linux/kernel.h> +#include <linux/smp.h> +#include <asm/archrandom.h> +#include <asm/cputable.h> +#include <asm/machdep.h> + +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul + +int microwatt_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static __init int rng_init(void) +{ + unsigned long val; + int i; + + for (i = 0; i < 10; i++) { + if (microwatt_get_random_darn(&val)) { + ppc_md.get_random_seed = microwatt_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} +machine_subsys_initcall(, rng_init); diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c new file mode 100644 index 000000000000..0b02603bdb74 --- /dev/null +++ b/arch/powerpc/platforms/microwatt/setup.c @@ -0,0 +1,41 @@ +/* + * Microwatt FPGA-based SoC platform setup code. + * + * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_platform.h> + +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/xics.h> +#include <asm/udbg.h> + +static void __init microwatt_init_IRQ(void) +{ + xics_init(); +} + +static int __init microwatt_probe(void) +{ + return of_machine_is_compatible("microwatt-soc"); +} + +static int __init microwatt_populate(void) +{ + return of_platform_default_populate(NULL, NULL, NULL); +} +machine_arch_initcall(microwatt, microwatt_populate); + +define_machine(microwatt) { + .name = "microwatt", + .probe = microwatt_probe, + .init_IRQ = microwatt_init_IRQ, + .progress = udbg_progress, + .calibrate_decr = generic_calibrate_decr, +}; diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 1c954c90b0f4..9b88e3cded7d 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -37,7 +37,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) */ if (regs->msr & SRR1_WAKEMASK) - regs->nip = regs->link; + regs_set_return_ip(regs, regs->link); switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: @@ -58,7 +58,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) restore_astate(hard_smp_processor_id()); /* everything handled */ - regs->msr |= MSR_RI; + regs_set_return_msr(regs, regs->msr | MSR_RI); return 1; } diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index 9d4ecd292255..d20ef35e6d9d 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -433,7 +433,7 @@ static void __init btext_welcome(boot_infos_t *bi) bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase); bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase); bootx_printf(" (log)"); - bootx_printf("\nklimit : 0x%x",(unsigned long)klimit); + bootx_printf("\nklimit : 0x%x",(unsigned long)_end); bootx_printf("\nboot_info at : 0x%x", bi); __asm__ __volatile__ ("mfmsr %0" : "=r" (flags)); bootx_printf("\nMSR : 0x%x", flags); diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index adae2a6712e1..bdfea6d6ab69 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -810,7 +810,7 @@ static int smp_core99_kick_cpu(int nr) * b __secondary_start_pmac_0 + nr*8 */ target = (unsigned long) __secondary_start_pmac_0 + nr * 8; - patch_branch((struct ppc_inst *)vector, target, BRANCH_SET_LINK); + patch_branch(vector, target, BRANCH_SET_LINK); /* Put some life in our friend */ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); @@ -823,7 +823,7 @@ static int smp_core99_kick_cpu(int nr) mdelay(1); /* Restore our exception vector */ - patch_instruction((struct ppc_inst *)vector, ppc_inst(save_vector)); + patch_instruction(vector, ppc_inst(save_vector)); local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 619b093a0657..043eefbbdd28 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -33,20 +33,6 @@ config PPC_MEMTRACE Enabling this option allows for runtime allocation of memory (RAM) for hardware tracing. -config PPC_VAS - bool "IBM Virtual Accelerator Switchboard (VAS)" - depends on PPC_POWERNV && PPC_64K_PAGES - default y - help - This enables support for IBM Virtual Accelerator Switchboard (VAS). - - VAS allows accelerators in co-processors like NX-GZIP and NX-842 - to be accessible to kernel subsystems and user processes. - - VAS adapters are found in POWER9 based systems. - - If unsure, say N. - config SCOM_DEBUGFS bool "Expose SCOM controllers via debugfs" depends on DEBUG_FS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index be2546b96816..dc7b37c23b60 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o obj-$(CONFIG_PERF_EVENTS) += opal-imc.o obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o -obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o vas-api.o +obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 01401e3da7ca..f812c74c61e5 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/percpu.h> #include <linux/jump_label.h> +#include <asm/interrupt.h> #include <asm/opal-api.h> #include <asm/trace.h> #include <asm/asm-prototypes.h> @@ -100,6 +101,9 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, bool mmu = (msr & (MSR_IR|MSR_DR)); int64_t ret; + /* OPAL call / firmware may use SRR and/or HSRR */ + srr_regs_clobbered(); + msr &= ~MSR_EE; if (unlikely(!mmu)) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 303d7c775740..2835376e61a4 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -773,7 +773,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs) * Setup regs->nip to rfi into fixup address. */ if (recover_addr) - regs->nip = recover_addr; + regs_set_return_ip(regs, recover_addr); out: return !!recover_addr; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b18468dc31ff..6bb3c52633fb 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -711,7 +711,7 @@ int pnv_pci_cfg_write(struct pci_dn *pdn, return PCIBIOS_SUCCESSFUL; } -#if CONFIG_EEH +#ifdef CONFIG_EEH static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 73207b53dc2b..7e98b00ea2e8 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -169,6 +169,16 @@ static void update_hid_in_slw(u64 hid0) } } +static inline void update_power8_hid0(unsigned long hid0) +{ + /* + * The HID0 update on Power8 should at the very least be + * preceded by a SYNC instruction followed by an ISYNC + * instruction + */ + asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0)); +} + static void unsplit_core(void) { u64 hid0, mask; diff --git a/arch/powerpc/platforms/powernv/vas-api.c b/arch/powerpc/platforms/powernv/vas-api.c deleted file mode 100644 index 98ed5d8c5441..000000000000 --- a/arch/powerpc/platforms/powernv/vas-api.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * VAS user space API for its accelerators (Only NX-GZIP is supported now) - * Copyright (C) 2019 Haren Myneni, IBM Corp - */ - -#include <linux/kernel.h> -#include <linux/device.h> -#include <linux/cdev.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <asm/vas.h> -#include <uapi/asm/vas-api.h> -#include "vas.h" - -/* - * The driver creates the device node that can be used as follows: - * For NX-GZIP - * - * fd = open("/dev/crypto/nx-gzip", O_RDWR); - * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr); - * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL). - * vas_copy(&crb, 0, 1); - * vas_paste(paste_addr, 0, 1); - * close(fd) or exit process to close window. - * - * where "vas_copy" and "vas_paste" are defined in copy-paste.h. - * copy/paste returns to the user space directly. So refer NX hardware - * documententation for exact copy/paste usage and completion / error - * conditions. - */ - -/* - * Wrapper object for the nx-gzip device - there is just one instance of - * this node for the whole system. - */ -static struct coproc_dev { - struct cdev cdev; - struct device *device; - char *name; - dev_t devt; - struct class *class; - enum vas_cop_type cop_type; -} coproc_device; - -struct coproc_instance { - struct coproc_dev *coproc; - struct vas_window *txwin; -}; - -static char *coproc_devnode(struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev)); -} - -static int coproc_open(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst; - - cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL); - if (!cp_inst) - return -ENOMEM; - - cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev, - cdev); - fp->private_data = cp_inst; - - return 0; -} - -static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg) -{ - void __user *uptr = (void __user *)arg; - struct vas_tx_win_attr txattr = {}; - struct vas_tx_win_open_attr uattr; - struct coproc_instance *cp_inst; - struct vas_window *txwin; - int rc, vasid; - - cp_inst = fp->private_data; - - /* - * One window for file descriptor - */ - if (cp_inst->txwin) - return -EEXIST; - - rc = copy_from_user(&uattr, uptr, sizeof(uattr)); - if (rc) { - pr_err("%s(): copy_from_user() returns %d\n", __func__, rc); - return -EFAULT; - } - - if (uattr.version != 1) { - pr_err("Invalid version\n"); - return -EINVAL; - } - - vasid = uattr.vas_id; - - vas_init_tx_win_attr(&txattr, cp_inst->coproc->cop_type); - - txattr.lpid = mfspr(SPRN_LPID); - txattr.pidr = mfspr(SPRN_PID); - txattr.user_win = true; - txattr.rsvd_txbuf_count = false; - txattr.pswid = false; - - pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, - mfspr(SPRN_PID)); - - txwin = vas_tx_win_open(vasid, cp_inst->coproc->cop_type, &txattr); - if (IS_ERR(txwin)) { - pr_err("%s() vas_tx_win_open() failed, %ld\n", __func__, - PTR_ERR(txwin)); - return PTR_ERR(txwin); - } - - cp_inst->txwin = txwin; - - return 0; -} - -static int coproc_release(struct inode *inode, struct file *fp) -{ - struct coproc_instance *cp_inst = fp->private_data; - - if (cp_inst->txwin) { - vas_win_close(cp_inst->txwin); - cp_inst->txwin = NULL; - } - - kfree(cp_inst); - fp->private_data = NULL; - - /* - * We don't know here if user has other receive windows - * open, so we can't really call clear_thread_tidr(). - * So, once the process calls set_thread_tidr(), the - * TIDR value sticks around until process exits, resulting - * in an extra copy in restore_sprs(). - */ - - return 0; -} - -static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct coproc_instance *cp_inst = fp->private_data; - struct vas_window *txwin; - unsigned long pfn; - u64 paste_addr; - pgprot_t prot; - int rc; - - txwin = cp_inst->txwin; - - if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { - pr_debug("%s(): size 0x%zx, PAGE_SIZE 0x%zx\n", __func__, - (vma->vm_end - vma->vm_start), PAGE_SIZE); - return -EINVAL; - } - - /* Ensure instance has an open send window */ - if (!txwin) { - pr_err("%s(): No send window open?\n", __func__); - return -EINVAL; - } - - vas_win_paste_addr(txwin, &paste_addr, NULL); - pfn = paste_addr >> PAGE_SHIFT; - - /* flags, page_prot from cxl_mmap(), except we want cachable */ - vma->vm_flags |= VM_IO | VM_PFNMAP; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - - prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY); - - rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff, - vma->vm_end - vma->vm_start, prot); - - pr_devel("%s(): paste addr %llx at %lx, rc %d\n", __func__, - paste_addr, vma->vm_start, rc); - - return rc; -} - -static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case VAS_TX_WIN_OPEN: - return coproc_ioc_tx_win_open(fp, arg); - default: - return -EINVAL; - } -} - -static struct file_operations coproc_fops = { - .open = coproc_open, - .release = coproc_release, - .mmap = coproc_mmap, - .unlocked_ioctl = coproc_ioctl, -}; - -/* - * Supporting only nx-gzip coprocessor type now, but this API code - * extended to other coprocessor types later. - */ -int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, - const char *name) -{ - int rc = -EINVAL; - dev_t devno; - - rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); - if (rc) { - pr_err("Unable to allocate coproc major number: %i\n", rc); - return rc; - } - - pr_devel("%s device allocated, dev [%i,%i]\n", name, - MAJOR(coproc_device.devt), MINOR(coproc_device.devt)); - - coproc_device.class = class_create(mod, name); - if (IS_ERR(coproc_device.class)) { - rc = PTR_ERR(coproc_device.class); - pr_err("Unable to create %s class %d\n", name, rc); - goto err_class; - } - coproc_device.class->devnode = coproc_devnode; - coproc_device.cop_type = cop_type; - - coproc_fops.owner = mod; - cdev_init(&coproc_device.cdev, &coproc_fops); - - devno = MKDEV(MAJOR(coproc_device.devt), 0); - rc = cdev_add(&coproc_device.cdev, devno, 1); - if (rc) { - pr_err("cdev_add() failed %d\n", rc); - goto err_cdev; - } - - coproc_device.device = device_create(coproc_device.class, NULL, - devno, NULL, name, MINOR(devno)); - if (IS_ERR(coproc_device.device)) { - rc = PTR_ERR(coproc_device.device); - pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc); - goto err; - } - - pr_devel("%s: Added dev [%d,%d]\n", __func__, MAJOR(devno), - MINOR(devno)); - - return 0; - -err: - cdev_del(&coproc_device.cdev); -err_cdev: - class_destroy(coproc_device.class); -err_class: - unregister_chrdev_region(coproc_device.devt, 1); - return rc; -} -EXPORT_SYMBOL_GPL(vas_register_coproc_api); - -void vas_unregister_coproc_api(void) -{ - dev_t devno; - - cdev_del(&coproc_device.cdev); - devno = MKDEV(MAJOR(coproc_device.devt), 0); - device_destroy(coproc_device.class, devno); - - class_destroy(coproc_device.class); - unregister_chrdev_region(coproc_device.devt, 1); -} -EXPORT_SYMBOL_GPL(vas_unregister_coproc_api); diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c index 41fa90d2f4ab..3ce89a4b54be 100644 --- a/arch/powerpc/platforms/powernv/vas-debug.c +++ b/arch/powerpc/platforms/powernv/vas-debug.c @@ -9,6 +9,7 @@ #include <linux/slab.h> #include <linux/debugfs.h> #include <linux/seq_file.h> +#include <asm/vas.h> #include "vas.h" static struct dentry *vas_debugfs; @@ -28,7 +29,7 @@ static char *cop_to_str(int cop) static int info_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -36,9 +37,9 @@ static int info_show(struct seq_file *s, void *private) if (!window->hvwc_map) goto unlock; - seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop), + seq_printf(s, "Type: %s, %s\n", cop_to_str(window->vas_win.cop), window->tx_win ? "Send" : "Receive"); - seq_printf(s, "Pid : %d\n", vas_window_pid(window)); + seq_printf(s, "Pid : %d\n", vas_window_pid(&window->vas_win)); unlock: mutex_unlock(&vas_mutex); @@ -47,7 +48,7 @@ unlock: DEFINE_SHOW_ATTRIBUTE(info); -static inline void print_reg(struct seq_file *s, struct vas_window *win, +static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win, char *name, u32 reg) { seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name); @@ -55,7 +56,7 @@ static inline void print_reg(struct seq_file *s, struct vas_window *win, static int hvwc_show(struct seq_file *s, void *private) { - struct vas_window *window = s->private; + struct pnv_vas_window *window = s->private; mutex_lock(&vas_mutex); @@ -103,8 +104,10 @@ unlock: DEFINE_SHOW_ATTRIBUTE(hvwc); -void vas_window_free_dbgdir(struct vas_window *window) +void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win) { + struct vas_window *window = &pnv_win->vas_win; + if (window->dbgdir) { debugfs_remove_recursive(window->dbgdir); kfree(window->dbgname); @@ -113,21 +116,21 @@ void vas_window_free_dbgdir(struct vas_window *window) } } -void vas_window_init_dbgdir(struct vas_window *window) +void vas_window_init_dbgdir(struct pnv_vas_window *window) { struct dentry *d; if (!window->vinst->dbgdir) return; - window->dbgname = kzalloc(16, GFP_KERNEL); - if (!window->dbgname) + window->vas_win.dbgname = kzalloc(16, GFP_KERNEL); + if (!window->vas_win.dbgname) return; - snprintf(window->dbgname, 16, "w%d", window->winid); + snprintf(window->vas_win.dbgname, 16, "w%d", window->vas_win.winid); - d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir); - window->dbgdir = d; + d = debugfs_create_dir(window->vas_win.dbgname, window->vinst->dbgdir); + window->vas_win.dbgdir = d; debugfs_create_file("info", 0444, d, window, &info_fops); debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops); diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index 3d21fce254b7..a7aabc18039e 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -26,150 +26,6 @@ */ #define VAS_FAULT_WIN_FIFO_SIZE (4 << 20) -static void dump_crb(struct coprocessor_request_block *crb) -{ - struct data_descriptor_entry *dde; - struct nx_fault_stamp *nx; - - dde = &crb->source; - pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - dde = &crb->target; - pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n", - be64_to_cpu(dde->address), be32_to_cpu(dde->length), - dde->count, dde->index, dde->flags); - - nx = &crb->stamp.nx; - pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n", - be32_to_cpu(nx->pswid), - be64_to_cpu(crb->stamp.nx.fault_storage_addr), - nx->flags, nx->fault_status); -} - -/* - * Update the CSB to indicate a translation error. - * - * User space will be polling on CSB after the request is issued. - * If NX can handle the request without any issues, it updates CSB. - * Whereas if NX encounters page fault, the kernel will handle the - * fault and update CSB with translation error. - * - * If we are unable to update the CSB means copy_to_user failed due to - * invalid csb_addr, send a signal to the process. - */ -static void update_csb(struct vas_window *window, - struct coprocessor_request_block *crb) -{ - struct coprocessor_status_block csb; - struct kernel_siginfo info; - struct task_struct *tsk; - void __user *csb_addr; - struct pid *pid; - int rc; - - /* - * NX user space windows can not be opened for task->mm=NULL - * and faults will not be generated for kernel requests. - */ - if (WARN_ON_ONCE(!window->mm || !window->user_win)) - return; - - csb_addr = (void __user *)be64_to_cpu(crb->csb_addr); - - memset(&csb, 0, sizeof(csb)); - csb.cc = CSB_CC_FAULT_ADDRESS; - csb.ce = CSB_CE_TERMINATION; - csb.cs = 0; - csb.count = 0; - - /* - * NX operates and returns in BE format as defined CRB struct. - * So saves fault_storage_addr in BE as NX pastes in FIFO and - * expects user space to convert to CPU format. - */ - csb.address = crb->stamp.nx.fault_storage_addr; - csb.flags = 0; - - pid = window->pid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Process closes send window after all pending NX requests are - * completed. In multi-thread applications, a child thread can - * open a window and can exit without closing it. May be some - * requests are pending or this window can be used by other - * threads later. We should handle faults if NX encounters - * pages faults on these requests. Update CSB with translation - * error and fault address. If csb_addr passed by user space is - * invalid, send SEGV signal to pid saved in window. If the - * child thread is not running, send the signal to tgid. - * Parent thread (tgid) will close this window upon its exit. - * - * pid and mm references are taken when window is opened by - * process (pid). So tgid is used only when child thread opens - * a window and exits without closing it. - */ - if (!tsk) { - pid = window->tgid; - tsk = get_pid_task(pid, PIDTYPE_PID); - /* - * Parent thread (tgid) will be closing window when it - * exits. So should not get here. - */ - if (WARN_ON_ONCE(!tsk)) - return; - } - - /* Return if the task is exiting. */ - if (tsk->flags & PF_EXITING) { - put_task_struct(tsk); - return; - } - - kthread_use_mm(window->mm); - rc = copy_to_user(csb_addr, &csb, sizeof(csb)); - /* - * User space polls on csb.flags (first byte). So add barrier - * then copy first byte with csb flags update. - */ - if (!rc) { - csb.flags = CSB_V; - /* Make sure update to csb.flags is visible now */ - smp_mb(); - rc = copy_to_user(csb_addr, &csb, sizeof(u8)); - } - kthread_unuse_mm(window->mm); - put_task_struct(tsk); - - /* Success */ - if (!rc) - return; - - pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n", - csb_addr, pid_vnr(pid)); - - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = EFAULT; - info.si_code = SEGV_MAPERR; - info.si_addr = csb_addr; - - /* - * process will be polling on csb.flags after request is sent to - * NX. So generally CSB update should not fail except when an - * application passes invalid csb_addr. So an error message will - * be displayed and leave it to user space whether to ignore or - * handle this signal. - */ - rcu_read_lock(); - rc = kill_pid_info(SIGSEGV, &info, pid); - rcu_read_unlock(); - - pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__, - pid_vnr(pid), rc); -} - static void dump_fifo(struct vas_instance *vinst, void *entry) { unsigned long *end = vinst->fault_fifo + vinst->fault_fifo_size; @@ -212,7 +68,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) struct vas_instance *vinst = data; struct coprocessor_request_block *crb, *entry; struct coprocessor_request_block buf; - struct vas_window *window; + struct pnv_vas_window *window; unsigned long flags; void *fifo; @@ -272,7 +128,7 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) vinst->vas_id, vinst->fault_fifo, fifo, vinst->fault_crbs); - dump_crb(crb); + vas_dump_crb(crb); window = vas_pswid_to_window(vinst, be32_to_cpu(crb->stamp.nx.pswid)); @@ -293,7 +149,14 @@ irqreturn_t vas_fault_thread_fn(int irq, void *data) WARN_ON_ONCE(1); } else { - update_csb(window, crb); + /* + * NX sees faults only with user space windows. + */ + if (window->user_win) + vas_update_csb(crb, &window->vas_win.task_ref); + else + WARN_ON_ONCE(!window->user_win); + /* * Return credit for send window after processing * fault CRB. @@ -336,6 +199,7 @@ irqreturn_t vas_fault_handler(int irq, void *dev_id) int vas_setup_fault_window(struct vas_instance *vinst) { struct vas_rx_win_attr attr; + struct vas_window *win; vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE; vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL); @@ -364,18 +228,17 @@ int vas_setup_fault_window(struct vas_instance *vinst) attr.lnotify_pid = mfspr(SPRN_PID); attr.lnotify_tid = mfspr(SPRN_PID); - vinst->fault_win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, - &attr); - - if (IS_ERR(vinst->fault_win)) { - pr_err("VAS: Error %ld opening FaultWin\n", - PTR_ERR(vinst->fault_win)); + win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr); + if (IS_ERR(win)) { + pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win)); kfree(vinst->fault_fifo); - return PTR_ERR(vinst->fault_win); + return PTR_ERR(win); } + vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win); + pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n", - vinst->fault_win->winid, attr.lnotify_lpid, + vinst->fault_win->vas_win.winid, attr.lnotify_lpid, attr.lnotify_pid, attr.lnotify_tid); return 0; diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h index a449b9f0c12e..ca2e08f2ddc0 100644 --- a/arch/powerpc/platforms/powernv/vas-trace.h +++ b/arch/powerpc/platforms/powernv/vas-trace.h @@ -80,7 +80,7 @@ TRACE_EVENT( vas_tx_win_open, TRACE_EVENT( vas_paste_crb, TP_PROTO(struct task_struct *tsk, - struct vas_window *win), + struct pnv_vas_window *win), TP_ARGS(tsk, win), @@ -96,7 +96,7 @@ TRACE_EVENT( vas_paste_crb, TP_fast_assign( __entry->pid = tsk->pid; __entry->vasid = win->vinst->vas_id; - __entry->winid = win->winid; + __entry->winid = win->vas_win.winid; __entry->paste_kaddr = (unsigned long)win->paste_kaddr ), diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 5f5fe63a3d1c..0f8d39fbf2b2 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -16,6 +16,7 @@ #include <linux/mmu_context.h> #include <asm/switch_to.h> #include <asm/ppc-opcode.h> +#include <asm/vas.h> #include "vas.h" #include "copy-paste.h" @@ -26,14 +27,14 @@ * Compute the paste address region for the window @window using the * ->paste_base_addr and ->paste_win_id_shift we got from device tree. */ -void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) +void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len) { int winid; u64 base, shift; base = window->vinst->paste_base_addr; shift = window->vinst->paste_win_id_shift; - winid = window->winid; + winid = window->vas_win.winid; *addr = base + (winid << shift); if (len) @@ -42,23 +43,23 @@ void vas_win_paste_addr(struct vas_window *window, u64 *addr, int *len) pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -static inline void get_hvwc_mmio_bar(struct vas_window *window, +static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->hvwc_bar_start; - *start = pbaddr + window->winid * VAS_HVWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_HVWC_SIZE; *len = VAS_HVWC_SIZE; } -static inline void get_uwc_mmio_bar(struct vas_window *window, +static inline void get_uwc_mmio_bar(struct pnv_vas_window *window, u64 *start, int *len) { u64 pbaddr; pbaddr = window->vinst->uwc_bar_start; - *start = pbaddr + window->winid * VAS_UWC_SIZE; + *start = pbaddr + window->vas_win.winid * VAS_UWC_SIZE; *len = VAS_UWC_SIZE; } @@ -67,7 +68,7 @@ static inline void get_uwc_mmio_bar(struct vas_window *window, * space. Unlike MMIO regions (map_mmio_region() below), paste region must * be mapped cache-able and is only applicable to send windows. */ -static void *map_paste_region(struct vas_window *txwin) +static void *map_paste_region(struct pnv_vas_window *txwin) { int len; void *map; @@ -75,7 +76,7 @@ static void *map_paste_region(struct vas_window *txwin) u64 start; name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id, - txwin->winid); + txwin->vas_win.winid); if (!name) goto free_name; @@ -132,7 +133,7 @@ static void unmap_region(void *addr, u64 start, int len) /* * Unmap the paste address region for a window. */ -static void unmap_paste_region(struct vas_window *window) +static void unmap_paste_region(struct pnv_vas_window *window) { int len; u64 busaddr_start; @@ -153,7 +154,7 @@ static void unmap_paste_region(struct vas_window *window) * path, just minimize the time we hold the mutex for now. We can add * a per-instance mutex later if necessary. */ -static void unmap_winctx_mmio_bars(struct vas_window *window) +static void unmap_winctx_mmio_bars(struct pnv_vas_window *window) { int len; void *uwc_map; @@ -186,7 +187,7 @@ static void unmap_winctx_mmio_bars(struct vas_window *window) * OS/User Window Context (UWC) MMIO Base Address Region for the given window. * Map these bus addresses and save the mapped kernel addresses in @window. */ -static int map_winctx_mmio_bars(struct vas_window *window) +static int map_winctx_mmio_bars(struct pnv_vas_window *window) { int len; u64 start; @@ -214,7 +215,7 @@ static int map_winctx_mmio_bars(struct vas_window *window) * registers are not sequential. And, we can only write to offsets * with valid registers. */ -static void reset_window_regs(struct vas_window *window) +static void reset_window_regs(struct pnv_vas_window *window) { write_hvwc_reg(window, VREG(LPID), 0ULL); write_hvwc_reg(window, VREG(PID), 0ULL); @@ -270,7 +271,7 @@ static void reset_window_regs(struct vas_window *window) * want to add fields to vas_winctx and move the initialization to * init_vas_winctx_regs(). */ -static void init_xlate_regs(struct vas_window *window, bool user_win) +static void init_xlate_regs(struct pnv_vas_window *window, bool user_win) { u64 lpcr, val; @@ -335,7 +336,7 @@ static void init_xlate_regs(struct vas_window *window, bool user_win) * * TODO: Reserved (aka dedicated) send buffers are not supported yet. */ -static void init_rsvd_tx_buf_count(struct vas_window *txwin, +static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin, struct vas_winctx *winctx) { write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL); @@ -357,7 +358,7 @@ static void init_rsvd_tx_buf_count(struct vas_window *txwin, * as a one-time task? That could work for NX but what about other * receivers? Let the receivers tell us the rx-fifo buffers for now. */ -static void init_winctx_regs(struct vas_window *window, +static void init_winctx_regs(struct pnv_vas_window *window, struct vas_winctx *winctx) { u64 val; @@ -519,10 +520,10 @@ static int vas_assign_window_id(struct ida *ida) return winid; } -static void vas_window_free(struct vas_window *window) +static void vas_window_free(struct pnv_vas_window *window) { - int winid = window->winid; struct vas_instance *vinst = window->vinst; + int winid = window->vas_win.winid; unmap_winctx_mmio_bars(window); @@ -533,10 +534,10 @@ static void vas_window_free(struct vas_window *window) vas_release_window_id(&vinst->ida, winid); } -static struct vas_window *vas_window_alloc(struct vas_instance *vinst) +static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst) { int winid; - struct vas_window *window; + struct pnv_vas_window *window; winid = vas_assign_window_id(&vinst->ida); if (winid < 0) @@ -547,7 +548,7 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst) goto out_free; window->vinst = vinst; - window->winid = winid; + window->vas_win.winid = winid; if (map_winctx_mmio_bars(window)) goto out_free; @@ -562,7 +563,7 @@ out_free: return ERR_PTR(-ENOMEM); } -static void put_rx_win(struct vas_window *rxwin) +static void put_rx_win(struct pnv_vas_window *rxwin) { /* Better not be a send window! */ WARN_ON_ONCE(rxwin->tx_win); @@ -578,10 +579,11 @@ static void put_rx_win(struct vas_window *rxwin) * * NOTE: We access ->windows[] table and assume that vinst->mutex is held. */ -static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) +static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst, + u32 pswid) { int vasid, winid; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; decode_pswid(pswid, &vasid, &winid); @@ -590,7 +592,7 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) rxwin = vinst->windows[winid]; - if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW) + if (!rxwin || rxwin->tx_win || rxwin->vas_win.cop != VAS_COP_TYPE_FTW) return ERR_PTR(-EINVAL); return rxwin; @@ -602,10 +604,10 @@ static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid) * * See also function header of set_vinst_win(). */ -static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, +static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst, enum vas_cop_type cop, u32 pswid) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; mutex_lock(&vinst->mutex); @@ -638,9 +640,9 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst, * window, we also save the window in the ->rxwin[] table. */ static void set_vinst_win(struct vas_instance *vinst, - struct vas_window *window) + struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; mutex_lock(&vinst->mutex); @@ -649,8 +651,8 @@ static void set_vinst_win(struct vas_instance *vinst, * unless its a user (FTW) window. */ if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = window; + WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = window; } WARN_ON_ONCE(vinst->windows[id] != NULL); @@ -663,16 +665,16 @@ static void set_vinst_win(struct vas_instance *vinst, * Clear this window from the table(s) of windows for this VAS instance. * See also function header of set_vinst_win(). */ -static void clear_vinst_win(struct vas_window *window) +static void clear_vinst_win(struct pnv_vas_window *window) { - int id = window->winid; + int id = window->vas_win.winid; struct vas_instance *vinst = window->vinst; mutex_lock(&vinst->mutex); if (!window->user_win && !window->tx_win) { - WARN_ON_ONCE(!vinst->rxwin[window->cop]); - vinst->rxwin[window->cop] = NULL; + WARN_ON_ONCE(!vinst->rxwin[window->vas_win.cop]); + vinst->rxwin[window->vas_win.cop] = NULL; } WARN_ON_ONCE(vinst->windows[id] != window); @@ -681,7 +683,7 @@ static void clear_vinst_win(struct vas_window *window) mutex_unlock(&vinst->mutex); } -static void init_winctx_for_rxwin(struct vas_window *rxwin, +static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, struct vas_rx_win_attr *rxattr, struct vas_winctx *winctx) { @@ -702,7 +704,7 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin, winctx->rx_fifo = rxattr->rx_fifo; winctx->rx_fifo_size = rxattr->rx_fifo_size; - winctx->wcreds_max = rxwin->wcreds_max; + winctx->wcreds_max = rxwin->vas_win.wcreds_max; winctx->pin_win = rxattr->pin_win; winctx->nx_win = rxattr->nx_win; @@ -851,7 +853,7 @@ EXPORT_SYMBOL_GPL(vas_init_rx_win_attr); struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, struct vas_rx_win_attr *rxattr) { - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -870,21 +872,21 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop, rxwin = vas_window_alloc(vinst); if (IS_ERR(rxwin)) { pr_devel("Unable to allocate memory for Rx window\n"); - return rxwin; + return (struct vas_window *)rxwin; } rxwin->tx_win = false; rxwin->nx_win = rxattr->nx_win; rxwin->user_win = rxattr->user_win; - rxwin->cop = cop; - rxwin->wcreds_max = rxattr->wcreds_max; + rxwin->vas_win.cop = cop; + rxwin->vas_win.wcreds_max = rxattr->wcreds_max; init_winctx_for_rxwin(rxwin, rxattr, &winctx); init_winctx_regs(rxwin, &winctx); set_vinst_win(vinst, rxwin); - return rxwin; + return &rxwin->vas_win; } EXPORT_SYMBOL_GPL(vas_rx_win_open); @@ -905,7 +907,7 @@ void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop) } EXPORT_SYMBOL_GPL(vas_init_tx_win_attr); -static void init_winctx_for_txwin(struct vas_window *txwin, +static void init_winctx_for_txwin(struct pnv_vas_window *txwin, struct vas_tx_win_attr *txattr, struct vas_winctx *winctx) { @@ -926,7 +928,7 @@ static void init_winctx_for_txwin(struct vas_window *txwin, */ memset(winctx, 0, sizeof(struct vas_winctx)); - winctx->wcreds_max = txwin->wcreds_max; + winctx->wcreds_max = txwin->vas_win.wcreds_max; winctx->user_win = txattr->user_win; winctx->nx_win = txwin->rxwin->nx_win; @@ -946,13 +948,13 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->lpid = txattr->lpid; winctx->pidr = txattr->pidr; - winctx->rx_win_id = txwin->rxwin->winid; + winctx->rx_win_id = txwin->rxwin->vas_win.winid; /* * IRQ and fault window setup is successful. Set fault window * for the send window so that ready to handle faults. */ if (txwin->vinst->virq) - winctx->fault_win_id = txwin->vinst->fault_win->winid; + winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid; winctx->dma_type = VAS_DMA_TYPE_INJECT; winctx->tc_mode = txattr->tc_mode; @@ -962,7 +964,8 @@ static void init_winctx_for_txwin(struct vas_window *txwin, winctx->irq_port = txwin->vinst->irq_port; winctx->pswid = txattr->pswid ? txattr->pswid : - encode_pswid(txwin->vinst->vas_id, txwin->winid); + encode_pswid(txwin->vinst->vas_id, + txwin->vas_win.winid); } static bool tx_win_args_valid(enum vas_cop_type cop, @@ -993,8 +996,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, struct vas_tx_win_attr *attr) { int rc; - struct vas_window *txwin; - struct vas_window *rxwin; + struct pnv_vas_window *txwin; + struct pnv_vas_window *rxwin; struct vas_winctx winctx; struct vas_instance *vinst; @@ -1020,7 +1023,7 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rxwin = get_vinst_rxwin(vinst, cop, attr->pswid); if (IS_ERR(rxwin)) { pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop); - return rxwin; + return (struct vas_window *)rxwin; } txwin = vas_window_alloc(vinst); @@ -1029,12 +1032,12 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, goto put_rxwin; } - txwin->cop = cop; + txwin->vas_win.cop = cop; txwin->tx_win = 1; txwin->rxwin = rxwin; txwin->nx_win = txwin->rxwin->nx_win; txwin->user_win = attr->user_win; - txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; + txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT; init_winctx_for_txwin(txwin, attr, &winctx); @@ -1064,56 +1067,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop, rc = -ENODEV; goto free_window; } - - /* - * Window opened by a child thread may not be closed when - * it exits. So take reference to its pid and release it - * when the window is free by parent thread. - * Acquire a reference to the task's pid to make sure - * pid will not be re-used - needed only for multithread - * applications. - */ - txwin->pid = get_task_pid(current, PIDTYPE_PID); - /* - * Acquire a reference to the task's mm. - */ - txwin->mm = get_task_mm(current); - - if (!txwin->mm) { - put_pid(txwin->pid); - pr_err("VAS: pid(%d): mm_struct is not found\n", - current->pid); - rc = -EPERM; + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) goto free_window; - } - mmgrab(txwin->mm); - mmput(txwin->mm); - mm_context_add_vas_window(txwin->mm); - /* - * Process closes window during exit. In the case of - * multithread application, the child thread can open - * window and can exit without closing it. Expects parent - * thread to use and close the window. So do not need - * to take pid reference for parent thread. - */ - txwin->tgid = find_get_pid(task_tgid_vnr(current)); - /* - * Even a process that has no foreign real address mapping can - * use an unpaired COPY instruction (to no real effect). Issue - * CP_ABORT to clear any pending COPY and prevent a covert - * channel. - * - * __switch_to() will issue CP_ABORT on future context switches - * if process / thread has any open VAS window (Use - * current->mm->context.vas_windows). - */ - asm volatile(PPC_CP_ABORT); + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); } set_vinst_win(vinst, txwin); - return txwin; + return &txwin->vas_win; free_window: vas_window_free(txwin); @@ -1132,12 +1095,14 @@ int vas_copy_crb(void *crb, int offset) EXPORT_SYMBOL_GPL(vas_copy_crb); #define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) -int vas_paste_crb(struct vas_window *txwin, int offset, bool re) +int vas_paste_crb(struct vas_window *vwin, int offset, bool re) { + struct pnv_vas_window *txwin; int rc; void *addr; uint64_t val; + txwin = container_of(vwin, struct pnv_vas_window, vas_win); trace_vas_paste_crb(current, txwin); /* @@ -1167,7 +1132,7 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re) else rc = -EINVAL; - pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid, + pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid, read_hvwc_reg(txwin, VREG(LRFIFO_PUSH))); return rc; @@ -1187,7 +1152,7 @@ EXPORT_SYMBOL_GPL(vas_paste_crb); * user space. (NX-842 driver waits for CSB and Fast thread-wakeup * doesn't use credit checking). */ -static void poll_window_credits(struct vas_window *window) +static void poll_window_credits(struct pnv_vas_window *window) { u64 val; int creds, mode; @@ -1217,7 +1182,7 @@ retry: * and issue CRB Kill to stop all pending requests. Need only * if there is a bug in NX or fault handling in kernel. */ - if (creds < window->wcreds_max) { + if (creds < window->vas_win.wcreds_max) { val = 0; set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(msecs_to_jiffies(10)); @@ -1228,7 +1193,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n", - vas_window_pid(window), window->winid, + vas_window_pid(&window->vas_win), + window->vas_win.winid, creds, count); goto retry; @@ -1240,7 +1206,7 @@ retry: * short time to queue a CRB, so window should not be busy for too long. * Trying 5ms intervals. */ -static void poll_window_busy_state(struct vas_window *window) +static void poll_window_busy_state(struct pnv_vas_window *window) { int busy; u64 val; @@ -1260,7 +1226,8 @@ retry: */ if (!(count % 1000)) pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n", - vas_window_pid(window), window->winid, count); + vas_window_pid(&window->vas_win), + window->vas_win.winid, count); goto retry; } @@ -1282,7 +1249,7 @@ retry: * casting out becomes necessary we should consider offloading the * job to a worker thread, so the window close can proceed quickly. */ -static void poll_window_castout(struct vas_window *window) +static void poll_window_castout(struct pnv_vas_window *window) { /* stub for now */ } @@ -1291,7 +1258,7 @@ static void poll_window_castout(struct vas_window *window) * Unpin and close a window so no new requests are accepted and the * hardware can evict this window from cache if necessary. */ -static void unpin_close_window(struct vas_window *window) +static void unpin_close_window(struct pnv_vas_window *window) { u64 val; @@ -1313,11 +1280,15 @@ static void unpin_close_window(struct vas_window *window) * * Besides the hardware, kernel has some bookkeeping of course. */ -int vas_win_close(struct vas_window *window) +int vas_win_close(struct vas_window *vwin) { - if (!window) + struct pnv_vas_window *window; + + if (!vwin) return 0; + window = container_of(vwin, struct pnv_vas_window, vas_win); + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { pr_devel("Attempting to close an active Rx window!\n"); WARN_ON_ONCE(1); @@ -1339,12 +1310,8 @@ int vas_win_close(struct vas_window *window) /* if send window, drop reference to matching receive window */ if (window->tx_win) { if (window->user_win) { - /* Drop references to pid and mm */ - put_pid(window->pid); - if (window->mm) { - mm_context_remove_vas_window(window->mm); - mmdrop(window->mm); - } + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); } put_rx_win(window->rxwin); } @@ -1377,7 +1344,7 @@ EXPORT_SYMBOL_GPL(vas_win_close); * - The kernel with return credit on fault window after reading entry * from fault FIFO. */ -void vas_return_credit(struct vas_window *window, bool tx) +void vas_return_credit(struct pnv_vas_window *window, bool tx) { uint64_t val; @@ -1391,10 +1358,10 @@ void vas_return_credit(struct vas_window *window, bool tx) } } -struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid) { - struct vas_window *window; + struct pnv_vas_window *window; int winid; if (!pswid) { @@ -1431,13 +1398,74 @@ struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, * by NX). */ if (!window->tx_win || !window->user_win || !window->nx_win || - window->cop == VAS_COP_TYPE_FAULT || - window->cop == VAS_COP_TYPE_FTW) { + window->vas_win.cop == VAS_COP_TYPE_FAULT || + window->vas_win.cop == VAS_COP_TYPE_FTW) { pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n", winid, window->tx_win, window->user_win, - window->nx_win, window->cop); + window->nx_win, window->vas_win.cop); WARN_ON(1); } return window; } + +static struct vas_window *vas_user_win_open(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + struct vas_tx_win_attr txattr = {}; + + vas_init_tx_win_attr(&txattr, cop_type); + + txattr.lpid = mfspr(SPRN_LPID); + txattr.pidr = mfspr(SPRN_PID); + txattr.user_win = true; + txattr.rsvd_txbuf_count = false; + txattr.pswid = false; + + pr_devel("Pid %d: Opening txwin, PIDR %ld\n", txattr.pidr, + mfspr(SPRN_PID)); + + return vas_tx_win_open(vas_id, cop_type, &txattr); +} + +static u64 vas_user_win_paste_addr(struct vas_window *txwin) +{ + struct pnv_vas_window *win; + u64 paste_addr; + + win = container_of(txwin, struct pnv_vas_window, vas_win); + vas_win_paste_addr(win, &paste_addr, NULL); + + return paste_addr; +} + +static int vas_user_win_close(struct vas_window *txwin) +{ + vas_win_close(txwin); + + return 0; +} + +static const struct vas_user_win_ops vops = { + .open_win = vas_user_win_open, + .paste_addr = vas_user_win_paste_addr, + .close_win = vas_user_win_close, +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + + return vas_register_coproc_api(mod, cop_type, name, &vops); +} +EXPORT_SYMBOL_GPL(vas_register_api_powernv); + +void vas_unregister_api_powernv(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_powernv); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index c7db3190baca..8bb08e395de0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -334,11 +334,11 @@ struct vas_instance { int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */ spinlock_t fault_lock; /* Protects fifo_in_progress update */ void *fault_fifo; - struct vas_window *fault_win; /* Fault window */ + struct pnv_vas_window *fault_win; /* Fault window */ struct mutex mutex; - struct vas_window *rxwin[VAS_COP_TYPE_MAX]; - struct vas_window *windows[VAS_WINDOWS_PER_CHIP]; + struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX]; + struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP]; char *name; char *dbgname; @@ -346,32 +346,24 @@ struct vas_instance { }; /* - * In-kernel state a VAS window. One per window. + * In-kernel state a VAS window on PowerNV. One per window. */ -struct vas_window { +struct pnv_vas_window { + struct vas_window vas_win; /* Fields common to send and receive windows */ struct vas_instance *vinst; - int winid; bool tx_win; /* True if send window */ bool nx_win; /* True if NX window */ bool user_win; /* True if user space window */ void *hvwc_map; /* HV window context */ void *uwc_map; /* OS/User window context */ - struct pid *pid; /* Linux process id of owner */ - struct pid *tgid; /* Thread group ID of owner */ - struct mm_struct *mm; /* Linux process mm_struct */ - int wcreds_max; /* Window credits */ - - char *dbgname; - struct dentry *dbgdir; /* Fields applicable only to send windows */ void *paste_kaddr; char *paste_addr_name; - struct vas_window *rxwin; + struct pnv_vas_window *rxwin; - /* Feilds applicable only to receive windows */ - enum vas_cop_type cop; + /* Fields applicable only to receive windows */ atomic_t num_txwins; }; @@ -430,32 +422,32 @@ extern struct mutex vas_mutex; extern struct vas_instance *find_vas_instance(int vasid); extern void vas_init_dbgdir(void); extern void vas_instance_init_dbgdir(struct vas_instance *vinst); -extern void vas_window_init_dbgdir(struct vas_window *win); -extern void vas_window_free_dbgdir(struct vas_window *win); +extern void vas_window_init_dbgdir(struct pnv_vas_window *win); +extern void vas_window_free_dbgdir(struct pnv_vas_window *win); extern int vas_setup_fault_window(struct vas_instance *vinst); extern irqreturn_t vas_fault_thread_fn(int irq, void *data); extern irqreturn_t vas_fault_handler(int irq, void *dev_id); -extern void vas_return_credit(struct vas_window *window, bool tx); -extern struct vas_window *vas_pswid_to_window(struct vas_instance *vinst, +extern void vas_return_credit(struct pnv_vas_window *window, bool tx); +extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst, uint32_t pswid); -extern void vas_win_paste_addr(struct vas_window *window, u64 *addr, - int *len); +extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, + int *len); static inline int vas_window_pid(struct vas_window *window) { - return pid_vnr(window->pid); + return pid_vnr(window->task_ref.pid); } -static inline void vas_log_write(struct vas_window *win, char *name, +static inline void vas_log_write(struct pnv_vas_window *win, char *name, void *regptr, u64 val) { if (val) pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n", - win->tx_win ? "Tx" : "Rx", win->winid, name, - regptr, val); + win->tx_win ? "Tx" : "Rx", win->vas_win.winid, + name, regptr, val); } -static inline void write_uwc_reg(struct vas_window *win, char *name, +static inline void write_uwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -466,7 +458,7 @@ static inline void write_uwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline void write_hvwc_reg(struct vas_window *win, char *name, +static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name, s32 reg, u64 val) { void *regptr; @@ -477,7 +469,7 @@ static inline void write_hvwc_reg(struct vas_window *win, char *name, out_be64(regptr, val); } -static inline u64 read_hvwc_reg(struct vas_window *win, +static inline u64 read_hvwc_reg(struct pnv_vas_window *win, char *name __maybe_unused, s32 reg) { return in_be64(win->hvwc_map+reg); diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig index 4d0535cc7946..a4048b8c8c50 100644 --- a/arch/powerpc/platforms/ps3/Kconfig +++ b/arch/powerpc/platforms/ps3/Kconfig @@ -86,6 +86,15 @@ config PS3_SYS_MANAGER This support is required for PS3 system control. In general, all users will say Y or M. +config PS3_VERBOSE_RESULT + bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED + depends on PPC_PS3 + help + Enables more verbose log mesages for LV1 hypercall results. + + If in doubt, say N here and reduce the size of the kernel by a + small amount. + config PS3_REPOSITORY_WRITE bool "PS3 Repository write support" if PS3_ADVANCED depends on PPC_PS3 diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index d094321964fb..a81eac35d900 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include <linux/dma-mapping.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/memblock.h> @@ -1118,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1129,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : ALIGN(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index e9ae5dd03593..3de9145c20bc 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -36,6 +36,7 @@ DEFINE_MUTEX(ps3_gpu_mutex); EXPORT_SYMBOL_GPL(ps3_gpu_mutex); static union ps3_firmware_version ps3_firmware_version; +static char ps3_firmware_version_str[16]; void ps3_get_firmware_version(union ps3_firmware_version *v) { @@ -182,6 +183,40 @@ static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) return lv1_set_dabr(dabr, dabrx) ? -1 : 0; } +static ssize_t ps3_fw_version_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%s", ps3_firmware_version_str); +} + +static int __init ps3_setup_sysfs(void) +{ + static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO, + ps3_fw_version_show, NULL); + static struct kobject *kobj; + int result; + + kobj = kobject_create_and_add("ps3", firmware_kobj); + + if (!kobj) { + pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__, + __LINE__); + return -ENOMEM; + } + + result = sysfs_create_file(kobj, &attr.attr); + + if (result) { + pr_warn("%s:%d: sysfs_create_file failed.\n", __func__, + __LINE__); + kobject_put(kobj); + return -ENOMEM; + } + + return 0; +} +core_initcall(ps3_setup_sysfs); + static void __init ps3_setup_arch(void) { u64 tmp; @@ -190,9 +225,11 @@ static void __init ps3_setup_arch(void) lv1_get_version_info(&ps3_firmware_version.raw, &tmp); - printk(KERN_INFO "PS3 firmware version %u.%u.%u\n", - ps3_firmware_version.major, ps3_firmware_version.minor, - ps3_firmware_version.rev); + snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str), + "%u.%u.%u", ps3_firmware_version.major, + ps3_firmware_version.minor, ps3_firmware_version.rev); + + printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str); ps3_spu_set_platform(); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b431f41c6cb5..1a5665875165 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -64,9 +64,10 @@ static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev) result = lv1_open_device(dev->bus_id, dev->dev_id, 0); if (result) { - pr_debug("%s:%d: lv1_open_device failed: %s\n", __func__, - __LINE__, ps3_result(result)); - result = -EPERM; + pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n", + __func__, __LINE__, dev->match_id, dev->match_sub_id, + dev_name(&dev->core), ps3_result(result)); + result = -EPERM; } done: @@ -120,7 +121,7 @@ static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev) result = lv1_gpu_open(0); if (result) { - pr_debug("%s:%d: lv1_gpu_open failed: %s\n", __func__, + pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__, __LINE__, ps3_result(result)); result = -EPERM; } diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c8a2b0b05ac0..4cda0ef87be0 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -30,3 +30,4 @@ obj-$(CONFIG_PPC_SVM) += svm.o obj-$(CONFIG_FA_DUMP) += rtas-fadump.o obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PPC_VAS) += vas.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 3ac70790ec7a..b1f01ac0c29e 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_UNUSABLE) return -1; @@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; @@ -333,8 +331,7 @@ int dlpar_unisolate_drc(u32 drc_index) { int dr_status, rc; - rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, - DR_ENTITY_SENSE, drc_index); + rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); if (rc || dr_status != DR_ENTITY_PRESENT) return -1; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8377f1f7c78e..377d852f5a9a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -348,7 +348,8 @@ static int pseries_remove_mem_node(struct device_node *np) static bool lmb_is_removable(struct drmem_lmb *lmb) { - if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) + if ((lmb->flags & DRCONF_MEM_RESERVED) || + !(lmb->flags & DRCONF_MEM_ASSIGNED)) return false; #ifdef CONFIG_FA_DUMP @@ -401,7 +402,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) { struct drmem_lmb *lmb; - int lmbs_removed = 0; + int lmbs_reserved = 0; int lmbs_available = 0; int rc; @@ -435,12 +436,12 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) */ drmem_mark_lmb_reserved(lmb); - lmbs_removed++; - if (lmbs_removed == lmbs_to_remove) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_remove) break; } - if (lmbs_removed != lmbs_to_remove) { + if (lmbs_reserved != lmbs_to_remove) { pr_err("Memory hot-remove failed, adding LMB's back\n"); for_each_drmem_lmb(lmb) { @@ -453,6 +454,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->drc_index); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = -EINVAL; @@ -466,6 +471,10 @@ static int dlpar_memory_remove_by_count(u32 lmbs_to_remove) lmb->base_addr); drmem_remove_lmb_reservation(lmb); + + lmbs_reserved--; + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -508,7 +517,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index) static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-remove %u LMB(s) at %x\n", @@ -521,18 +529,29 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (rc) return -EINVAL; - /* Validate that there are enough LMBs to satisfy the request */ + /* + * Validate that all LMBs in range are not reserved. Note that it + * is ok if they are !ASSIGNED since our goal here is to remove the + * LMB range, regardless of whether some LMBs were already removed + * by any other reason. + * + * This is a contrast to what is done in remove_by_count() where we + * check for both RESERVED and !ASSIGNED (via lmb_is_removable()), + * because we want to remove a fixed amount of LMBs in that function. + */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_remove) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { + /* + * dlpar_remove_lmb() will error out if the LMB is already + * !ASSIGNED, but this case is a no-op for us. + */ if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) continue; @@ -551,6 +570,13 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) if (!drmem_lmb_reserved(lmb)) continue; + /* + * Setting the isolation state of an UNISOLATED/CONFIGURED + * device to UNISOLATE is a no-op, but the hypervisor can + * use it as a hint that the LMB removal failed. + */ + dlpar_unisolate_drc(lmb->drc_index); + rc = dlpar_add_lmb(lmb); if (rc) pr_err("Failed to add LMB, drc index %x\n", @@ -585,10 +611,6 @@ static inline int pseries_remove_mem_node(struct device_node *np) { return 0; } -static inline int dlpar_memory_remove(struct pseries_hp_errorlog *hp_elog) -{ - return -EOPNOTSUPP; -} static int dlpar_remove_lmb(struct drmem_lmb *lmb) { return -EOPNOTSUPP; @@ -651,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) { struct drmem_lmb *lmb; int lmbs_available = 0; - int lmbs_added = 0; + int lmbs_reserved = 0; int rc; pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add); @@ -661,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) /* Validate that there are enough LMBs to satisfy the request */ for_each_drmem_lmb(lmb) { + if (lmb->flags & DRCONF_MEM_RESERVED) + continue; + if (!(lmb->flags & DRCONF_MEM_ASSIGNED)) lmbs_available++; @@ -689,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) * requested LMBs cannot be added. */ drmem_mark_lmb_reserved(lmb); - - lmbs_added++; - if (lmbs_added == lmbs_to_add) + lmbs_reserved++; + if (lmbs_reserved == lmbs_to_add) break; } - if (lmbs_added != lmbs_to_add) { + if (lmbs_reserved != lmbs_to_add) { pr_err("Memory hot-add failed, removing any added LMBs\n"); for_each_drmem_lmb(lmb) { @@ -710,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) dlpar_release_drc(lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = -EINVAL; } else { @@ -720,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add) pr_debug("Memory at %llx (drc index %x) was hot-added\n", lmb->base_addr, lmb->drc_index); drmem_remove_lmb_reservation(lmb); + lmbs_reserved--; + + if (lmbs_reserved == 0) + break; } rc = 0; } @@ -764,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index) static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) { struct drmem_lmb *lmb, *start_lmb, *end_lmb; - int lmbs_available = 0; int rc; pr_info("Attempting to hot-add %u LMB(s) at index %x\n", @@ -779,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index) /* Validate that the LMBs in this range are not reserved */ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { - if (lmb->flags & DRCONF_MEM_RESERVED) - break; - - lmbs_available++; + /* Fail immediately if the whole range can't be hot-added */ + if (lmb->flags & DRCONF_MEM_RESERVED) { + pr_err("Memory at %llx (drc index %x) is reserved\n", + lmb->base_addr, lmb->drc_index); + return -EINVAL; + } } - if (lmbs_available < lmbs_to_add) - return -EINVAL; - for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) { if (lmb->flags & DRCONF_MEM_ASSIGNED) continue; diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 8a2b8d64265b..ab9fc6506861 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -108,6 +108,10 @@ _GLOBAL_TOC(plpar_hcall_norets_notrace) mfcr r0 stw r0,8(r1) HVSC /* invoke the hypervisor */ + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -120,6 +124,9 @@ _GLOBAL_TOC(plpar_hcall_norets) HCALL_BRANCH(plpar_hcall_norets_trace) HVSC /* invoke the hypervisor */ + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr /* return r3 = status */ @@ -129,6 +136,10 @@ plpar_hcall_norets_trace: HCALL_INST_PRECALL(R4) HVSC HCALL_INST_POSTCALL_NORETS + + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 blr @@ -159,6 +170,9 @@ _GLOBAL_TOC(plpar_hcall) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -188,6 +202,9 @@ plpar_hcall_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -223,6 +240,9 @@ _GLOBAL(plpar_hcall_raw) std r6, 16(r12) std r7, 24(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -262,6 +282,9 @@ _GLOBAL_TOC(plpar_hcall9) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -300,6 +323,9 @@ plpar_hcall9_trace: HCALL_INST_POSTCALL(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 @@ -339,6 +365,9 @@ _GLOBAL(plpar_hcall9_raw) std r11,56(r12) std r0, 64(r12) + li r4,0 + stb r4,PACASRR_VALID(r13) + lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ef26fe40efb0..f48e87ac89c9 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -18,6 +18,7 @@ #include <asm/plpar_wrappers.h> #include <asm/papr_pdsm.h> #include <asm/mce.h> +#include <asm/unaligned.h> #define BIND_ANY_ADDR (~0ul) @@ -114,6 +115,9 @@ struct papr_scm_priv { /* Health information for the dimm */ u64 health_bitmap; + /* Holds the last known dirty shutdown counter value */ + u64 dirty_shutdown_counter; + /* length of the stat buffer as expected by phyp */ size_t stat_buffer_len; }; @@ -260,7 +264,7 @@ err_out: * Query the Dimm performance stats from PHYP and copy them (if returned) to * provided struct papr_scm_perf_stats instance 'stats' that can hold atleast * (num_stats + header) bytes. - * - If buff_stats == NULL the return value is the size in byes of the buffer + * - If buff_stats == NULL the return value is the size in bytes of the buffer * needed to hold all supported performance-statistics. * - If buff_stats != NULL and num_stats == 0 then we copy all known * performance-statistics to 'buff_stat' and expect to be large enough to @@ -310,6 +314,13 @@ static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, dev_err(&p->pdev->dev, "Unknown performance stats, Err:0x%016lX\n", ret[0]); return -ENOENT; + } else if (rc == H_AUTHORITY) { + dev_info(&p->pdev->dev, + "Permission denied while accessing performance stats"); + return -EPERM; + } else if (rc == H_UNSUPPORTED) { + dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); + return -EOPNOTSUPP; } else if (rc != H_SUCCESS) { dev_err(&p->pdev->dev, "Failed to query performance stats, Err:%lld\n", rc); @@ -596,6 +607,16 @@ free_stats: return rc; } +/* Add the dirty-shutdown-counter value to the pdsm */ +static int papr_pdsm_dsc(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; + payload->health.dimm_dsc = p->dirty_shutdown_counter; + + return sizeof(struct nd_papr_pdsm_health); +} + /* Fetch the DIMM health info and populate it in provided package. */ static int papr_pdsm_health(struct papr_scm_priv *p, union nd_pdsm_payload *payload) @@ -639,6 +660,8 @@ static int papr_pdsm_health(struct papr_scm_priv *p, /* Populate the fuel gauge meter in the payload */ papr_pdsm_fuel_gauge(p, payload); + /* Populate the dirty-shutdown-counter field */ + papr_pdsm_dsc(p, payload); rc = sizeof(struct nd_papr_pdsm_health); @@ -900,15 +923,41 @@ static ssize_t flags_show(struct device *dev, } DEVICE_ATTR_RO(flags); +static ssize_t dirty_shutdown_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + + return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); +} +DEVICE_ATTR_RO(dirty_shutdown); + +static umode_t papr_nd_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct nvdimm *nvdimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); + + /* For if perf-stats not available remove perf_stats sysfs */ + if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) + return 0; + + return attr->mode; +} + /* papr_scm specific dimm attributes */ static struct attribute *papr_nd_attributes[] = { &dev_attr_flags.attr, &dev_attr_perf_stats.attr, + &dev_attr_dirty_shutdown.attr, NULL, }; static struct attribute_group papr_nd_attribute_group = { .name = "papr", + .is_visible = papr_nd_attribute_visible, .attrs = papr_nd_attributes, }; @@ -924,7 +973,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) struct nd_region_desc ndr_desc; unsigned long dimm_flags; int target_nid, online_nid; - ssize_t stat_size; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -1009,16 +1057,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) list_add_tail(&p->region_list, &papr_nd_regions); mutex_unlock(&papr_ndr_lock); - /* Try retriving the stat buffer and see if its supported */ - stat_size = drc_pmem_query_stats(p, NULL, 0); - if (stat_size > 0) { - p->stat_buffer_len = stat_size; - dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", - p->stat_buffer_len); - } else { - dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n"); - } - return 0; err: nvdimm_bus_unregister(p->bus); @@ -1094,8 +1132,10 @@ static int papr_scm_probe(struct platform_device *pdev) u32 drc_index, metadata_size; u64 blocks, block_size; struct papr_scm_priv *p; + u8 uuid_raw[UUID_SIZE]; const char *uuid_str; - u64 uuid[2]; + ssize_t stat_size; + uuid_t uuid; int rc; /* check we have all the required DT properties */ @@ -1137,17 +1177,28 @@ static int papr_scm_probe(struct platform_device *pdev) p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); + if (of_property_read_u64(dn, "ibm,persistence-failed-count", + &p->dirty_shutdown_counter)) + p->dirty_shutdown_counter = 0; + /* We just need to ensure that set cookies are unique across */ - uuid_parse(uuid_str, (uuid_t *) uuid); + uuid_parse(uuid_str, &uuid); + /* - * cookie1 and cookie2 are not really little endian - * we store a little endian representation of the - * uuid str so that we can compare this with the label - * area cookie irrespective of the endian config with which - * the kernel is built. + * The cookie1 and cookie2 are not really little endian. + * We store a raw buffer representation of the + * uuid string so that we can compare this with the label + * area cookie irrespective of the endian configuration + * with which the kernel is built. + * + * Historically we stored the cookie in the below format. + * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa + * cookie1 was 0xfd423b0b671b5172 + * cookie2 was 0xaabce8cae35b1d8d */ - p->nd_set.cookie1 = cpu_to_le64(uuid[0]); - p->nd_set.cookie2 = cpu_to_le64(uuid[1]); + export_uuid(uuid_raw, &uuid); + p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); + p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); /* might be zero */ p->metadata_size = metadata_size; @@ -1172,6 +1223,14 @@ static int papr_scm_probe(struct platform_device *pdev) p->res.name = pdev->name; p->res.flags = IORESOURCE_MEM; + /* Try retrieving the stat buffer and see if its supported */ + stat_size = drc_pmem_query_stats(p, NULL, 0); + if (stat_size > 0) { + p->stat_buffer_len = stat_size; + dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", + p->stat_buffer_len); + } + rc = papr_scm_nvdimm_init(p); if (rc) goto err2; diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9d4ef65da7f3..167f2e1b8d39 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -487,8 +487,8 @@ int pSeries_system_reset_exception(struct pt_regs *regs) if ((be64_to_cpu(regs->msr) & (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { - regs->nip = be64_to_cpu((__be64)regs->nip); - regs->msr = 0; + regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); + regs_set_return_msr(regs, 0); } #endif @@ -593,8 +593,6 @@ static int mce_handle_err_virtmode(struct pt_regs *regs, mce_err.severity = MCE_SEV_SEVERE; else if (severity == RTAS_SEVERITY_ERROR) mce_err.severity = MCE_SEV_SEVERE; - else if (severity == RTAS_SEVERITY_FATAL) - mce_err.severity = MCE_SEV_FATAL; else mce_err.severity = MCE_SEV_FATAL; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 754e493b7c05..631a0d57b6cd 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -549,6 +549,15 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER) + security_ftr_clear(SEC_FTR_STF_BARRIER); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index c70b4be9f0a5..096629f54576 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -211,7 +211,9 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; - if (check_kvm_guest()) { + check_kvm_guest(); + + if (is_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp * faults to the hypervisor which then reads the instruction diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c new file mode 100644 index 000000000000..b5c1cf1bc64d --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.c @@ -0,0 +1,595 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020-21 IBM Corp. + */ + +#define pr_fmt(fmt) "vas: " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#include <asm/vas.h> +#include "vas.h" + +#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul +#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul +/* The hypervisor allows one credit per window right now */ +#define DEF_WIN_CREDS 1 + +static struct vas_all_caps caps_all; +static bool copypaste_feat; + +static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE]; +static DEFINE_MUTEX(vas_pseries_mutex); + +static long hcall_return_busy_check(long rc) +{ + /* Check if we are stalled for some time */ + if (H_IS_LONG_BUSY(rc)) { + msleep(get_longbusy_msecs(rc)); + rc = H_BUSY; + } else if (rc == H_BUSY) { + cond_resched(); + } + + return rc; +} + +/* + * Allocate VAS window hcall + */ +static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain, + u8 wintype, u16 credits) +{ + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + long rc; + + do { + rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype, + credits, domain[0], domain[1], domain[2], + domain[3], domain[4], domain[5]); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) { + if (win->win_addr == VAS_INVALID_WIN_ADDRESS) { + pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + win->vas_win.winid = retbuf[0]; + win->win_addr = retbuf[1]; + win->complete_irq = retbuf[2]; + win->fault_irq = retbuf[3]; + return 0; + } + + pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n", + rc, wintype, credits); + + return -EIO; +} + +/* + * Deallocate VAS window hcall. + */ +static int h_deallocate_vas_window(u64 winid) +{ + long rc; + + do { + rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n", + rc, winid); + return -EIO; +} + +/* + * Modify VAS window. + * After the window is opened with allocate window hcall, configure it + * with flags and LPAR PID before using. + */ +static int h_modify_vas_window(struct pseries_vas_window *win) +{ + long rc; + u32 lpid = mfspr(SPRN_PID); + + /* + * AMR value is not supported in Linux VAS implementation. + * The hypervisor ignores it if 0 is passed. + */ + do { + rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW, + win->vas_win.winid, lpid, 0, + VAS_MOD_WIN_FLAGS, 0); + + rc = hcall_return_busy_check(rc); + } while (rc == H_BUSY); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n", + rc, win->vas_win.winid, lpid); + return -EIO; +} + +/* + * This hcall is used to determine the capabilities from the hypervisor. + * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES + * @query_type: If 0 is passed, the hypervisor returns the overall + * capabilities which provides all feature(s) that are + * available. Then query the hypervisor to get the + * corresponding capabilities for the specific feature. + * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS + * and VAS GZIP Default capabilities. + * H_QUERY_NX_CAPABILITIES provides NX GZIP + * capabilities. + * @result: Return buffer to save capabilities. + */ +int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) +{ + long rc; + + rc = plpar_hcall_norets(hcall, query_type, result); + + if (rc == H_SUCCESS) + return 0; + + pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", + hcall, rc, query_type, result); + return -EIO; +} +EXPORT_SYMBOL_GPL(h_query_vas_capabilities); + +/* + * hcall to get fault CRB from the hypervisor. + */ +static int h_get_nx_fault(u32 winid, u64 buffer) +{ + long rc; + + rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer); + + if (rc == H_SUCCESS) + return 0; + + pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n", + rc, winid, buffer); + return -EIO; + +} + +/* + * Handle the fault interrupt. + * When the fault interrupt is received for each window, query the + * hypervisor to get the fault CRB on the specific fault. Then + * process the CRB by updating CSB or send signal if the user space + * CSB is invalid. + * Note: The hypervisor forwards an interrupt for each fault request. + * So one fault CRB to process for each H_GET_NX_FAULT hcall. + */ +irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data) +{ + struct pseries_vas_window *txwin = data; + struct coprocessor_request_block crb; + struct vas_user_win_ref *tsk_ref; + int rc; + + rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb)); + if (!rc) { + tsk_ref = &txwin->vas_win.task_ref; + vas_dump_crb(&crb); + vas_update_csb(&crb, tsk_ref); + } + + return IRQ_HANDLED; +} + +/* + * Allocate window and setup IRQ mapping. + */ +static int allocate_setup_window(struct pseries_vas_window *txwin, + u64 *domain, u8 wintype) +{ + int rc; + + rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS); + if (rc) + return rc; + /* + * On PowerVM, the hypervisor setup and forwards the fault + * interrupt per window. So the IRQ setup and fault handling + * will be done for each open window separately. + */ + txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq); + if (!txwin->fault_virq) { + pr_err("Failed irq mapping %d\n", txwin->fault_irq); + rc = -EINVAL; + goto out_win; + } + + txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d", + txwin->vas_win.winid); + if (!txwin->name) { + rc = -ENOMEM; + goto out_irq; + } + + rc = request_threaded_irq(txwin->fault_virq, NULL, + pseries_vas_fault_thread_fn, IRQF_ONESHOT, + txwin->name, txwin); + if (rc) { + pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n", + txwin->vas_win.winid, txwin->fault_virq, rc); + goto out_free; + } + + txwin->vas_win.wcreds_max = DEF_WIN_CREDS; + + return 0; +out_free: + kfree(txwin->name); +out_irq: + irq_dispose_mapping(txwin->fault_virq); +out_win: + h_deallocate_vas_window(txwin->vas_win.winid); + return rc; +} + +static inline void free_irq_setup(struct pseries_vas_window *txwin) +{ + free_irq(txwin->fault_virq, txwin); + kfree(txwin->name); + irq_dispose_mapping(txwin->fault_virq); +} + +static struct vas_window *vas_allocate_window(int vas_id, u64 flags, + enum vas_cop_type cop_type) +{ + long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID}; + struct vas_cop_feat_caps *cop_feat_caps; + struct vas_caps *caps; + struct pseries_vas_window *txwin; + int rc; + + txwin = kzalloc(sizeof(*txwin), GFP_KERNEL); + if (!txwin) + return ERR_PTR(-ENOMEM); + + /* + * A VAS window can have many credits which means that many + * requests can be issued simultaneously. But the hypervisor + * restricts one credit per window. + * The hypervisor introduces 2 different types of credits: + * Default credit type (Uses normal priority FIFO): + * A limited number of credits are assigned to partitions + * based on processor entitlement. But these credits may be + * over-committed on a system depends on whether the CPUs + * are in shared or dedicated modes - that is, more requests + * may be issued across the system than NX can service at + * once which can result in paste command failure (RMA_busy). + * Then the process has to resend requests or fall-back to + * SW compression. + * Quality of Service (QoS) credit type (Uses high priority FIFO): + * To avoid NX HW contention, the system admins can assign + * QoS credits for each LPAR so that this partition is + * guaranteed access to NX resources. These credits are + * assigned to partitions via the HMC. + * Refer PAPR for more information. + * + * Allocate window with QoS credits if user requested. Otherwise + * default credits are used. + */ + if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT) + caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE]; + else + caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE]; + + cop_feat_caps = &caps->caps; + + if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) > + atomic_read(&cop_feat_caps->target_lpar_creds)) { + pr_err("Credits are not available to allocate window\n"); + rc = -EINVAL; + goto out; + } + + if (vas_id == -1) { + /* + * The user space is requesting to allocate a window on + * a VAS instance where the process is executing. + * On PowerVM, domain values are passed to the hypervisor + * to select VAS instance. Useful if the process is + * affinity to NUMA node. + * The hypervisor selects VAS instance if + * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values. + * The h_allocate_vas_window hcall is defined to take a + * domain values as specified by h_home_node_associativity, + * So no unpacking needs to be done. + */ + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain, + VPHN_FLAG_VCPU, smp_processor_id()); + if (rc != H_SUCCESS) { + pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc); + goto out; + } + } + + /* + * Allocate / Deallocate window hcalls and setup / free IRQs + * have to be protected with mutex. + * Open VAS window: Allocate window hcall and setup IRQ + * Close VAS window: Deallocate window hcall and free IRQ + * The hypervisor waits until all NX requests are + * completed before closing the window. So expects OS + * to handle NX faults, means IRQ can be freed only + * after the deallocate window hcall is returned. + * So once the window is closed with deallocate hcall before + * the IRQ is freed, it can be assigned to new allocate + * hcall with the same fault IRQ by the hypervisor. It can + * result in setup IRQ fail for the new window since the + * same fault IRQ is not freed by the OS before. + */ + mutex_lock(&vas_pseries_mutex); + rc = allocate_setup_window(txwin, (u64 *)&domain[0], + cop_feat_caps->win_type); + mutex_unlock(&vas_pseries_mutex); + if (rc) + goto out; + + /* + * Modify window and it is ready to use. + */ + rc = h_modify_vas_window(txwin); + if (!rc) + rc = get_vas_user_win_ref(&txwin->vas_win.task_ref); + if (rc) + goto out_free; + + vas_user_win_add_mm_context(&txwin->vas_win.task_ref); + txwin->win_type = cop_feat_caps->win_type; + mutex_lock(&vas_pseries_mutex); + list_add(&txwin->win_list, &caps->list); + mutex_unlock(&vas_pseries_mutex); + + return &txwin->vas_win; + +out_free: + /* + * Window is not operational. Free IRQ before closing + * window so that do not have to hold mutex. + */ + free_irq_setup(txwin); + h_deallocate_vas_window(txwin->vas_win.winid); +out: + atomic_dec(&cop_feat_caps->used_lpar_creds); + kfree(txwin); + return ERR_PTR(rc); +} + +static u64 vas_paste_address(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + return win->win_addr; +} + +static int deallocate_free_window(struct pseries_vas_window *win) +{ + int rc = 0; + + /* + * The hypervisor waits for all requests including faults + * are processed before closing the window - Means all + * credits have to be returned. In the case of fault + * request, a credit is returned after OS issues + * H_GET_NX_FAULT hcall. + * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW + * hcall. + */ + rc = h_deallocate_vas_window(win->vas_win.winid); + if (!rc) + free_irq_setup(win); + + return rc; +} + +static int vas_deallocate_window(struct vas_window *vwin) +{ + struct pseries_vas_window *win; + struct vas_cop_feat_caps *caps; + int rc = 0; + + if (!vwin) + return -EINVAL; + + win = container_of(vwin, struct pseries_vas_window, vas_win); + + /* Should not happen */ + if (win->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Window (%u): Invalid window type %u\n", + vwin->winid, win->win_type); + return -EINVAL; + } + + caps = &vascaps[win->win_type].caps; + mutex_lock(&vas_pseries_mutex); + rc = deallocate_free_window(win); + if (rc) { + mutex_unlock(&vas_pseries_mutex); + return rc; + } + + list_del(&win->win_list); + atomic_dec(&caps->used_lpar_creds); + mutex_unlock(&vas_pseries_mutex); + + put_vas_user_win_ref(&vwin->task_ref); + mm_context_remove_vas_window(vwin->task_ref.mm); + + kfree(win); + return 0; +} + +static const struct vas_user_win_ops vops_pseries = { + .open_win = vas_allocate_window, /* Open and configure window */ + .paste_addr = vas_paste_address, /* To do copy/paste */ + .close_win = vas_deallocate_window, /* Close window */ +}; + +/* + * Supporting only nx-gzip coprocessor type now, but this API code + * extended to other coprocessor types later. + */ +int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type, + const char *name) +{ + int rc; + + if (!copypaste_feat) + return -ENOTSUPP; + + rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries); + + return rc; +} +EXPORT_SYMBOL_GPL(vas_register_api_pseries); + +void vas_unregister_api_pseries(void) +{ + vas_unregister_coproc_api(); +} +EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); + +/* + * Get the specific capabilities based on the feature type. + * Right now supports GZIP default and GZIP QoS capabilities. + */ +static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, + struct hv_vas_cop_feat_caps *hv_caps) +{ + struct vas_cop_feat_caps *caps; + struct vas_caps *vcaps; + int rc = 0; + + vcaps = &vascaps[type]; + memset(vcaps, 0, sizeof(*vcaps)); + INIT_LIST_HEAD(&vcaps->list); + + caps = &vcaps->caps; + + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat, + (u64)virt_to_phys(hv_caps)); + if (rc) + return rc; + + caps->user_mode = hv_caps->user_mode; + if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) { + pr_err("User space COPY/PASTE is not supported\n"); + return -ENOTSUPP; + } + + caps->descriptor = be64_to_cpu(hv_caps->descriptor); + caps->win_type = hv_caps->win_type; + if (caps->win_type >= VAS_MAX_FEAT_TYPE) { + pr_err("Unsupported window type %u\n", caps->win_type); + return -EINVAL; + } + caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds); + caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds); + atomic_set(&caps->target_lpar_creds, + be16_to_cpu(hv_caps->target_lpar_creds)); + if (feat == VAS_GZIP_DEF_FEAT) { + caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds); + + if (caps->max_win_creds < DEF_WIN_CREDS) { + pr_err("Window creds(%u) > max allowed window creds(%u)\n", + DEF_WIN_CREDS, caps->max_win_creds); + return -EINVAL; + } + } + + copypaste_feat = true; + + return 0; +} + +static int __init pseries_vas_init(void) +{ + struct hv_vas_cop_feat_caps *hv_cop_caps; + struct hv_vas_all_caps *hv_caps; + int rc; + + /* + * Linux supports user space COPY/PASTE only with Radix + */ + if (!radix_enabled()) { + pr_err("API is supported only with radix page tables\n"); + return -ENOTSUPP; + } + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return -ENOMEM; + /* + * Get VAS overall capabilities by passing 0 to feature type. + */ + rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_all.descriptor = be64_to_cpu(hv_caps->descriptor); + caps_all.feat_type = be64_to_cpu(hv_caps->feat_type); + + hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL); + if (!hv_cop_caps) { + rc = -ENOMEM; + goto out; + } + /* + * QOS capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT, + VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps); + + if (rc) + goto out_cop; + } + /* + * Default capabilities available + */ + if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) { + rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT, + VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps); + if (rc) + goto out_cop; + } + + pr_info("GZIP feature is available\n"); + +out_cop: + kfree(hv_cop_caps); +out: + kfree(hv_caps); + return rc; +} +machine_device_initcall(pseries, pseries_vas_init); diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h new file mode 100644 index 000000000000..4ecb3fcabd10 --- /dev/null +++ b/arch/powerpc/platforms/pseries/vas.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2020-21 IBM Corp. + */ + +#ifndef _VAS_H +#define _VAS_H +#include <asm/vas.h> +#include <linux/mutex.h> +#include <linux/stringify.h> + +/* + * VAS window modify flags + */ +#define VAS_MOD_WIN_CLOSE PPC_BIT(0) +#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1) +#define VAS_MOD_WIN_DR PPC_BIT(3) +#define VAS_MOD_WIN_PR PPC_BIT(4) +#define VAS_MOD_WIN_SF PPC_BIT(5) +#define VAS_MOD_WIN_TA PPC_BIT(6) +#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \ + VAS_MOD_WIN_PR | VAS_MOD_WIN_SF) + +#define VAS_WIN_ACTIVE 0x0 +#define VAS_WIN_CLOSED 0x1 +#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */ +/* Process of being modified, deallocated, or quiesced */ +#define VAS_WIN_MOD_IN_PROCESS 0x3 + +#define VAS_COPY_PASTE_USER_MODE 0x00000001 +#define VAS_COP_OP_USER_MODE 0x00000010 + +/* + * Co-processor feature - GZIP QoS windows or GZIP default windows + */ +enum vas_cop_feat_type { + VAS_GZIP_QOS_FEAT_TYPE, + VAS_GZIP_DEF_FEAT_TYPE, + VAS_MAX_FEAT_TYPE, +}; + +/* + * Use to get feature specific capabilities from the + * hypervisor. + */ +struct hv_vas_cop_feat_caps { + __be64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; + __be16 max_lpar_creds; + __be16 max_win_creds; + union { + __be16 reserved; + __be16 def_lpar_creds; /* Used for default capabilities */ + }; + __be16 target_lpar_creds; +} __packed __aligned(0x1000); + +/* + * Feature specific (QoS or default) capabilities. + */ +struct vas_cop_feat_caps { + u64 descriptor; + u8 win_type; /* Default or QoS type */ + u8 user_mode; /* User mode copy/paste or COP HCALL */ + u16 max_lpar_creds; /* Max credits available in LPAR */ + /* Max credits can be assigned per window */ + u16 max_win_creds; + union { + u16 reserved; /* Used for QoS credit type */ + u16 def_lpar_creds; /* Used for default credit type */ + }; + /* Total LPAR available credits. Can be different from max LPAR */ + /* credits due to DLPAR operation */ + atomic_t target_lpar_creds; + atomic_t used_lpar_creds; /* Used credits so far */ + u16 avail_lpar_creds; /* Remaining available credits */ +}; + +/* + * Feature (QoS or Default) specific to store capabilities and + * the list of open windows. + */ +struct vas_caps { + struct vas_cop_feat_caps caps; + struct list_head list; /* List of open windows */ +}; + +/* + * To get window information from the hypervisor. + */ +struct hv_vas_win_lpar { + __be16 version; + u8 win_type; + u8 status; + __be16 credits; /* No of credits assigned to this window */ + __be16 reserved; + __be32 pid; /* LPAR Process ID */ + __be32 tid; /* LPAR Thread ID */ + __be64 win_addr; /* Paste address */ + __be32 interrupt; /* Interrupt when NX request completes */ + __be32 fault; /* Interrupt when NX sees fault */ + /* Associativity Domain Identifiers as returned in */ + /* H_HOME_NODE_ASSOCIATIVITY */ + __be64 domain[6]; + __be64 win_util; /* Number of bytes processed */ +} __packed __aligned(0x1000); + +struct pseries_vas_window { + struct vas_window vas_win; + u64 win_addr; /* Physical paste address */ + u8 win_type; /* QoS or Default window */ + u32 complete_irq; /* Completion interrupt */ + u32 fault_irq; /* Fault interrupt */ + u64 domain[6]; /* Associativity domain Ids */ + /* this window is allocated */ + u64 util; + + /* List of windows opened which is used for LPM */ + struct list_head win_list; + u64 flags; + char *name; + int fault_virq; +}; +#endif /* _VAS_H */ diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 69af73765783..b8f76f3fd994 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1072,7 +1072,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { - regs->nip += 4; + regs_add_return_ip(regs, 4); return 1; } } diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cf..5a95b8ea23d8 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -108,8 +108,8 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) __func__); out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); - regs->msr |= MSR_RI; - regs->nip = extable_fixup(entry); + regs_set_return_msr(regs, regs->msr | MSR_RI); + regs_set_return_ip(regs, extable_fixup(entry)); return 1; } } diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 304614c920aa..063d9195891f 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -12,3 +12,6 @@ config PPC_ICP_HV config PPC_ICS_RTAS def_bool n + +config PPC_ICS_NATIVE + def_bool n diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile index ba1e3117b1c0..747063927c6c 100644 --- a/arch/powerpc/sysdev/xics/Makefile +++ b/arch/powerpc/sysdev/xics/Makefile @@ -4,4 +4,5 @@ obj-y += xics-common.o obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o +obj-$(CONFIG_PPC_ICS_NATIVE) += ics-native.o obj-$(CONFIG_PPC_POWERNV) += ics-opal.o icp-opal.o diff --git a/arch/powerpc/sysdev/xics/ics-native.c b/arch/powerpc/sysdev/xics/ics-native.c new file mode 100644 index 000000000000..d450502f4053 --- /dev/null +++ b/arch/powerpc/sysdev/xics/ics-native.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * ICS backend for OPAL managed interrupts. + * + * Copyright 2011 IBM Corp. + */ + +//#define DEBUG + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/msi.h> +#include <linux/list.h> + +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/errno.h> +#include <asm/xics.h> +#include <asm/opal.h> +#include <asm/firmware.h> + +struct ics_native { + struct ics ics; + struct device_node *node; + void __iomem *base; + u32 ibase; + u32 icount; +}; +#define to_ics_native(_ics) container_of(_ics, struct ics_native, ics) + +static void __iomem *ics_native_xive(struct ics_native *in, unsigned int vec) +{ + return in->base + 0x800 + ((vec - in->ibase) << 2); +} + +static void ics_native_unmask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + unsigned int server; + + pr_devel("ics-native: unmask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); + out_be32(ics_native_xive(in, vec), (server << 8) | DEFAULT_PRIORITY); +} + +static unsigned int ics_native_startup(struct irq_data *d) +{ +#ifdef CONFIG_PCI_MSI + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + if (irq_data_get_msi_desc(d)) + pci_msi_unmask_irq(d); +#endif + + /* unmask it */ + ics_native_unmask_irq(d); + return 0; +} + +static void ics_native_do_mask(struct ics_native *in, unsigned int vec) +{ + out_be32(ics_native_xive(in, vec), 0xff); +} + +static void ics_native_mask_irq(struct irq_data *d) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + + pr_devel("ics-native: mask virq %d [hw 0x%x]\n", d->irq, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + ics_native_do_mask(in, vec); +} + +static int ics_native_set_affinity(struct irq_data *d, + const struct cpumask *cpumask, + bool force) +{ + unsigned int vec = (unsigned int)irqd_to_hwirq(d); + struct ics *ics = irq_data_get_irq_chip_data(d); + struct ics_native *in = to_ics_native(ics); + int server; + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + server = xics_get_irq_server(d->irq, cpumask, 1); + if (server == -1) { + pr_warn("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); + return -1; + } + + xive = in_be32(ics_native_xive(in, vec)); + xive = (xive & 0xff) | (server << 8); + out_be32(ics_native_xive(in, vec), xive); + + return IRQ_SET_MASK_OK; +} + +static struct irq_chip ics_native_irq_chip = { + .name = "ICS", + .irq_startup = ics_native_startup, + .irq_mask = ics_native_mask_irq, + .irq_unmask = ics_native_unmask_irq, + .irq_eoi = NULL, /* Patched at init time */ + .irq_set_affinity = ics_native_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, +}; + +static int ics_native_map(struct ics *ics, unsigned int virq) +{ + unsigned int vec = (unsigned int)virq_to_hw(virq); + struct ics_native *in = to_ics_native(ics); + + pr_devel("%s: vec=0x%x\n", __func__, vec); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + irq_set_chip_and_handler(virq, &ics_native_irq_chip, handle_fasteoi_irq); + irq_set_chip_data(virq, ics); + + return 0; +} + +static void ics_native_mask_unknown(struct ics *ics, unsigned long vec) +{ + struct ics_native *in = to_ics_native(ics); + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return; + + ics_native_do_mask(in, vec); +} + +static long ics_native_get_server(struct ics *ics, unsigned long vec) +{ + struct ics_native *in = to_ics_native(ics); + u32 xive; + + if (vec < in->ibase || vec >= (in->ibase + in->icount)) + return -EINVAL; + + xive = in_be32(ics_native_xive(in, vec)); + return (xive >> 8) & 0xfff; +} + +static int ics_native_host_match(struct ics *ics, struct device_node *node) +{ + struct ics_native *in = to_ics_native(ics); + + return in->node == node; +} + +static struct ics ics_native_template = { + .map = ics_native_map, + .mask_unknown = ics_native_mask_unknown, + .get_server = ics_native_get_server, + .host_match = ics_native_host_match, +}; + +static int __init ics_native_add_one(struct device_node *np) +{ + struct ics_native *ics; + u32 ranges[2]; + int rc, count; + + ics = kzalloc(sizeof(struct ics_native), GFP_KERNEL); + if (!ics) + return -ENOMEM; + ics->node = of_node_get(np); + memcpy(&ics->ics, &ics_native_template, sizeof(struct ics)); + + ics->base = of_iomap(np, 0); + if (!ics->base) { + pr_err("Failed to map %pOFP\n", np); + rc = -ENOMEM; + goto fail; + } + + count = of_property_count_u32_elems(np, "interrupt-ranges"); + if (count < 2 || count & 1) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + rc = -EINVAL; + goto fail; + } + if (count > 2) { + pr_warn("ICS %pOFP has %d ranges, only one supported\n", + np, count >> 1); + } + rc = of_property_read_u32_array(np, "interrupt-ranges", + ranges, 2); + if (rc) { + pr_err("Failed to read interrupt-ranges of %pOFP\n", np); + goto fail; + } + ics->ibase = ranges[0]; + ics->icount = ranges[1]; + + pr_info("ICS native initialized for sources %d..%d\n", + ics->ibase, ics->ibase + ics->icount - 1); + + /* Register ourselves */ + xics_register_ics(&ics->ics); + + return 0; +fail: + of_node_put(ics->node); + kfree(ics); + return rc; +} + +int __init ics_native_init(void) +{ + struct device_node *ics; + bool found_one = false; + + /* We need to patch our irq chip's EOI to point to the + * right ICP + */ + ics_native_irq_chip.irq_eoi = icp_ops->eoi; + + /* Find native ICS in the device-tree */ + for_each_compatible_node(ics, NULL, "openpower,xics-sources") { + if (ics_native_add_one(ics) == 0) + found_one = true; + } + + if (found_one) + pr_info("ICS native backend registered\n"); + + return found_one ? 0 : -ENODEV; +} diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index fdf8db4444b6..b14c502e56a8 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -477,6 +477,8 @@ void __init xics_init(void) if (rc < 0) rc = ics_opal_init(); if (rc < 0) + rc = ics_native_init(); + if (rc < 0) pr_warn("XICS: Cannot find a Source Controller !\n"); /* Initialize common bits */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 84de2d7c2f40..da4d7f225a40 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -70,6 +70,9 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE; static unsigned long xmon_taken = 1; static int xmon_owner; static int xmon_gate; +static int xmon_batch; +static unsigned long xmon_batch_start_cpu; +static cpumask_t xmon_batch_cpus = CPU_MASK_NONE; #else #define xmon_owner 0 #endif /* CONFIG_SMP */ @@ -100,7 +103,7 @@ static long *xmon_fault_jmp[NR_CPUS]; /* Breakpoint stuff */ struct bpt { unsigned long address; - struct ppc_inst *instr; + u32 *instr; atomic_t ref_count; int enabled; unsigned long pad; @@ -133,6 +136,12 @@ static void prdump(unsigned long, long); static int ppc_inst_dump(unsigned long, long, int); static void dump_log_buf(void); +#ifdef CONFIG_SMP +static int xmon_switch_cpu(unsigned long); +static int xmon_batch_next_cpu(void); +static int batch_cmds(struct pt_regs *); +#endif + #ifdef CONFIG_PPC_POWERNV static void dump_opal_msglog(void); #else @@ -216,7 +225,8 @@ Commands:\n\ #ifdef CONFIG_SMP "\ c print cpus stopped in xmon\n\ - c# try to switch to cpu number h (in hex)\n" + c# try to switch to cpu number h (in hex)\n\ + c# $ run command '$' (one of 'r','S' or 't') on all cpus in xmon\n" #endif "\ C checksum\n\ @@ -489,10 +499,10 @@ static void xmon_touch_watchdogs(void) touch_nmi_watchdog(); } -static int xmon_core(struct pt_regs *regs, int fromipi) +static int xmon_core(struct pt_regs *regs, volatile int fromipi) { - int cmd = 0; - struct bpt *bp; + volatile int cmd = 0; + struct bpt *volatile bp; long recurse_jmp[JMP_BUF_LEN]; bool locked_down; unsigned long offset; @@ -514,7 +524,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); } @@ -644,7 +654,12 @@ static int xmon_core(struct pt_regs *regs, int fromipi) spin_cpu_relax(); touch_nmi_watchdog(); } else { - if (!locked_down) + cmd = 1; +#ifdef CONFIG_SMP + if (xmon_batch) + cmd = batch_cmds(regs); +#endif + if (!locked_down && cmd) cmd = cmds(regs); if (locked_down || cmd != 0) { /* exiting xmon */ @@ -702,7 +717,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (regs->msr & MSR_DE) { bp = at_breakpoint(regs->nip); if (bp != NULL) { - regs->nip = (unsigned long) &bp->instr[0]; + regs_set_return_ip(regs, (unsigned long) &bp->instr[0]); atomic_inc(&bp->ref_count); } } @@ -712,7 +727,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (bp != NULL) { int stepped = emulate_step(regs, ppc_inst_read(bp->instr)); if (stepped == 0) { - regs->nip = (unsigned long) &bp->instr[0]; + regs_set_return_ip(regs, (unsigned long) &bp->instr[0]); atomic_inc(&bp->ref_count); } else if (stepped < 0) { printf("Couldn't single-step %s instruction\n", @@ -766,7 +781,7 @@ static int xmon_bpt(struct pt_regs *regs) /* Are we at the trap at bp->instr[1] for some bp? */ bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL && (offset == 4 || offset == 8)) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); return 1; } @@ -836,7 +851,7 @@ static int xmon_fault_handler(struct pt_regs *regs) if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) == (MSR_IR|MSR_64BIT)) { bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { - regs->nip = bp->address + offset; + regs_set_return_ip(regs, bp->address + offset); atomic_dec(&bp->ref_count); } } @@ -857,7 +872,7 @@ static inline void force_enable_xmon(void) static struct bpt *at_breakpoint(unsigned long pc) { int i; - struct bpt *bp; + struct bpt *volatile bp; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) @@ -946,11 +961,11 @@ static void insert_bpts(void) } patch_instruction(bp->instr, instr); - patch_instruction(ppc_inst_next(bp->instr, &instr), + patch_instruction(ppc_inst_next(bp->instr, bp->instr), ppc_inst(bpinstr)); if (bp->enabled & BP_CIABR) continue; - if (patch_instruction((struct ppc_inst *)bp->address, + if (patch_instruction((u32 *)bp->address, ppc_inst(bpinstr)) != 0) { printf("Couldn't write instruction at %lx, " "disabling breakpoint there\n", bp->address); @@ -992,7 +1007,7 @@ static void remove_bpts(void) if (mread_instr(bp->address, &instr) && ppc_inst_equal(instr, ppc_inst(bpinstr)) && patch_instruction( - (struct ppc_inst *)bp->address, ppc_inst_read(bp->instr)) != 0) + (u32 *)bp->address, ppc_inst_read(bp->instr)) != 0) printf("Couldn't remove breakpoint at %lx\n", bp->address); } @@ -1188,7 +1203,7 @@ cmds(struct pt_regs *excp) #ifdef CONFIG_BOOKE static int do_step(struct pt_regs *regs) { - regs->msr |= MSR_DE; + regs_set_return_msr(regs, regs->msr | MSR_DE); mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); return 1; } @@ -1221,7 +1236,7 @@ static int do_step(struct pt_regs *regs) } } } - regs->msr |= MSR_SE; + regs_set_return_msr(regs, regs->msr | MSR_SE); return 1; } #endif @@ -1243,11 +1258,112 @@ static void bootcmds(void) } } +#ifdef CONFIG_SMP +static int xmon_switch_cpu(unsigned long cpu) +{ + int timeout; + + xmon_taken = 0; + mb(); + xmon_owner = cpu; + timeout = 10000000; + while (!xmon_taken) { + if (--timeout == 0) { + if (test_and_set_bit(0, &xmon_taken)) + break; + /* take control back */ + mb(); + xmon_owner = smp_processor_id(); + printf("cpu 0x%lx didn't take control\n", cpu); + return 0; + } + barrier(); + } + return 1; +} + +static int xmon_batch_next_cpu(void) +{ + unsigned long cpu; + + while (!cpumask_empty(&xmon_batch_cpus)) { + cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus, + xmon_batch_start_cpu, true); + if (cpu == nr_cpumask_bits) + break; + if (xmon_batch_start_cpu == -1) + xmon_batch_start_cpu = cpu; + if (xmon_switch_cpu(cpu)) + return 0; + cpumask_clear_cpu(cpu, &xmon_batch_cpus); + } + + xmon_batch = 0; + printf("%x:mon> \n", smp_processor_id()); + return 1; +} + +static int batch_cmds(struct pt_regs *excp) +{ + int cmd; + + /* simulate command entry */ + cmd = xmon_batch; + termch = '\n'; + + last_cmd = NULL; + xmon_regs = excp; + + printf("%x:", smp_processor_id()); + printf("mon> "); + printf("%c\n", (char)cmd); + + switch (cmd) { + case 'r': + prregs(excp); /* print regs */ + break; + case 'S': + super_regs(); + break; + case 't': + backtrace(excp); + break; + } + + cpumask_clear_cpu(smp_processor_id(), &xmon_batch_cpus); + + return xmon_batch_next_cpu(); +} + static int cpu_cmd(void) { -#ifdef CONFIG_SMP unsigned long cpu, first_cpu, last_cpu; - int timeout; + + cpu = skipbl(); + if (cpu == '#') { + xmon_batch = skipbl(); + if (xmon_batch) { + switch (xmon_batch) { + case 'r': + case 'S': + case 't': + cpumask_copy(&xmon_batch_cpus, &cpus_in_xmon); + if (cpumask_weight(&xmon_batch_cpus) <= 1) { + printf("There are no other cpus in xmon\n"); + break; + } + xmon_batch_start_cpu = -1; + if (!xmon_batch_next_cpu()) + return 1; + break; + default: + printf("c# only supports 'r', 'S' and 't' commands\n"); + } + xmon_batch = 0; + return 0; + } + } + termch = cpu; if (!scanhex(&cpu)) { /* print cpus waiting or in xmon */ @@ -1279,27 +1395,15 @@ static int cpu_cmd(void) #endif return 0; } - xmon_taken = 0; - mb(); - xmon_owner = cpu; - timeout = 10000000; - while (!xmon_taken) { - if (--timeout == 0) { - if (test_and_set_bit(0, &xmon_taken)) - break; - /* take control back */ - mb(); - xmon_owner = smp_processor_id(); - printf("cpu 0x%lx didn't take control\n", cpu); - return 0; - } - barrier(); - } - return 1; + + return xmon_switch_cpu(cpu); +} #else +static int cpu_cmd(void) +{ return 0; -#endif /* CONFIG_SMP */ } +#endif /* CONFIG_SMP */ static unsigned short fcstab[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, @@ -2214,7 +2318,7 @@ mread_instr(unsigned long adrs, struct ppc_inst *instr) if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); - *instr = ppc_inst_read((struct ppc_inst *)adrs); + *instr = ppc_inst_read((u32 *)adrs); sync(); /* wait a little while to see if we get a machine check */ __delay(200); @@ -2975,7 +3079,7 @@ static void dump_log_buf(void) { struct kmsg_dump_iter iter; - unsigned char buf[128]; + static unsigned char buf[1024]; size_t len; if (setjmp(bus_error_jmp) != 0) { @@ -3005,7 +3109,7 @@ static void dump_opal_msglog(void) { unsigned char buf[128]; ssize_t res; - loff_t pos = 0; + volatile loff_t pos = 0; if (!firmware_has_feature(FW_FEATURE_OPAL)) { printf("Machine is not running OPAL firmware.\n"); @@ -3160,7 +3264,7 @@ memzcan(void) printf("%.8lx\n", a - mskip); } -static void show_task(struct task_struct *tsk) +static void show_task(struct task_struct *volatile tsk) { unsigned int p_state = READ_ONCE(tsk->__state); char state; @@ -3205,7 +3309,7 @@ static void format_pte(void *ptep, unsigned long pte) static void show_pte(unsigned long addr) { unsigned long tskv = 0; - struct task_struct *tsk = NULL; + struct task_struct *volatile tsk = NULL; struct mm_struct *mm; pgd_t *pgdp; p4d_t *p4dp; @@ -3300,7 +3404,7 @@ static void show_pte(unsigned long addr) static void show_tasks(void) { unsigned long tskv; - struct task_struct *tsk = NULL; + struct task_struct *volatile tsk = NULL; printf(" task_struct ->thread.ksp ->thread.regs PID PPID S P CMD\n"); @@ -3623,7 +3727,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, const char *after) { char *modname; - const char *name = NULL; + const char *volatile name = NULL; unsigned long offset, size; printf(REG, address); @@ -4055,7 +4159,7 @@ void xmon_register_spus(struct list_head *list) static void stop_spus(void) { struct spu *spu; - int i; + volatile int i; u64 tmp; for (i = 0; i < XMON_NUM_SPUS; i++) { @@ -4096,7 +4200,7 @@ static void stop_spus(void) static void restart_spus(void) { struct spu *spu; - int i; + volatile int i; for (i = 0; i < XMON_NUM_SPUS; i++) { if (!spu_info[i].spu) diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig index 23e3d0160e67..2a35e0e785bd 100644 --- a/drivers/crypto/nx/Kconfig +++ b/drivers/crypto/nx/Kconfig @@ -29,6 +29,7 @@ if CRYPTO_DEV_NX_COMPRESS config CRYPTO_DEV_NX_COMPRESS_PSERIES tristate "Compression acceleration support on pSeries platform" depends on PPC_PSERIES && IBMVIO + depends on PPC_VAS default y help Support for PowerPC Nest (NX) compression acceleration. This diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bc89a20e5d9d..d00181a26dd6 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -14,5 +14,5 @@ nx-crypto-objs := nx.o \ obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o nx-compress-objs := nx-842.o -nx-compress-pseries-objs := nx-842-pseries.o +nx-compress-pseries-objs := nx-common-pseries.o nx-compress-powernv-objs := nx-common-powernv.o diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 655361ba9107..32a036ada5d0 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -1092,8 +1092,8 @@ static __init int nx_compress_powernv_init(void) * normal FIFO priority is assigned for userspace. * 842 compression is supported only in kernel. */ - ret = vas_register_coproc_api(THIS_MODULE, VAS_COP_TYPE_GZIP, - "nx-gzip"); + ret = vas_register_api_powernv(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); /* * GZIP is not supported in kernel right now. @@ -1129,7 +1129,7 @@ static void __exit nx_compress_powernv_exit(void) * use. So delete this API use for GZIP engine. */ if (!nx842_ct) - vas_unregister_coproc_api(); + vas_unregister_api_powernv(); crypto_unregister_alg(&nx842_powernv_alg); diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-common-pseries.c index 1491cbfbc071..4e304f6081e4 100644 --- a/drivers/crypto/nx/nx-842-pseries.c +++ b/drivers/crypto/nx/nx-common-pseries.c @@ -9,6 +9,8 @@ */ #include <asm/vio.h> +#include <asm/hvcall.h> +#include <asm/vas.h> #include "nx-842.h" #include "nx_csbcpb.h" /* struct nx_csbcpb */ @@ -19,6 +21,29 @@ MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); MODULE_ALIAS_CRYPTO("842"); MODULE_ALIAS_CRYPTO("842-nx"); +/* + * Coprocessor type specific capabilities from the hypervisor. + */ +struct hv_nx_cop_caps { + __be64 descriptor; + __be64 req_max_processed_len; /* Max bytes in one GZIP request */ + __be64 min_compress_len; /* Min compression size in bytes */ + __be64 min_decompress_len; /* Min decompression size in bytes */ +} __packed __aligned(0x1000); + +/* + * Coprocessor type specific capabilities. + */ +struct nx_cop_caps { + u64 descriptor; + u64 req_max_processed_len; /* Max bytes in one GZIP request */ + u64 min_compress_len; /* Min compression in bytes */ + u64 min_decompress_len; /* Min decompression in bytes */ +}; + +static u64 caps_feat; +static struct nx_cop_caps nx_cop_caps; + static struct nx842_constraints nx842_pseries_constraints = { .alignment = DDE_BUFFER_ALIGN, .multiple = DDE_BUFFER_LAST_MULT, @@ -942,6 +967,36 @@ static struct attribute_group nx842_attribute_group = { .attrs = nx842_sysfs_entries, }; +#define nxcop_caps_read(_name) \ +static ssize_t nxcop_##_name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%lld\n", nx_cop_caps._name); \ +} + +#define NXCT_ATTR_RO(_name) \ + nxcop_caps_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nxcop_##_name##_show, \ + NULL); + +NXCT_ATTR_RO(req_max_processed_len); +NXCT_ATTR_RO(min_compress_len); +NXCT_ATTR_RO(min_decompress_len); + +static struct attribute *nxcop_caps_sysfs_entries[] = { + &dev_attr_req_max_processed_len.attr, + &dev_attr_min_compress_len.attr, + &dev_attr_min_decompress_len.attr, + NULL, +}; + +static struct attribute_group nxcop_caps_attr_group = { + .name = "nx_gzip_caps", + .attrs = nxcop_caps_sysfs_entries, +}; + static struct nx842_driver nx842_pseries_driver = { .name = KBUILD_MODNAME, .owner = THIS_MODULE, @@ -1031,6 +1086,16 @@ static int nx842_probe(struct vio_dev *viodev, goto error; } + if (caps_feat) { + if (sysfs_create_group(&viodev->dev.kobj, + &nxcop_caps_attr_group)) { + dev_err(&viodev->dev, + "Could not create sysfs NX capability entries\n"); + ret = -1; + goto error; + } + } + return 0; error_unlock: @@ -1050,6 +1115,9 @@ static void nx842_remove(struct vio_dev *viodev) pr_info("Removing IBM Power 842 compression device\n"); sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + if (caps_feat) + sysfs_remove_group(&viodev->dev.kobj, &nxcop_caps_attr_group); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); @@ -1065,6 +1133,64 @@ static void nx842_remove(struct vio_dev *viodev) kfree(old_devdata); } +/* + * Get NX capabilities from the hypervisor. + * Only NXGZIP capabilities are provided by the hypersvisor right + * now and these values are available to user space with sysfs. + */ +static void __init nxcop_get_capabilities(void) +{ + struct hv_vas_all_caps *hv_caps; + struct hv_nx_cop_caps *hv_nxc; + int rc; + + hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL); + if (!hv_caps) + return; + /* + * Get NX overall capabilities with feature type=0 + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, 0, + (u64)virt_to_phys(hv_caps)); + if (rc) + goto out; + + caps_feat = be64_to_cpu(hv_caps->feat_type); + /* + * NX-GZIP feature available + */ + if (caps_feat & VAS_NX_GZIP_FEAT_BIT) { + hv_nxc = kmalloc(sizeof(*hv_nxc), GFP_KERNEL); + if (!hv_nxc) + goto out; + /* + * Get capabilities for NX-GZIP feature + */ + rc = h_query_vas_capabilities(H_QUERY_NX_CAPABILITIES, + VAS_NX_GZIP_FEAT, + (u64)virt_to_phys(hv_nxc)); + } else { + pr_err("NX-GZIP feature is not available\n"); + rc = -EINVAL; + } + + if (!rc) { + nx_cop_caps.descriptor = be64_to_cpu(hv_nxc->descriptor); + nx_cop_caps.req_max_processed_len = + be64_to_cpu(hv_nxc->req_max_processed_len); + nx_cop_caps.min_compress_len = + be64_to_cpu(hv_nxc->min_compress_len); + nx_cop_caps.min_decompress_len = + be64_to_cpu(hv_nxc->min_decompress_len); + } else { + caps_feat = 0; + } + + kfree(hv_nxc); +out: + kfree(hv_caps); +} + static const struct vio_device_id nx842_vio_driver_ids[] = { {"ibm,compression-v1", "ibm,compression"}, {"", ""}, @@ -1093,6 +1219,10 @@ static int __init nx842_pseries_init(void) return -ENOMEM; RCU_INIT_POINTER(devdata, new_devdata); + /* + * Get NX capabilities from the hypervisor. + */ + nxcop_get_capabilities(); ret = vio_register_driver(&nx842_vio_driver); if (ret) { @@ -1102,6 +1232,12 @@ static int __init nx842_pseries_init(void) return ret; } + ret = vas_register_api_pseries(THIS_MODULE, VAS_COP_TYPE_GZIP, + "nx-gzip"); + + if (ret) + pr_err("NX-GZIP is not supported. Returned=%d\n", ret); + return 0; } @@ -1112,6 +1248,8 @@ static void __exit nx842_pseries_exit(void) struct nx842_devdata *old_devdata; unsigned long flags; + vas_unregister_api_pseries(); + crypto_unregister_alg(&nx842_pseries_alg); spin_lock_irqsave(&devdata_mutex, flags); diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index e34ae6a442c7..6328abd51ffa 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -358,7 +358,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, ps3_mm_phys_to_lpar(__pa(buf)), bytes, bytes_written); if (result) { - dev_dbg(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " + dev_warn(&dev->core, "%s:%d: lv1_write_virtual_uart failed: " "%s\n", __func__, __LINE__, ps3_result(result)); return result; } diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c index 9d66257e1da5..516e6d14d32e 100644 --- a/drivers/ps3/ps3av.c +++ b/drivers/ps3/ps3av.c @@ -217,9 +217,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, /* send pkt */ res = ps3av_vuart_write(ps3av->dev, send_buf, write_len); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_write() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_write() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -230,9 +230,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, recv_buf, PS3AV_HDR_SIZE, timeout); if (res != PS3AV_HDR_SIZE) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } @@ -240,9 +240,9 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, res = ps3av_vuart_read(ps3av->dev, &recv_buf->cid, recv_buf->size, timeout); if (res < 0) { - dev_dbg(&ps3av->dev->core, - "%s: ps3av_vuart_read() failed (result=%d)\n", - __func__, res); + dev_warn(&ps3av->dev->core, + "%s:%d: ps3av_vuart_read() failed: %s\n", __func__, + __LINE__, ps3_result(res)); return res; } res += PS3AV_HDR_SIZE; /* total len */ @@ -251,8 +251,8 @@ static int ps3av_send_cmd_pkt(const struct ps3av_send_hdr *send_buf, } while (event); if ((cmd | PS3AV_REPLY_BIT) != recv_buf->cid) { - dev_dbg(&ps3av->dev->core, "%s: reply err (result=%x)\n", - __func__, recv_buf->cid); + dev_warn(&ps3av->dev->core, "%s:%d: reply err: %x\n", __func__, + __LINE__, recv_buf->cid); return -EINVAL; } diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index 798f27f40cc2..72b11aa7e0a6 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -249,7 +249,7 @@ static void udbg_hvc_putc(char c) count = hvterm_hvsi_put_chars(0, &c, 1); break; } - } while(count == 0); + } while (count == 0 || count == -EAGAIN); } static int udbg_hvc_getc_poll(void) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 4d0c28c2ba12..e4f3bfe08757 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -400,6 +400,9 @@ void dump_kprobe(struct kprobe *kp); void *alloc_insn_page(void); +void *alloc_optinsn_page(void); +void free_optinsn_page(void *page); + int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 297dc8bbe333..471b1d18a92f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -321,11 +321,21 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, } #ifdef CONFIG_OPTPROBES +void __weak *alloc_optinsn_page(void) +{ + return alloc_insn_page(); +} + +void __weak free_optinsn_page(void *page) +{ + free_insn_page(page); +} + /* For optimized_kprobe buffer */ struct kprobe_insn_cache kprobe_optinsn_slots = { .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex), - .alloc = alloc_insn_page, - .free = free_insn_page, + .alloc = alloc_optinsn_page, + .free = free_optinsn_page, .sym = KPROBE_OPTINSN_PAGE_SYM, .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages), /* .insn_size is initialized later */ diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c index 579f0215c6e7..9836838a529f 100644 --- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c +++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c @@ -14,6 +14,7 @@ #include <time.h> #include <sys/types.h> #include <sys/time.h> +#include <sys/syscall.h> #include <signal.h> static volatile int soak_done; @@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr) unsigned long i; for (i = 0; i < nr; i++) - getppid(); + syscall(__NR_gettid); } #define TIME(A, STR) \ diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index 640fad6cc2c7..0785c2e99d40 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -O3 -m64 -I./include +CFLAGS = -O3 -m64 -I./include -I../include TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk -$(TEST_GEN_FILES): gzip_vas.c +$(TEST_GEN_FILES): gzip_vas.c ../utils.c diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c index b099753b50e4..095195a25687 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c +++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c @@ -60,6 +60,7 @@ #include <assert.h> #include <errno.h> #include <signal.h> +#include "utils.h" #include "nxu.h" #include "nx.h" @@ -70,6 +71,8 @@ FILE *nx_gzip_log; #define FNAME_MAX 1024 #define FEXT ".nx.gz" +#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len" + /* * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. */ @@ -244,6 +247,7 @@ int compress_file(int argc, char **argv, void *handle) struct nx_gzip_crb_cpb_t *cmdp; uint32_t pagelen = 65536; int fault_tries = NX_MAX_FAULTS; + char buf[32]; cmdp = (void *)(uintptr_t) aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), @@ -263,8 +267,17 @@ int compress_file(int argc, char **argv, void *handle) assert(NULL != (outbuf = (char *)malloc(outlen))); nxu_touch_pages(outbuf, outlen, pagelen, 1); - /* Compress piecemeal in smallish chunks */ - chunk = 1<<22; + /* + * On PowerVM, the hypervisor defines the maximum request buffer + * size is defined and this value is available via sysfs. + */ + if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) { + chunk = atoi(buf); + } else { + /* sysfs entry is not available on PowerNV */ + /* Compress piecemeal in smallish chunks */ + chunk = 1<<22; + } /* Write the gzip header to the stream */ num_hdr_bytes = gzip_header_blank(outbuf); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index c5ecb4634094..010160690227 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ fork_cleanup_test ebb_on_child_test \ ebb_on_willing_child_test back_to_back_ebbs_test \ lost_exception_test no_handler_test \ - cycles_with_mmcr2_test + cycles_with_mmcr2_test regs_access_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h index b5bc2b616075..2c803b5b48d6 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h @@ -55,8 +55,6 @@ void ebb_global_disable(void); bool ebb_is_supported(void); void ebb_freeze_pmcs(void); void ebb_unfreeze_pmcs(void); -void event_ebb_init(struct event *e); -void event_leader_ebb_init(struct event *e); int count_pmc(int pmc, uint32_t sample_period); void dump_ebb_state(void); void dump_summary_ebb_state(void); diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c index fc5bf4870d8e..01e827c31169 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c @@ -50,8 +50,6 @@ static int no_handler_test(void) event_close(&event); - dump_ebb_state(); - /* The real test is that we never took an EBB at 0x0 */ return 0; diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c new file mode 100644 index 000000000000..1eda8e9932e8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <setjmp.h> +#include <signal.h> + +#include "ebb.h" + + +/* + * Test that closing the EBB event clears MMCR0_PMCC and + * sets MMCR0_PMCCEXT preventing further read access to the + * group B PMU registers. + */ + +static int regs_access_pmccext(void) +{ + struct event event; + + SKIP_IF(!ebb_is_supported()); + + event_init_named(&event, 0x1001e, "cycles"); + event_leader_ebb_init(&event); + + FAIL_IF(event_open(&event)); + + ebb_enable_pmc_counting(1); + setup_ebb_handler(standard_ebb_callee); + ebb_global_enable(); + FAIL_IF(ebb_event_enable(&event)); + + mtspr(SPRN_PMC1, pmc_sample_period(sample_period)); + + while (ebb_state.stats.ebb_count < 1) + FAIL_IF(core_busy_loop()); + + ebb_global_disable(); + event_close(&event); + + FAIL_IF(ebb_state.stats.ebb_count == 0); + + /* + * For ISA v3.1, verify the test takes a SIGILL when reading + * PMU regs after the event is closed. With the control bit + * in MMCR0 (PMCCEXT) restricting access to group B PMU regs, + * sigill is expected. + */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + FAIL_IF(catch_sigill(dump_ebb_state)); + else + dump_ebb_state(); + + return 0; +} + +int main(void) +{ + return test_harness(regs_access_pmccext, "regs_access_pmccext"); +} diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 844d18cd5f93..7488315fd847 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2 +TEST_PROGS := mitigation-patching.sh + top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh new file mode 100755 index 000000000000..00197acb7ff1 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +set -euo pipefail + +TIMEOUT=10 + +function do_one +{ + local mitigation="$1" + local orig + local start + local now + + orig=$(cat "$mitigation") + + start=$EPOCHSECONDS + now=$start + + while [[ $((now-start)) -lt "$TIMEOUT" ]] + do + echo 0 > "$mitigation" + echo 1 > "$mitigation" + + now=$EPOCHSECONDS + done + + echo "$orig" > "$mitigation" +} + +rc=0 +cd /sys/kernel/debug/powerpc || rc=1 +if [[ "$rc" -ne 0 ]]; then + echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2 + exit 1 +fi + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel already tainted!" >&2 + exit 1 +fi + +mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" + +for m in $mitigations +do + do_one "$m" & +done + +echo "Spawned threads enabling/disabling mitigations ..." + +if stress-ng > /dev/null 2>&1; then + stress="stress-ng" +elif stress > /dev/null 2>&1; then + stress="stress" +else + stress="" +fi + +if [[ -n "$stress" ]]; then + "$stress" -m "$(nproc)" -t "$TIMEOUT" & + echo "Spawned VM stressors ..." +fi + +echo "Waiting for timeout ..." +wait + +tainted=$(cat /proc/sys/kernel/tainted) +if [[ "$tainted" -ne 0 ]]; then + echo "Error: kernel became tainted!" >&2 + exit 1 +fi + +echo "OK" +exit 0 diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c index e2a0c07e8362..9ef37a9836ac 100644 --- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c +++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c @@ -17,7 +17,6 @@ #include <pthread.h> #include <sys/mman.h> #include <unistd.h> -#include <pthread.h> #include "tm.h" #include "utils.h" |