diff options
Diffstat (limited to 'arch')
160 files changed, 1621 insertions, 1093 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 305f741f3fab..29b0167c088b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -164,7 +164,13 @@ config ARCH_USE_BUILTIN_BSWAP config KRETPROBES def_bool y - depends on KPROBES && HAVE_KRETPROBES + depends on KPROBES && (HAVE_KRETPROBES || HAVE_RETHOOK) + +config KRETPROBE_ON_RETHOOK + def_bool y + depends on HAVE_RETHOOK + depends on KRETPROBES + select RETHOOK config USER_RETURN_NOTIFIER bool diff --git a/arch/alpha/include/asm/user.h b/arch/alpha/include/asm/user.h index 3df37492c7b7..c9f525a6aab8 100644 --- a/arch/alpha/include/asm/user.h +++ b/arch/alpha/include/asm/user.h @@ -45,10 +45,4 @@ struct user { char u_comm[32]; /* user command name */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_DATA_START_ADDR (u.start_data) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) - #endif /* _ALPHA_USER_H */ diff --git a/arch/alpha/kernel/syscalls/Makefile b/arch/alpha/kernel/syscalls/Makefile index 6713c65a25e1..b265e4bc16c2 100644 --- a/arch/alpha/kernel/syscalls/Makefile +++ b/arch/alpha/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 187a187706cb..41bcbb460fac 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -92,6 +92,8 @@ ifeq ($(CONFIG_USE_OF),y) OBJS += $(libfdt_objs) fdt_check_mem_start.o endif +OBJS += lib1funcs.o ashldi3.o bswapsdi2.o + targets := vmlinux vmlinux.lds piggy_data piggy.o \ head.o $(OBJS) @@ -126,8 +128,6 @@ endif # Next argument is a linker script LDFLAGS_vmlinux += -T -OBJS += lib1funcs.o ashldi3.o bswapsdi2.o - # We need to prevent any GOTOFF relocs being used with references # to symbols in the .bss section since we cannot relocate them # independently from the rest at run time. This can be achieved by diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 827e887afbda..13e1bdb3ddbf 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -134,9 +134,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 12 0 1>, - <&dwdma0 13 1 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 13 0 1>, + <&dwdma0 12 1 0>; + dma-names = "rx", "tx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index c87b881b2c8b..913553367687 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -284,9 +284,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 4 0 0>, - <&dwdma0 5 0 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 5 0 0>, + <&dwdma0 4 0 0>; + dma-names = "rx", "tx"; }; rtc@e0580000 { diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2b575792363e..e4dba5461cb3 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -102,6 +102,8 @@ config CRYPTO_AES_ARM_BS depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES + select CRYPTO_AES + select CRYPTO_CBC select CRYPTO_SIMD help Use a faster and more secure NEON based implementation of AES in CBC, diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h index c799a3c49342..167d44b550f4 100644 --- a/arch/arm/include/asm/user.h +++ b/arch/arm/include/asm/user.h @@ -77,10 +77,6 @@ struct user{ struct user_fp_struct * u_fp0;/* Used by gdb to help find the values for */ /* the FP registers. */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) /* * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 039feb7cd590..1e8a50a97edf 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1004,7 +1004,8 @@ static void __init reserve_crashkernel(void) total_mem = get_total_mem(); ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base); - if (ret) + /* invalid value specified or crashkernel=0 */ + if (ret || !crash_size) return; if (crash_base <= 0) { diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index b5efecb3d730..d0fa2037460a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -54,17 +54,17 @@ int notrace unwind_frame(struct stackframe *frame) return -EINVAL; frame->sp = frame->fp; - frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); #endif #ifdef CONFIG_KRETPROBES if (is_kretprobe_trampoline(frame->pc)) diff --git a/arch/arm/mach-omap2/omap-secure.c b/arch/arm/mach-omap2/omap-secure.c index 0659ab4cb0af..11677fc2968f 100644 --- a/arch/arm/mach-omap2/omap-secure.c +++ b/arch/arm/mach-omap2/omap-secure.c @@ -59,8 +59,13 @@ static void __init omap_optee_init_check(void) u32 omap_secure_dispatcher(u32 idx, u32 flag, u32 nargs, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { + static u32 buf[NR_CPUS][5]; + u32 *param; + int cpu; u32 ret; - u32 param[5]; + + cpu = get_cpu(); + param = buf[cpu]; param[0] = nargs; param[1] = arg1; @@ -76,6 +81,8 @@ u32 omap_secure_dispatcher(u32 idx, u32 flag, u32 nargs, u32 arg1, u32 arg2, outer_clean_range(__pa(param), __pa(param + 5)); ret = omap_smc2(idx, flag, __pa(param)); + put_cpu(); + return ret; } @@ -119,8 +126,8 @@ phys_addr_t omap_secure_ram_mempool_base(void) #if defined(CONFIG_ARCH_OMAP3) && defined(CONFIG_PM) u32 omap3_save_secure_ram(void __iomem *addr, int size) { + static u32 param[5]; u32 ret; - u32 param[5]; if (size != OMAP3_SAVE_SECURE_RAM_SZ) return OMAP3_SAVE_SECURE_RAM_SZ; @@ -153,8 +160,8 @@ u32 omap3_save_secure_ram(void __iomem *addr, int size) u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, u32 arg1, u32 arg2, u32 arg3, u32 arg4) { + static u32 param[5]; u32 ret; - u32 param[5]; param[0] = nargs+1; /* RX-51 needs number of arguments + 1 */ param[1] = arg1; diff --git a/arch/arm/mach-s3c/mach-jive.c b/arch/arm/mach-s3c/mach-jive.c index 285e1f0f4145..0d7d408c3729 100644 --- a/arch/arm/mach-s3c/mach-jive.c +++ b/arch/arm/mach-s3c/mach-jive.c @@ -236,11 +236,11 @@ static int __init jive_mtdset(char *options) unsigned long set; if (options == NULL || options[0] == '\0') - return 0; + return 1; if (kstrtoul(options, 10, &set)) { printk(KERN_ERR "failed to parse mtdset=%s\n", options); - return 0; + return 1; } switch (set) { @@ -256,7 +256,7 @@ static int __init jive_mtdset(char *options) "using default.", set); } - return 0; + return 1; } /* parse the mtdset= option given to the kernel command line */ diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 4b61541853ea..82ffac621854 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -381,6 +381,7 @@ out: */ postcore_initcall(atomic_pool_init); +#ifdef CONFIG_CMA_AREAS struct dma_contig_early_reserve { phys_addr_t base; unsigned long size; @@ -435,6 +436,7 @@ void __init dma_contiguous_remap(void) iotable_init(&map, 1); } } +#endif static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data) { diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 9ff683612f2a..d7ffccb7fea7 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -88,6 +88,10 @@ extern phys_addr_t arm_lowmem_limit; void __init bootmem_init(void); void arm_mm_memblock_reserve(void); +#ifdef CONFIG_CMA_AREAS void dma_contiguous_remap(void); +#else +static inline void dma_contiguous_remap(void) { } +#endif unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/tools/Makefile b/arch/arm/tools/Makefile index 4a5c50f67ced..81f13bdf32f2 100644 --- a/arch/arm/tools/Makefile +++ b/arch/arm/tools/Makefile @@ -29,8 +29,7 @@ kapi: $(kapi-hdrs-y) $(gen-y) uapi: $(uapi-hdrs-y) # Create output directory if not already present -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') \ - $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') +$(shell mkdir -p $(kapi) $(uapi)) quiet_cmd_gen_mach = GEN $@ cmd_gen_mach = $(AWK) -f $(real-prereqs) > $@ diff --git a/arch/arm64/boot/dts/amd/Makefile b/arch/arm64/boot/dts/amd/Makefile index 6a6093064a32..68103a8b0ef5 100644 --- a/arch/arm64/boot/dts/amd/Makefile +++ b/arch/arm64/boot/dts/amd/Makefile @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive.dtb \ - amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb \ - husky.dtb +dtb-$(CONFIG_ARCH_SEATTLE) += amd-overdrive-rev-b0.dtb amd-overdrive-rev-b1.dtb diff --git a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts index 8e341be9a399..c290d1ce2b03 100644 --- a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts +++ b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b0.dts @@ -9,6 +9,7 @@ /dts-v1/; /include/ "amd-seattle-soc.dtsi" +/include/ "amd-seattle-cpus.dtsi" / { model = "AMD Seattle (Rev.B0) Development Board (Overdrive)"; @@ -36,14 +37,6 @@ status = "ok"; }; -&gpio2 { - status = "ok"; -}; - -&gpio3 { - status = "ok"; -}; - &gpio4 { status = "ok"; }; @@ -79,10 +72,6 @@ }; }; -&ipmi_kcs { - status = "ok"; -}; - &smb0 { /include/ "amd-seattle-xgbe-b.dtsi" }; diff --git a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts index 92cef05c6b74..e0926f6bb7c3 100644 --- a/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts +++ b/arch/arm64/boot/dts/amd/amd-overdrive-rev-b1.dts @@ -9,6 +9,7 @@ /dts-v1/; /include/ "amd-seattle-soc.dtsi" +/include/ "amd-seattle-cpus.dtsi" / { model = "AMD Seattle (Rev.B1) Development Board (Overdrive)"; diff --git a/arch/arm64/boot/dts/amd/amd-overdrive.dts b/arch/arm64/boot/dts/amd/amd-overdrive.dts deleted file mode 100644 index 41b3a6c0993d..000000000000 --- a/arch/arm64/boot/dts/amd/amd-overdrive.dts +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DTS file for AMD Seattle Overdrive Development Board - * - * Copyright (C) 2014 Advanced Micro Devices, Inc. - */ - -/dts-v1/; - -/include/ "amd-seattle-soc.dtsi" - -/ { - model = "AMD Seattle Development Board (Overdrive)"; - compatible = "amd,seattle-overdrive", "amd,seattle"; - - chosen { - stdout-path = &serial0; - }; -}; - -&ccp0 { - status = "ok"; -}; - -&gpio0 { - status = "ok"; -}; - -&gpio1 { - status = "ok"; -}; - -&i2c0 { - status = "ok"; -}; - -&pcie0 { - status = "ok"; -}; - -&spi0 { - status = "ok"; -}; - -&spi1 { - status = "ok"; - sdcard0: sdcard@0 { - compatible = "mmc-spi-slot"; - reg = <0>; - spi-max-frequency = <20000000>; - voltage-ranges = <3200 3400>; - gpios = <&gpio0 7 0>; - interrupt-parent = <&gpio0>; - interrupts = <7 3>; - pl022,hierarchy = <0>; - pl022,interface = <0>; - pl022,com-mode = <0x0>; - pl022,rx-level-trig = <0>; - pl022,tx-level-trig = <0>; - }; -}; - -&v2m0 { - arm,msi-base-spi = <64>; - arm,msi-num-spis = <256>; -}; diff --git a/arch/arm64/boot/dts/amd/amd-seattle-cpus.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-cpus.dtsi new file mode 100644 index 000000000000..93688a0b6820 --- /dev/null +++ b/arch/arm64/boot/dts/amd/amd-seattle-cpus.dtsi @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 + +/ { + cpus { + #address-cells = <0x1>; + #size-cells = <0x0>; + + cpu-map { + cluster0 { + core0 { + cpu = <&CPU0>; + }; + core1 { + cpu = <&CPU1>; + }; + }; + cluster1 { + core0 { + cpu = <&CPU2>; + }; + core1 { + cpu = <&CPU3>; + }; + }; + cluster2 { + core0 { + cpu = <&CPU4>; + }; + core1 { + cpu = <&CPU5>; + }; + }; + cluster3 { + core0 { + cpu = <&CPU6>; + }; + core1 { + cpu = <&CPU7>; + }; + }; + }; + + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x0>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_0>; + + }; + + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x1>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_0>; + }; + + CPU2: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x100>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_1>; + }; + + CPU3: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x101>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_1>; + }; + + CPU4: cpu@200 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x200>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_2>; + }; + + CPU5: cpu@201 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x201>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_2>; + }; + + CPU6: cpu@300 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x300>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_3>; + }; + + CPU7: cpu@301 { + device_type = "cpu"; + compatible = "arm,cortex-a57"; + reg = <0x301>; + enable-method = "psci"; + + i-cache-size = <0xC000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <256>; + l2-cache = <&L2_3>; + }; + }; + + L2_0: l2-cache0 { + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; + cache-unified; + next-level-cache = <&L3>; + }; + + L2_1: l2-cache1 { + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; + cache-unified; + next-level-cache = <&L3>; + }; + + L2_2: l2-cache2 { + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; + cache-unified; + next-level-cache = <&L3>; + }; + + L2_3: l2-cache3 { + cache-size = <0x100000>; + cache-line-size = <64>; + cache-sets = <1024>; + cache-unified; + next-level-cache = <&L3>; + }; + + L3: l3-cache { + cache-level = <3>; + cache-size = <0x800000>; + cache-line-size = <64>; + cache-sets = <8192>; + cache-unified; + }; + + pmu { + compatible = "arm,cortex-a57-pmu"; + interrupts = <0x0 0x7 0x4>, + <0x0 0x8 0x4>, + <0x0 0x9 0x4>, + <0x0 0xa 0x4>, + <0x0 0xb 0x4>, + <0x0 0xc 0x4>, + <0x0 0xd 0x4>, + <0x0 0xe 0x4>; + interrupt-affinity = <&CPU0>, + <&CPU1>, + <&CPU2>, + <&CPU3>, + <&CPU4>, + <&CPU5>, + <&CPU6>, + <&CPU7>; + }; +}; diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index b664e7af74eb..690020589d41 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -38,18 +38,6 @@ <1 10 0xff04>; }; - pmu { - compatible = "arm,armv8-pmuv3"; - interrupts = <0 7 4>, - <0 8 4>, - <0 9 4>, - <0 10 4>, - <0 11 4>, - <0 12 4>, - <0 13 4>, - <0 14 4>; - }; - smb0: smb { compatible = "simple-bus"; #address-cells = <2>; @@ -70,6 +58,7 @@ reg = <0 0xe0300000 0 0xf0000>; interrupts = <0 355 4>; clocks = <&sataclk_333mhz>; + iommus = <&sata0_smmu 0x0 0x1f>; dma-coherent; }; @@ -80,6 +69,27 @@ reg = <0 0xe0d00000 0 0xf0000>; interrupts = <0 354 4>; clocks = <&sataclk_333mhz>; + iommus = <&sata1_smmu 0x0e>, + <&sata1_smmu 0x0f>, + <&sata1_smmu 0x1e>; + dma-coherent; + }; + + sata0_smmu: iommu@e0200000 { + compatible = "arm,mmu-401"; + reg = <0 0xe0200000 0 0x10000>; + #global-interrupts = <1>; + interrupts = <0 332 4>, <0 332 4>; + #iommu-cells = <2>; + dma-coherent; + }; + + sata1_smmu: iommu@e0c00000 { + compatible = "arm,mmu-401"; + reg = <0 0xe0c00000 0 0x10000>; + #global-interrupts = <1>; + interrupts = <0 331 4>, <0 331 4>; + #iommu-cells = <1>; dma-coherent; }; @@ -201,6 +211,10 @@ reg = <0 0xe0100000 0 0x10000>; interrupts = <0 3 4>; dma-coherent; + iommus = <&sata1_smmu 0x00>, + <&sata1_smmu 0x02>, + <&sata1_smmu 0x40>, + <&sata1_smmu 0x42>; }; pcie0: pcie@f0000000 { @@ -213,12 +227,22 @@ msi-parent = <&v2m0>; reg = <0 0xf0000000 0 0x10000000>; - interrupt-map-mask = <0xf800 0x0 0x0 0x7>; + interrupt-map-mask = <0xff00 0x0 0x0 0x7>; interrupt-map = - <0x1000 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x120 0x1>, - <0x1000 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x121 0x1>, - <0x1000 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x122 0x1>, - <0x1000 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x123 0x1>; + <0x1100 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x120 0x1>, + <0x1100 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x121 0x1>, + <0x1100 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x122 0x1>, + <0x1100 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x123 0x1>, + + <0x1200 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x124 0x1>, + <0x1200 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x125 0x1>, + <0x1200 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x126 0x1>, + <0x1200 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x127 0x1>, + + <0x1300 0x0 0x0 0x1 &gic0 0x0 0x0 0x0 0x128 0x1>, + <0x1300 0x0 0x0 0x2 &gic0 0x0 0x0 0x0 0x129 0x1>, + <0x1300 0x0 0x0 0x3 &gic0 0x0 0x0 0x0 0x12a 0x1>, + <0x1300 0x0 0x0 0x4 &gic0 0x0 0x0 0x0 0x12b 0x1>; dma-coherent; dma-ranges = <0x43000000 0x0 0x0 0x0 0x0 0x100 0x0>; @@ -227,8 +251,18 @@ <0x01000000 0x00 0x00000000 0x00 0xefff0000 0x00 0x00010000>, /* 32-bit MMIO (size=2G) */ <0x02000000 0x00 0x40000000 0x00 0x40000000 0x00 0x80000000>, - /* 64-bit MMIO (size= 124G) */ + /* 64-bit MMIO (size= 508G) */ <0x03000000 0x01 0x00000000 0x01 0x00000000 0x7f 0x00000000>; + iommu-map = <0x0 &pcie_smmu 0x0 0x10000>; + }; + + pcie_smmu: iommu@e0a00000 { + compatible = "arm,mmu-401"; + reg = <0 0xe0a00000 0 0x10000>; + #global-interrupts = <1>; + interrupts = <0 333 4>, <0 333 4>; + #iommu-cells = <1>; + dma-coherent; }; /* Perf CCN504 PMU */ diff --git a/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi index d97498361ce3..9259e547e2e8 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-xgbe-b.dtsi @@ -55,7 +55,7 @@ clocks = <&xgmacclk0_dma_250mhz>, <&xgmacclk0_ptp_250mhz>; clock-names = "dma_clk", "ptp_clk"; phy-mode = "xgmii"; - #stream-id-cells = <16>; + iommus = <&xgmac0_smmu 0x00 0x17>; /* 0-7, 16-23 */ dma-coherent; }; @@ -81,11 +81,11 @@ clocks = <&xgmacclk1_dma_250mhz>, <&xgmacclk1_ptp_250mhz>; clock-names = "dma_clk", "ptp_clk"; phy-mode = "xgmii"; - #stream-id-cells = <16>; + iommus = <&xgmac1_smmu 0x00 0x17>; /* 0-7, 16-23 */ dma-coherent; }; - xgmac0_smmu: smmu@e0600000 { + xgmac0_smmu: iommu@e0600000 { compatible = "arm,mmu-401"; reg = <0 0xe0600000 0 0x10000>; #global-interrupts = <1>; @@ -94,14 +94,11 @@ */ <0 336 4>, <0 336 4>; - - mmu-masters = <&xgmac0 - 0 1 2 3 4 5 6 7 - 16 17 18 19 20 21 22 23 - >; + #iommu-cells = <2>; + dma-coherent; }; - xgmac1_smmu: smmu@e0800000 { + xgmac1_smmu: iommu@e0800000 { compatible = "arm,mmu-401"; reg = <0 0xe0800000 0 0x10000>; #global-interrupts = <1>; @@ -110,9 +107,6 @@ */ <0 335 4>, <0 335 4>; - - mmu-masters = <&xgmac1 - 0 1 2 3 4 5 6 7 - 16 17 18 19 20 21 22 23 - >; + #iommu-cells = <2>; + dma-coherent; }; diff --git a/arch/arm64/boot/dts/amd/husky.dts b/arch/arm64/boot/dts/amd/husky.dts deleted file mode 100644 index 7acde34772cb..000000000000 --- a/arch/arm64/boot/dts/amd/husky.dts +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DTS file for AMD/Linaro 96Boards Enterprise Edition Server (Husky) Board - * Note: Based-on AMD Seattle Rev.B0 - * - * Copyright (C) 2015 Advanced Micro Devices, Inc. - */ - -/dts-v1/; - -/include/ "amd-seattle-soc.dtsi" - -/ { - model = "Linaro 96Boards Enterprise Edition Server (Husky) Board"; - compatible = "amd,seattle-overdrive", "amd,seattle"; - - chosen { - stdout-path = &serial0; - }; - - psci { - compatible = "arm,psci-0.2"; - method = "smc"; - }; -}; - -&ccp0 { - status = "ok"; - amd,zlib-support = <1>; -}; - -/** - * NOTE: In Rev.B, gpio0 is reserved. - */ -&gpio1 { - status = "ok"; -}; - -&gpio2 { - status = "ok"; -}; - -&gpio3 { - status = "ok"; -}; - -&gpio4 { - status = "ok"; -}; - -&i2c0 { - status = "ok"; -}; - -&i2c1 { - status = "ok"; -}; - -&pcie0 { - status = "ok"; -}; - -&spi0 { - status = "ok"; -}; - -&spi1 { - status = "ok"; - sdcard0: sdcard@0 { - compatible = "mmc-spi-slot"; - reg = <0>; - spi-max-frequency = <20000000>; - voltage-ranges = <3200 3400>; - pl022,hierarchy = <0>; - pl022,interface = <0>; - pl022,com-mode = <0x0>; - pl022,rx-level-trig = <0>; - pl022,tx-level-trig = <0>; - }; -}; - -&smb0 { - /include/ "amd-seattle-xgbe-b.dtsi" -}; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 01b01e320411..35d1939e690b 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -536,9 +536,9 @@ clock-names = "i2c"; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(1)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 687fea6d8afa..4e7bd04d9798 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -499,9 +499,9 @@ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL QORIQ_CLK_PLL_DIV(2)>; - dmas = <&edma0 1 39>, - <&edma0 1 38>; - dma-names = "tx", "rx"; + dmas = <&edma0 1 38>, + <&edma0 1 39>; + dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/h8300/include/asm/user.h b/arch/h8300/include/asm/user.h index 2298909f24c6..161653d84b34 100644 --- a/arch/h8300/include/asm/user.h +++ b/arch/h8300/include/asm/user.h @@ -67,9 +67,5 @@ struct user { unsigned long magic; /* To uniquely identify a core file */ char u_comm[32]; /* User command that was responsible */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif diff --git a/arch/ia64/include/asm/user.h b/arch/ia64/include/asm/user.h index 0ba486651b7c..ec03d3ab8715 100644 --- a/arch/ia64/include/asm/user.h +++ b/arch/ia64/include/asm/user.h @@ -50,10 +50,4 @@ struct user { char u_comm[32]; /* user command name */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_DATA_START_ADDR (u.start_data) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) - #endif /* _ASM_IA64_USER_H */ diff --git a/arch/ia64/kernel/syscalls/Makefile b/arch/ia64/kernel/syscalls/Makefile index 14f40ecf8b65..d009f927a048 100644 --- a/arch/ia64/kernel/syscalls/Makefile +++ b/arch/ia64/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/m68k/include/asm/user.h b/arch/m68k/include/asm/user.h index 509d555977c8..61413bff613a 100644 --- a/arch/m68k/include/asm/user.h +++ b/arch/m68k/include/asm/user.h @@ -79,9 +79,5 @@ struct user{ unsigned long magic; /* To uniquely identify a core file */ char u_comm[32]; /* User command that was responsible */ }; -#define NBPG 4096 -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif diff --git a/arch/m68k/kernel/syscalls/Makefile b/arch/m68k/kernel/syscalls/Makefile index 6713c65a25e1..b265e4bc16c2 100644 --- a/arch/m68k/kernel/syscalls/Makefile +++ b/arch/m68k/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index cff570a71946..2b42c370d574 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -29,7 +29,7 @@ $(obj)/simpleImage.$(DTB).ub: $(obj)/simpleImage.$(DTB) FORCE $(call if_changed,uimage) $(obj)/simpleImage.$(DTB).unstrip: vmlinux FORCE - $(call if_changed,shipped) + $(call if_changed,copy) $(obj)/simpleImage.$(DTB).strip: vmlinux FORCE $(call if_changed,strip) diff --git a/arch/microblaze/boot/dts/Makefile b/arch/microblaze/boot/dts/Makefile index ef00dd30d19a..b84e2cbb20ee 100644 --- a/arch/microblaze/boot/dts/Makefile +++ b/arch/microblaze/boot/dts/Makefile @@ -12,7 +12,7 @@ $(obj)/linked_dtb.o: $(obj)/system.dtb # Generate system.dtb from $(DTB).dtb ifneq ($(DTB),system) $(obj)/system.dtb: $(obj)/$(DTB).dtb - $(call if_changed,shipped) + $(call if_changed,copy) endif endif diff --git a/arch/microblaze/kernel/syscalls/Makefile b/arch/microblaze/kernel/syscalls/Makefile index 6713c65a25e1..b265e4bc16c2 100644 --- a/arch/microblaze/kernel/syscalls/Makefile +++ b/arch/microblaze/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c index 0a03529cf317..3e4f5ba104f8 100644 --- a/arch/mips/crypto/crc32-mips.c +++ b/arch/mips/crypto/crc32-mips.c @@ -28,7 +28,7 @@ enum crc_type { }; #ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_MACRO_CRC32(OP, SZ, TYPE) \ +#define _ASM_SET_CRC(OP, SZ, TYPE) \ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ".ifnc \\rt, \\rt2\n\t" \ ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ @@ -37,30 +37,36 @@ _ASM_MACRO_3R(OP, rt, rs, rt2, \ ((SZ) << 6) | ((TYPE) << 8)) \ _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ ((SZ) << 14) | ((TYPE) << 3))) -_ASM_MACRO_CRC32(crc32b, 0, 0); -_ASM_MACRO_CRC32(crc32h, 1, 0); -_ASM_MACRO_CRC32(crc32w, 2, 0); -_ASM_MACRO_CRC32(crc32d, 3, 0); -_ASM_MACRO_CRC32(crc32cb, 0, 1); -_ASM_MACRO_CRC32(crc32ch, 1, 1); -_ASM_MACRO_CRC32(crc32cw, 2, 1); -_ASM_MACRO_CRC32(crc32cd, 3, 1); -#define _ASM_SET_CRC "" +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" #else /* !TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC ".set\tcrc\n\t" +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) #endif -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC \ - #type #size " %0, %1, %0\n\t" \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ } while (0) +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + #define CRC32(crc, value, size) \ _CRC32(crc, value, size, crc32) diff --git a/arch/mips/include/asm/mach-rc32434/rb.h b/arch/mips/include/asm/mach-rc32434/rb.h index 34d179ca020b..dd9d4b026e62 100644 --- a/arch/mips/include/asm/mach-rc32434/rb.h +++ b/arch/mips/include/asm/mach-rc32434/rb.h @@ -29,15 +29,6 @@ #define DEV3TC 0x01003C #define BTCS 0x010040 #define BTCOMPARE 0x010044 -#define GPIOBASE 0x050000 -/* Offsets relative to GPIOBASE */ -#define GPIOFUNC 0x00 -#define GPIOCFG 0x04 -#define GPIOD 0x08 -#define GPIOILEVEL 0x0C -#define GPIOISTAT 0x10 -#define GPIONMIEN 0x14 -#define IMASK6 0x38 #define LO_WPX (1 << 0) #define LO_ALE (1 << 1) #define LO_CLE (1 << 2) diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index 10bf90dc02c0..e6b21de65cca 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syshdr := $(srctree)/scripts/syscallhdr.sh sysnr := $(srctree)/$(src)/syscallnr.sh diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 64726c670ca6..5204fc6d6d50 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1..200fe9ff641d 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 917fac1636b7..084f6caba5f2 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -315,6 +315,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -338,6 +340,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -356,24 +360,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -393,9 +401,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 94f02ada4082..29c21b9d42da 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -37,6 +37,16 @@ #include <asm/mach-rc32434/rb.h> #include <asm/mach-rc32434/gpio.h> +#define GPIOBASE 0x050000 +/* Offsets relative to GPIOBASE */ +#define GPIOFUNC 0x00 +#define GPIOCFG 0x04 +#define GPIOD 0x08 +#define GPIOILEVEL 0x0C +#define GPIOISTAT 0x10 +#define GPIONMIEN 0x14 +#define IMASK6 0x38 + struct rb532_gpio_chip { struct gpio_chip chip; void __iomem *regbase; diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index dfc52f661ad0..38d12f417e48 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -363,6 +363,8 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq) printk(KERN_INFO "GIO: slot %d : %s (id %x)\n", slotno, name, id); gio_dev = kzalloc(sizeof *gio_dev, GFP_KERNEL); + if (!gio_dev) + return; gio_dev->name = name; gio_dev->slotno = slotno; gio_dev->id.id = id; diff --git a/arch/parisc/kernel/syscalls/Makefile b/arch/parisc/kernel/syscalls/Makefile index d63f18dd058d..8440c16dfb22 100644 --- a/arch/parisc/kernel/syscalls/Makefile +++ b/arch/parisc/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h index 99443b8594e7..7fae7e597ba4 100644 --- a/arch/powerpc/include/asm/user.h +++ b/arch/powerpc/include/asm/user.h @@ -44,9 +44,4 @@ struct user { char u_comm[32]; /* user command name */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_DATA_START_ADDR (u.start_data) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_POWERPC_USER_H */ diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile index 5476f62eb80f..9d7bd81510b8 100644 --- a/arch/powerpc/kernel/syscalls/Makefile +++ b/arch/powerpc/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index ea8ec8a960bd..00fd9c548f26 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -16,6 +16,7 @@ config RISCV select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX @@ -47,6 +48,7 @@ config RISCV select CLONE_BACKWARDS select CLINT_TIMER if !MMU select COMMON_CLK + select CPU_PM if CPU_IDLE select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY if SMP select GENERIC_ATOMIC64 if !64BIT @@ -533,4 +535,10 @@ source "kernel/power/Kconfig" endmenu +menu "CPU Power Management" + +source "drivers/cpuidle/Kconfig" + +endmenu + source "arch/riscv/kvm/Kconfig" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index c112ab2a9052..34592d00dde8 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -36,6 +36,9 @@ config SOC_VIRT select GOLDFISH select RTC_DRV_GOLDFISH if RTC_CLASS select SIFIVE_PLIC + select PM_GENERIC_DOMAINS if PM + select PM_GENERIC_DOMAINS_OF if PM && OF + select RISCV_SBI_CPUIDLE if CPU_IDLE help This enables support for QEMU Virt Machine. diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts index 984872f3d3a9..b9e30df127fe 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts @@ -203,6 +203,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts index 7ba99b4da304..8d23401b0bbb 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts @@ -205,6 +205,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts index be9b12c9b374..24fd83b43d9d 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts @@ -213,6 +213,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts index 031c0c28f819..25341f38292a 100644 --- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts +++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts @@ -178,6 +178,8 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <50000000>; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; m25p,fast-read; broken-flash-reset; }; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 7cd10ded7bf8..30e3017f22bc 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -15,11 +15,14 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_PROFILING=y CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_PM=y +CONFIG_CPU_IDLE=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y @@ -64,8 +67,6 @@ CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_EARLYCON_RISCV_SBI=y -CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 3f42ed87dde8..2438fa39f8ae 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -21,7 +21,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index af64b95e88cc..9a133e63ae5b 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index e1c9864b6237..5269fbb6b4fc 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -19,7 +19,6 @@ CONFIG_EXPERT=y # CONFIG_AIO is not set # CONFIG_IO_URING is not set # CONFIG_ADVISE_SYSCALLS is not set -# CONFIG_MEMBARRIER is not set # CONFIG_KALLSYMS is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index e0e5c7c09ab8..7e5efdc3829d 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -15,11 +15,14 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_PROFILING=y CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y CONFIG_ARCH_RV32I=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y +CONFIG_PM=y +CONFIG_CPU_IDLE=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m CONFIG_JUMP_LABEL=y @@ -62,8 +65,6 @@ CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_EARLYCON_RISCV_SBI=y -CONFIG_HVC_RISCV_SBI=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 618d7c5af1a2..8c2549b16ac0 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -67,4 +67,30 @@ #error "Unexpected __SIZEOF_SHORT__" #endif +#ifdef __ASSEMBLY__ + +/* Common assembly source macros */ + +#ifdef CONFIG_XIP_KERNEL +.macro XIP_FIXUP_OFFSET reg + REG_L t0, _xip_fixup + add \reg, \reg, t0 +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg + la t1, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 +.endm +_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET +#else +.macro XIP_FIXUP_OFFSET reg +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg +.endm +#endif /* CONFIG_XIP_KERNEL */ + +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_RISCV_ASM_H */ diff --git a/arch/riscv/include/asm/cpuidle.h b/arch/riscv/include/asm/cpuidle.h new file mode 100644 index 000000000000..71fdc607d4bc --- /dev/null +++ b/arch/riscv/include/asm/cpuidle.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Allwinner Ltd + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ + +#ifndef _ASM_RISCV_CPUIDLE_H +#define _ASM_RISCV_CPUIDLE_H + +#include <asm/barrier.h> +#include <asm/processor.h> + +static inline void cpu_do_idle(void) +{ + /* + * Add mb() here to ensure that all + * IO/MEM accesses are completed prior + * to entering WFI. + */ + mb(); + wait_for_interrupt(); +} + +#endif diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h index 1de233d8e8de..21774d868c65 100644 --- a/arch/riscv/include/asm/current.h +++ b/arch/riscv/include/asm/current.h @@ -33,6 +33,8 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() +register unsigned long current_stack_pointer __asm__("sp"); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_CURRENT_H */ diff --git a/arch/riscv/include/asm/module.lds.h b/arch/riscv/include/asm/module.lds.h index 4254ff2ff049..1075beae1ac6 100644 --- a/arch/riscv/include/asm/module.lds.h +++ b/arch/riscv/include/asm/module.lds.h @@ -2,8 +2,8 @@ /* Copyright (C) 2017 Andes Technology Corporation */ #ifdef CONFIG_MODULE_SECTIONS SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } #endif diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h new file mode 100644 index 000000000000..8be391c2aecb --- /dev/null +++ b/arch/riscv/include/asm/suspend.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#ifndef _ASM_RISCV_SUSPEND_H +#define _ASM_RISCV_SUSPEND_H + +#include <asm/ptrace.h> + +struct suspend_context { + /* Saved and restored by low-level functions */ + struct pt_regs regs; + /* Saved and restored by high-level functions */ + unsigned long scratch; + unsigned long tvec; + unsigned long ie; +#ifdef CONFIG_MMU + unsigned long satp; +#endif +}; + +/* Low-level CPU suspend entry function */ +int __cpu_suspend_enter(struct suspend_context *context); + +/* High-level CPU suspend which will save context and call finish() */ +int cpu_suspend(unsigned long arg, + int (*finish)(unsigned long arg, + unsigned long entry, + unsigned long context)); + +/* Low-level CPU resume entry function */ +int __cpu_resume_enter(unsigned long hartid, unsigned long context); + +#endif diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 60da0dcacf14..74d888c8d631 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -11,11 +11,17 @@ #include <asm/page.h> #include <linux/const.h> +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + /* thread information allocation */ #ifdef CONFIG_64BIT -#define THREAD_SIZE_ORDER (2) +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #else -#define THREAD_SIZE_ORDER (1) +#define THREAD_SIZE_ORDER (1 + KASAN_STACK_ORDER) #endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index e0133d113216..87adbe47bc15 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -48,6 +48,8 @@ obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o +obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o + obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index df0519a64eaf..df9444397908 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -13,6 +13,7 @@ #include <asm/thread_info.h> #include <asm/ptrace.h> #include <asm/cpu_ops_sbi.h> +#include <asm/suspend.h> void asm_offsets(void); @@ -113,6 +114,8 @@ void asm_offsets(void) OFFSET(PT_BADADDR, pt_regs, badaddr); OFFSET(PT_CAUSE, pt_regs, cause); + OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs); + OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero); OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra); OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index d2a936195295..ccb617791e56 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -69,11 +69,11 @@ int riscv_of_parent_hartid(struct device_node *node) .uprop = #UPROP, \ .isa_ext_id = EXTID, \ } -/** +/* * Here are the ordering rules of extension naming defined by RISC-V * specification : * 1. All extensions should be separated from other multi-letter extensions - * from other multi-letter extensions by an underscore. + * by an underscore. * 2. The first letter following the 'Z' conventionally indicates the most * closely related alphabetical extension category, IMAFDQLCBKJTPVH. * If multiple 'Z' extensions are named, they should be ordered first @@ -110,7 +110,7 @@ static void print_isa_ext(struct seq_file *f) } } -/** +/* * These are the only valid base (single letter) ISA extensions as per the spec. * It also specifies the canonical order in which it appears in the spec. * Some of the extension may just be a place holder for now (B, K, P, J). diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index 2e16f6732cdf..4f5a6f84e2a4 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -21,7 +21,7 @@ const struct cpu_operations cpu_ops_sbi; * be invoked from multiple threads in parallel. Define a per cpu data * to handle that. */ -DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); +static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index ec07f991866a..893b8bb69391 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -16,26 +16,6 @@ #include <asm/image.h> #include "efi-header.S" -#ifdef CONFIG_XIP_KERNEL -.macro XIP_FIXUP_OFFSET reg - REG_L t0, _xip_fixup - add \reg, \reg, t0 -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg - la t0, __data_loc - REG_L t1, _xip_phys_offset - sub \reg, \reg, t1 - add \reg, \reg, t0 -.endm -_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET -_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET -#else -.macro XIP_FIXUP_OFFSET reg -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg -.endm -#endif /* CONFIG_XIP_KERNEL */ - __HEAD ENTRY(_start) /* @@ -89,7 +69,8 @@ pe_head_start: .align 2 #ifdef CONFIG_MMU -relocate: + .global relocate_enable_mmu +relocate_enable_mmu: /* Relocate return address */ la a1, kernel_map XIP_FIXUP_OFFSET a1 @@ -184,7 +165,7 @@ secondary_start_sbi: /* Enable virtual memory and relocate to virtual address */ la a0, swapper_pg_dir XIP_FIXUP_OFFSET a0 - call relocate + call relocate_enable_mmu #endif call setup_trap_vector tail smp_callin @@ -328,7 +309,7 @@ clear_bss_done: #ifdef CONFIG_MMU la a0, early_pg_dir XIP_FIXUP_OFFSET a0 - call relocate + call relocate_enable_mmu #endif /* CONFIG_MMU */ call setup_trap_vector diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 4a48287513c3..c29cef90d1dd 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -69,7 +69,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location, return 0; } -static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, +static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; @@ -301,7 +301,7 @@ static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, [R_RISCV_64] = apply_r_riscv_64_rela, [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, [R_RISCV_JAL] = apply_r_riscv_jal_rela, - [R_RISCV_RVC_BRANCH] = apply_r_riscv_rcv_branch_rela, + [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 55faa4991b87..3348a61de7d9 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -68,7 +68,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, static bool fill_callchain(void *entry, unsigned long pc) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 03ac3aa611f5..504b496787aa 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -23,6 +23,7 @@ #include <asm/string.h> #include <asm/switch_to.h> #include <asm/thread_info.h> +#include <asm/cpuidle.h> register unsigned long gp_in_global __asm__("gp"); @@ -37,7 +38,7 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { - wait_for_interrupt(); + cpu_do_idle(); raw_local_irq_enable(); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 14d2b53ec322..08d11a53f39e 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -14,8 +14,6 @@ #include <asm/stacktrace.h> -register unsigned long sp_in_global __asm__("sp"); - #ifdef CONFIG_FRAME_POINTER void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, @@ -30,7 +28,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, pc = instruction_pointer(regs); } else if (task == NULL || task == current) { fp = (unsigned long)__builtin_frame_address(0); - sp = sp_in_global; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ @@ -78,7 +76,7 @@ void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - sp = sp_in_global; + sp = current_stack_pointer; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c new file mode 100644 index 000000000000..9ba24fb8cc93 --- /dev/null +++ b/arch/riscv/kernel/suspend.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/ftrace.h> +#include <asm/csr.h> +#include <asm/suspend.h> + +static void suspend_save_csrs(struct suspend_context *context) +{ + context->scratch = csr_read(CSR_SCRATCH); + context->tvec = csr_read(CSR_TVEC); + context->ie = csr_read(CSR_IE); + + /* + * No need to save/restore IP CSR (i.e. MIP or SIP) because: + * + * 1. For no-MMU (M-mode) kernel, the bits in MIP are set by + * external devices (such as interrupt controller, timer, etc). + * 2. For MMU (S-mode) kernel, the bits in SIP are set by + * M-mode firmware and external devices (such as interrupt + * controller, etc). + */ + +#ifdef CONFIG_MMU + context->satp = csr_read(CSR_SATP); +#endif +} + +static void suspend_restore_csrs(struct suspend_context *context) +{ + csr_write(CSR_SCRATCH, context->scratch); + csr_write(CSR_TVEC, context->tvec); + csr_write(CSR_IE, context->ie); + +#ifdef CONFIG_MMU + csr_write(CSR_SATP, context->satp); +#endif +} + +int cpu_suspend(unsigned long arg, + int (*finish)(unsigned long arg, + unsigned long entry, + unsigned long context)) +{ + int rc = 0; + struct suspend_context context = { 0 }; + + /* Finisher should be non-NULL */ + if (!finish) + return -EINVAL; + + /* Save additional CSRs*/ + suspend_save_csrs(&context); + + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka finishers) hence disable + * graph tracing during their execution. + */ + pause_graph_tracing(); + + /* Save context on stack */ + if (__cpu_suspend_enter(&context)) { + /* Call the finisher */ + rc = finish(arg, __pa_symbol(__cpu_resume_enter), + (ulong)&context); + + /* + * Should never reach here, unless the suspend finisher + * fails. Successful cpu_suspend() should return from + * __cpu_resume_entry() + */ + if (!rc) + rc = -EOPNOTSUPP; + } + + /* Enable function graph tracer */ + unpause_graph_tracing(); + + /* Restore additional CSRs */ + suspend_restore_csrs(&context); + + return rc; +} diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S new file mode 100644 index 000000000000..4b07b809a2b8 --- /dev/null +++ b/arch/riscv/kernel/suspend_entry.S @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/csr.h> + + .text + .altmacro + .option norelax + +ENTRY(__cpu_suspend_enter) + /* Save registers (except A0 and T0-T6) */ + REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_S gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_S tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_S s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_S s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_S a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_S a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_S a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_S a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_S a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_S a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_S a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_S s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_S s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_S s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_S s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_S s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_S s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_S s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_S s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_S s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_S s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Save CSRs */ + csrr t0, CSR_EPC + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrr t0, CSR_STATUS + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrr t0, CSR_TVAL + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrr t0, CSR_CAUSE + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + + /* Return non-zero value */ + li a0, 1 + + /* Return to C code */ + ret +END(__cpu_suspend_enter) + +ENTRY(__cpu_resume_enter) + /* Load the global pointer */ + .option push + .option norelax + la gp, __global_pointer$ + .option pop + +#ifdef CONFIG_MMU + /* Save A0 and A1 */ + add t0, a0, zero + add t1, a1, zero + + /* Enable MMU */ + la a0, swapper_pg_dir + XIP_FIXUP_OFFSET a0 + call relocate_enable_mmu + + /* Restore A0 and A1 */ + add a0, t0, zero + add a1, t1, zero +#endif + + /* Make A0 point to suspend context */ + add a0, a1, zero + + /* Restore CSRs */ + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrw CSR_EPC, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrw CSR_STATUS, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrw CSR_TVAL, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + csrw CSR_CAUSE, t0 + + /* Restore registers (except A0 and T0-T6) */ + REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Return zero value */ + add a0, zero, zero + + /* Return to C code */ + ret +END(__cpu_resume_enter) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9b80e8bed3f6..77b5a03de13a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,7 @@ config S390 select ALTERNATE_USER_ADDRESS_SPACE select ARCH_32BIT_USTAT_F_TINODE select ARCH_BINFMT_ELF_STATE + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h index 955d620db23e..bb3837d7387c 100644 --- a/arch/s390/include/asm/alternative-asm.h +++ b/arch/s390/include/asm/alternative-asm.h @@ -37,9 +37,15 @@ * a 2-byte nop if the size of the area is not divisible by 6. */ .macro alt_pad_fill bytes - .fill ( \bytes ) / 6, 6, 0xc0040000 - .fill ( \bytes ) % 6 / 4, 4, 0x47000000 - .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700 + .rept ( \bytes ) / 6 + brcl 0,0 + .endr + .rept ( \bytes ) % 6 / 4 + nop + .endr + .rept ( \bytes ) % 6 % 4 / 2 + nopr + .endr .endm /* diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index d3880ca764ee..3f2856ed6808 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -71,11 +71,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); ".if " oldinstr_pad_len(num) " > 6\n" \ "\tjg " e_oldinstr_pad_end "f\n" \ "6620:\n" \ - "\t.fill (" oldinstr_pad_len(num) " - (6620b-662b)) / 2, 2, 0x0700\n" \ + "\t.rept (" oldinstr_pad_len(num) " - (6620b-662b)) / 2\n" \ + "\tnopr\n" \ ".else\n" \ - "\t.fill " oldinstr_pad_len(num) " / 6, 6, 0xc0040000\n" \ - "\t.fill " oldinstr_pad_len(num) " %% 6 / 4, 4, 0x47000000\n" \ - "\t.fill " oldinstr_pad_len(num) " %% 6 %% 4 / 2, 2, 0x0700\n" \ + "\t.rept " oldinstr_pad_len(num) " / 6\n" \ + "\t.brcl 0,0\n" \ + "\t.endr\n" \ + "\t.rept " oldinstr_pad_len(num) " %% 6 / 4\n" \ + "\tnop\n" \ + "\t.endr\n" \ + "\t.rept " oldinstr_pad_len(num) " %% 6 %% 4 / 2\n" \ + "\tnopr\n" \ + ".endr\n" \ ".endif\n" #define OLDINSTR(oldinstr, num) \ diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index ae75da592ccb..b515cfa62bd9 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -60,11 +60,11 @@ static inline bool ap_instructions_available(void) unsigned long reg1 = 0; asm volatile( - " lgr 0,%[reg0]\n" /* qid into gr0 */ - " lghi 1,0\n" /* 0 into gr1 */ - " lghi 2,0\n" /* 0 into gr2 */ - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - "0: la %[reg1],1\n" /* 1 into reg1 */ + " lgr 0,%[reg0]\n" /* qid into gr0 */ + " lghi 1,0\n" /* 0 into gr1 */ + " lghi 2,0\n" /* 0 into gr2 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */ + "0: la %[reg1],1\n" /* 1 into reg1 */ "1:\n" EX_TABLE(0b, 1b) : [reg1] "+&d" (reg1) @@ -86,11 +86,11 @@ static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) unsigned long reg2; asm volatile( - " lgr 0,%[qid]\n" /* qid into gr0 */ - " lghi 2,0\n" /* 0 into gr2 */ - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - " lgr %[reg2],2\n" /* gr2 into reg2 */ + " lgr 0,%[qid]\n" /* qid into gr0 */ + " lghi 2,0\n" /* 0 into gr2 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(TAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg2],2\n" /* gr2 into reg2 */ : [reg1] "=&d" (reg1), [reg2] "=&d" (reg2) : [qid] "d" (qid) : "cc", "0", "1", "2"); @@ -128,9 +128,9 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid) struct ap_queue_status reg1; asm volatile( - " lgr 0,%[reg0]\n" /* qid arg into gr0 */ - " .long 0xb2af0000\n" /* PQAP(RAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(RAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1) : [reg0] "d" (reg0) : "cc", "0", "1"); @@ -149,9 +149,9 @@ static inline struct ap_queue_status ap_zapq(ap_qid_t qid) struct ap_queue_status reg1; asm volatile( - " lgr 0,%[reg0]\n" /* qid arg into gr0 */ - " .long 0xb2af0000\n" /* PQAP(ZAPQ) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr 0,%[reg0]\n" /* qid arg into gr0 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(ZAPQ) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ : [reg1] "=&d" (reg1) : [reg0] "d" (reg0) : "cc", "0", "1"); @@ -190,10 +190,10 @@ static inline int ap_qci(struct ap_config_info *config) struct ap_config_info *reg2 = config; asm volatile( - " lgr 0,%[reg0]\n" /* QCI fc into gr0 */ - " lgr 2,%[reg2]\n" /* ptr to config into gr2 */ - " .long 0xb2af0000\n" /* PQAP(QCI) */ - "0: la %[reg1],0\n" /* good case, QCI fc available */ + " lgr 0,%[reg0]\n" /* QCI fc into gr0 */ + " lgr 2,%[reg2]\n" /* ptr to config into gr2 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(QCI) */ + "0: la %[reg1],0\n" /* good case, QCI fc available */ "1:\n" EX_TABLE(0b, 1b) : [reg1] "+&d" (reg1) @@ -246,11 +246,11 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, reg1.qirqctrl = qirqctrl; asm volatile( - " lgr 0,%[reg0]\n" /* qid param into gr0 */ - " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */ - " lgr 2,%[reg2]\n" /* ni addr into gr2 */ - " .long 0xb2af0000\n" /* PQAP(AQIC) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr 0,%[reg0]\n" /* qid param into gr0 */ + " lgr 1,%[reg1]\n" /* irq ctrl into gr1 */ + " lgr 2,%[reg2]\n" /* ni addr into gr2 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(AQIC) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ : [reg1] "+&d" (reg1) : [reg0] "d" (reg0), [reg2] "d" (reg2) : "cc", "0", "1", "2"); @@ -297,11 +297,11 @@ static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, reg1.value = apinfo->val; asm volatile( - " lgr 0,%[reg0]\n" /* qid param into gr0 */ - " lgr 1,%[reg1]\n" /* qact in info into gr1 */ - " .long 0xb2af0000\n" /* PQAP(QACT) */ - " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ - " lgr %[reg2],2\n" /* qact out info into reg2 */ + " lgr 0,%[reg0]\n" /* qid param into gr0 */ + " lgr 1,%[reg1]\n" /* qact in info into gr1 */ + " .insn rre,0xb2af0000,0,0\n" /* PQAP(QACT) */ + " lgr %[reg1],1\n" /* gr1 (status) into reg1 */ + " lgr %[reg2],2\n" /* qact out info into reg2 */ : [reg1] "+&d" (reg1), [reg2] "=&d" (reg2) : [reg0] "d" (reg0) : "cc", "0", "1", "2"); diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index c800199a376b..82388da3f95f 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -74,8 +74,17 @@ static __always_inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) __ctl_load(reg, cr, cr); } -void smp_ctl_set_bit(int cr, int bit); -void smp_ctl_clear_bit(int cr, int bit); +void smp_ctl_set_clear_bit(int cr, int bit, bool set); + +static inline void ctl_set_bit(int cr, int bit) +{ + smp_ctl_set_clear_bit(cr, bit, true); +} + +static inline void ctl_clear_bit(int cr, int bit) +{ + smp_ctl_set_clear_bit(cr, bit, false); +} union ctlreg0 { unsigned long val; @@ -130,8 +139,5 @@ union ctlreg15 { }; }; -#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) -#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 84ec63145325..eee8d96fb38e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -319,11 +319,18 @@ extern void (*s390_base_pgm_handler_fn)(struct pt_regs *regs); extern int memcpy_real(void *, unsigned long, size_t); extern void memcpy_absolute(void *, void *, size_t); -#define mem_assign_absolute(dest, val) do { \ - __typeof__(dest) __tmp = (val); \ - \ - BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \ - memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ +#define put_abs_lowcore(member, x) do { \ + unsigned long __abs_address = offsetof(struct lowcore, member); \ + __typeof__(((struct lowcore *)0)->member) __tmp = (x); \ + \ + memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \ +} while (0) + +#define get_abs_lowcore(x, member) do { \ + unsigned long __abs_address = offsetof(struct lowcore, member); \ + __typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \ + \ + memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \ } while (0) extern int s390_isolate_bp(void); diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 888a2f1c9ee3..24a54443c865 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -78,7 +78,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) { typecheck(int, lp->lock); asm_inline volatile( - ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */ + ALTERNATIVE("", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ " sth %1,%0\n" : "=R" (((unsigned short *) &lp->lock)[1]) : "d" (0) : "cc", "memory"); diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h index ad2c996e7e93..fde7e6b1df48 100644 --- a/arch/s390/include/asm/syscall_wrapper.h +++ b/arch/s390/include/asm/syscall_wrapper.h @@ -162,4 +162,4 @@ __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ +#endif /* _ASM_S390_SYSCALL_WRAPPER_H */ diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 5ebf534ef753..0bf06f1682d8 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -4,6 +4,8 @@ #include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> +#include <linux/llist.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -36,10 +38,21 @@ struct unwind_state { struct pt_regs *regs; unsigned long sp, ip; int graph_idx; + struct llist_node *kr_cur; bool reliable; bool error; }; +/* Recover the return address modified by kretprobe and ftrace_graph. */ +static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long ip) +{ + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); + if (is_kretprobe_trampoline(ip)) + ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur); + return ip; +} + void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long first_frame); bool unwind_next_frame(struct unwind_state *state); diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h index 0ca572ced21b..8e8aaf48582e 100644 --- a/arch/s390/include/asm/user.h +++ b/arch/s390/include/asm/user.h @@ -67,9 +67,5 @@ struct user { unsigned long magic; /* To uniquely identify a core file */ char u_comm[32]; /* User command that was responsible */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _S390_USER_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a601a518b569..59b69c8ab5e1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -121,22 +121,22 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "", ".long 0xb2e8c000", 82 + ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,12,0", 82 .endm .macro BPON - ALTERNATIVE "", ".long 0xb2e8d000", 82 + ALTERNATIVE "", ".insn rrf,0xb2e80000,0,0,13,0", 82 .endm .macro BPENTER tif_ptr,tif_mask - ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \ + ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ "", 82 .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask - ALTERNATIVE "jz .+8; .long 0xb2e8c000", \ - "jnz .+8; .long 0xb2e8d000", 82 + ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 .endm /* diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 28ae7df26c4a..1cc85b8ff42e 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1646,8 +1646,8 @@ static void dump_reipl_run(struct shutdown_trigger *trigger) csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - mem_assign_absolute(S390_lowcore.ipib, ipib); - mem_assign_absolute(S390_lowcore.ipib_checksum, csum); + put_abs_lowcore(ipib, ipib); + put_abs_lowcore(ipib_checksum, csum); dump_run(trigger); } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index e32c14fd1282..0032bdbe8e3f 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -284,11 +284,11 @@ NOKPROBE_SYMBOL(pop_kprobe); void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14]; - ri->fp = NULL; + ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14]; + ri->fp = (void *)regs->gprs[15]; /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long) &__kretprobe_trampoline; + regs->gprs[14] = (unsigned long)&__kretprobe_trampoline; } NOKPROBE_SYMBOL(arch_prepare_kretprobe); @@ -385,7 +385,7 @@ NOKPROBE_SYMBOL(arch_kretprobe_fixup_return); */ void trampoline_probe_handler(struct pt_regs *regs) { - kretprobe_trampoline_handler(regs, NULL); + kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]); } NOKPROBE_SYMBOL(trampoline_probe_handler); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 088d57a3083f..b2ef014a9287 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -226,7 +226,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); + put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 6b5b64e67eee..1acc2e05d70f 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -63,7 +63,7 @@ void __init os_info_init(void) os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - mem_assign_absolute(S390_lowcore.os_info, __pa(ptr)); + put_abs_lowcore(os_info, __pa(ptr)); } #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 84e23fcc1106..d860ac300919 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -481,11 +481,11 @@ static void __init setup_lowcore_dat_off(void) lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; /* Setup absolute zero lowcore */ - mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack); - mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn); - mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data); - mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); - mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); + put_abs_lowcore(restart_stack, lc->restart_stack); + put_abs_lowcore(restart_fn, lc->restart_fn); + put_abs_lowcore(restart_data, lc->restart_data); + put_abs_lowcore(restart_source, lc->restart_source); + put_abs_lowcore(restart_psw, lc->restart_psw); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; @@ -501,6 +501,7 @@ static void __init setup_lowcore_dat_off(void) static void __init setup_lowcore_dat_on(void) { struct lowcore *lc = lowcore_ptr[0]; + int cr; __ctl_clear_bit(0, 28); S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; @@ -509,10 +510,10 @@ static void __init setup_lowcore_dat_on(void) S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; __ctl_store(S390_lowcore.cregs_save_area, 0, 15); __ctl_set_bit(0, 28); - mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS); - mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw); - memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area, - sizeof(S390_lowcore.cregs_save_area)); + put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS); + put_abs_lowcore(program_new_psw, lc->program_new_psw); + for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++) + put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]); } static struct resource code_resource = { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 127da1850b06..30c91d565933 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -213,7 +213,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) if (nmi_alloc_mcesa(&lc->mcesad)) goto out; lowcore_ptr[cpu] = lc; - pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc); + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); return 0; out: @@ -326,10 +326,17 @@ static void pcpu_delegate(struct pcpu *pcpu, /* Stop target cpu (if func returns this stops the current cpu). */ pcpu_sigp_retry(pcpu, SIGP_STOP, 0); /* Restart func on the target cpu and stop the current cpu. */ - mem_assign_absolute(lc->restart_stack, stack); - mem_assign_absolute(lc->restart_fn, (unsigned long) func); - mem_assign_absolute(lc->restart_data, (unsigned long) data); - mem_assign_absolute(lc->restart_source, source_cpu); + if (lc) { + lc->restart_stack = stack; + lc->restart_fn = (unsigned long)func; + lc->restart_data = (unsigned long)data; + lc->restart_source = source_cpu; + } else { + put_abs_lowcore(restart_stack, stack); + put_abs_lowcore(restart_fn, (unsigned long)func); + put_abs_lowcore(restart_data, (unsigned long)data); + put_abs_lowcore(restart_source, source_cpu); + } __bpon(); asm volatile( "0: sigp 0,%0,%2 # sigp restart to target cpu\n" @@ -570,39 +577,27 @@ static void smp_ctl_bit_callback(void *info) } static DEFINE_SPINLOCK(ctl_lock); -static unsigned long ctlreg; -/* - * Set a bit in a control register of all cpus - */ -void smp_ctl_set_bit(int cr, int bit) +void smp_ctl_set_clear_bit(int cr, int bit, bool set) { - struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr }; - - spin_lock(&ctl_lock); - memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); - __set_bit(bit, &ctlreg); - memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); - spin_unlock(&ctl_lock); - on_each_cpu(smp_ctl_bit_callback, &parms, 1); -} -EXPORT_SYMBOL(smp_ctl_set_bit); - -/* - * Clear a bit in a control register of all cpus - */ -void smp_ctl_clear_bit(int cr, int bit) -{ - struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr }; + struct ec_creg_mask_parms parms = { .cr = cr, }; + u64 ctlreg; + if (set) { + parms.orval = 1UL << bit; + parms.andval = -1UL; + } else { + parms.orval = 0; + parms.andval = ~(1UL << bit); + } spin_lock(&ctl_lock); - memcpy_absolute(&ctlreg, &S390_lowcore.cregs_save_area[cr], sizeof(ctlreg)); - __clear_bit(bit, &ctlreg); - memcpy_absolute(&S390_lowcore.cregs_save_area[cr], &ctlreg, sizeof(ctlreg)); + get_abs_lowcore(ctlreg, cregs_save_area[cr]); + ctlreg = (ctlreg & parms.andval) | parms.orval; + put_abs_lowcore(cregs_save_area[cr], ctlreg); spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } -EXPORT_SYMBOL(smp_ctl_clear_bit); +EXPORT_SYMBOL(smp_ctl_set_clear_bit); #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile index b98f25029b8e..fb85e797946d 100644 --- a/arch/s390/kernel/syscalls/Makefile +++ b/arch/s390/kernel/syscalls/Makefile @@ -21,8 +21,7 @@ uapi: $(uapi-hdrs-y) # Create output directory if not already present -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $< diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 674c65019434..1d2aa448d103 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -141,10 +141,10 @@ static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) do_trap(regs, SIGFPE, si_code, "floating point exception"); } -static void translation_exception(struct pt_regs *regs) +static void translation_specification_exception(struct pt_regs *regs) { /* May never happen. */ - panic("Translation exception"); + panic("Translation-Specification Exception"); } static void illegal_op(struct pt_regs *regs) @@ -368,7 +368,7 @@ static void (*pgm_check_table[128])(struct pt_regs *regs) = { [0x0f] = hfp_divide_exception, [0x10] = do_dat_exception, [0x11] = do_dat_exception, - [0x12] = translation_exception, + [0x12] = translation_specification_exception, [0x13] = special_op_exception, [0x14] = default_trap_handler, [0x15] = operand_exception, diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 707fd99f6734..0ece156fdd7c 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -64,8 +64,8 @@ bool unwind_next_frame(struct unwind_state *state) ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; - if (!__kernel_text_address(ip)) { - /* skip bogus %r14 */ + /* skip bogus %r14 or if is the same as regs->psw.addr */ + if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) { state->regs = NULL; return unwind_next_frame(state); } @@ -103,13 +103,11 @@ bool unwind_next_frame(struct unwind_state *state) if (sp & 0x7) goto out_err; - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp); - /* Update unwind state */ state->sp = sp; - state->ip = ip; state->regs = regs; state->reliable = reliable; + state->ip = unwind_recover_ret_addr(state, ip); return true; out_err: @@ -161,12 +159,10 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, ip = READ_ONCE_NOCHECK(sf->gprs[8]); } - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); - /* Update unwind state */ state->sp = sp; - state->ip = ip; state->reliable = true; + state->ip = unwind_recover_ret_addr(state, ip); if (!first_frame) return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ab73d99483fe..156d1c25a3c1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3462,7 +3462,7 @@ void exit_sie(struct kvm_vcpu *vcpu) /* Kick a guest cpu out of SIE to process a request synchronously */ void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu) { - kvm_make_request(req, vcpu); + __kvm_make_request(req, vcpu); kvm_s390_vcpu_request(vcpu); } diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 692dc84cd19c..5e7ea8b111e8 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ + ALTERNATIVE("", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm_inline volatile( - ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */ + ALTERNATIVE("", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index c01f02887de4..9bb067321ab4 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -47,7 +47,7 @@ static void print_backtrace(char *bt) static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, unsigned long sp) { - int frame_count, prev_is_func2, seen_func2_func1; + int frame_count, prev_is_func2, seen_func2_func1, seen_kretprobe_trampoline; const int max_frames = 128; struct unwind_state state; size_t bt_pos = 0; @@ -63,6 +63,7 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, frame_count = 0; prev_is_func2 = 0; seen_func2_func1 = 0; + seen_kretprobe_trampoline = 0; unwind_for_each_frame(&state, task, regs, sp) { unsigned long addr = unwind_get_return_address(&state); char sym[KSYM_SYMBOL_LEN]; @@ -88,6 +89,8 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1")) seen_func2_func1 = 1; prev_is_func2 = str_has_prefix(sym, "unwindme_func2"); + if (str_has_prefix(sym, "__kretprobe_trampoline+0x0/")) + seen_kretprobe_trampoline = 1; } /* Check the results. */ @@ -103,6 +106,10 @@ static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs, kunit_err(current_test, "Maximum number of frames exceeded\n"); ret = -EINVAL; } + if (seen_kretprobe_trampoline) { + kunit_err(current_test, "__kretprobe_trampoline+0x0 in unwinding results\n"); + ret = -EINVAL; + } if (ret || force_bt) print_backtrace(bt); kfree(bt); @@ -132,36 +139,50 @@ static struct unwindme *unwindme; #define UWM_PGM 0x40 /* Unwind from program check handler */ #define UWM_KPROBE_ON_FTRACE 0x80 /* Unwind from kprobe handler called via ftrace. */ #define UWM_FTRACE 0x100 /* Unwind from ftrace handler. */ -#define UWM_KRETPROBE 0x200 /* Unwind kretprobe handlers. */ +#define UWM_KRETPROBE 0x200 /* Unwind through kretprobed function. */ +#define UWM_KRETPROBE_HANDLER 0x400 /* Unwind from kretprobe handler. */ -static __always_inline unsigned long get_psw_addr(void) +static __always_inline struct pt_regs fake_pt_regs(void) { - unsigned long psw_addr; + struct pt_regs regs; + + memset(®s, 0, sizeof(regs)); + regs.gprs[15] = current_stack_pointer(); asm volatile( "basr %[psw_addr],0\n" - : [psw_addr] "=d" (psw_addr)); - return psw_addr; + : [psw_addr] "=d" (regs.psw.addr)); + return regs; } static int kretprobe_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { struct unwindme *u = unwindme; + if (!(u->flags & UWM_KRETPROBE_HANDLER)) + return 0; + u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL, (u->flags & UWM_SP) ? u->sp : 0); return 0; } -static noinline notrace void test_unwind_kretprobed_func(void) +static noinline notrace int test_unwind_kretprobed_func(struct unwindme *u) { - asm volatile(" nop\n"); + struct pt_regs regs; + + if (!(u->flags & UWM_KRETPROBE)) + return 0; + + regs = fake_pt_regs(); + return test_unwind(NULL, (u->flags & UWM_REGS) ? ®s : NULL, + (u->flags & UWM_SP) ? u->sp : 0); } -static noinline void test_unwind_kretprobed_func_caller(void) +static noinline int test_unwind_kretprobed_func_caller(struct unwindme *u) { - test_unwind_kretprobed_func(); + return test_unwind_kretprobed_func(u); } static int test_unwind_kretprobe(struct unwindme *u) @@ -187,10 +208,12 @@ static int test_unwind_kretprobe(struct unwindme *u) return -EINVAL; } - test_unwind_kretprobed_func_caller(); + ret = test_unwind_kretprobed_func_caller(u); unregister_kretprobe(&my_kretprobe); unwindme = NULL; - return u->ret; + if (u->flags & UWM_KRETPROBE_HANDLER) + ret = u->ret; + return ret; } static int kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs) @@ -304,16 +327,13 @@ static noinline int unwindme_func4(struct unwindme *u) return 0; } else if (u->flags & (UWM_PGM | UWM_KPROBE_ON_FTRACE)) { return test_unwind_kprobe(u); - } else if (u->flags & (UWM_KRETPROBE)) { + } else if (u->flags & (UWM_KRETPROBE | UWM_KRETPROBE_HANDLER)) { return test_unwind_kretprobe(u); } else if (u->flags & UWM_FTRACE) { return test_unwind_ftrace(u); } else { - struct pt_regs regs; + struct pt_regs regs = fake_pt_regs(); - memset(®s, 0, sizeof(regs)); - regs.psw.addr = get_psw_addr(); - regs.gprs[15] = current_stack_pointer(); return test_unwind(NULL, (u->flags & UWM_REGS) ? ®s : NULL, (u->flags & UWM_SP) ? u->sp : 0); @@ -452,6 +472,10 @@ static const struct test_params param_list[] = { TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP), TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_REGS), TEST_WITH_FLAGS(UWM_KRETPROBE | UWM_SP | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_REGS), + TEST_WITH_FLAGS(UWM_KRETPROBE_HANDLER | UWM_SP | UWM_REGS), }; /* diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 792f8e0f2178..e563cb65c0c4 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -69,6 +69,7 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; + zpci_zdev_get(zdev); break; } } @@ -399,7 +400,7 @@ EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); + struct zpci_dev *zdev = zdev_from_bus(bus, devfn); return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV; } @@ -407,7 +408,7 @@ static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); + struct zpci_dev *zdev = zdev_from_bus(bus, devfn); return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV; } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index e359d2686178..e96c9860e064 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -19,7 +19,8 @@ void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error); void zpci_release_device(struct kref *kref); static inline void zpci_zdev_put(struct zpci_dev *zdev) { - kref_put(&zdev->kref, zpci_release_device); + if (zdev) + kref_put(&zdev->kref, zpci_release_device); } static inline void zpci_zdev_get(struct zpci_dev *zdev) @@ -32,8 +33,8 @@ void zpci_free_domain(int domain); int zpci_setup_bus_resources(struct zpci_dev *zdev, struct list_head *resources); -static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, - unsigned int devfn) +static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus, + unsigned int devfn) { struct zpci_bus *zbus = bus->sysdata; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 63f3e057c168..1057d7af4a55 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -23,6 +23,8 @@ #include <asm/clp.h> #include <uapi/asm/clp.h> +#include "pci_bus.h" + bool zpci_unique_uid; void update_uid_checking(bool new) @@ -404,8 +406,11 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) return; zdev = get_zdev_by_fid(entry->fid); - if (!zdev) - zpci_create_device(entry->fid, entry->fh, entry->config_state); + if (zdev) { + zpci_zdev_put(zdev); + return; + } + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 2e3e5b278925..ea9db5cea64e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -269,7 +269,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); if (!pdev) - return; + goto no_pdev; switch (ccdf->pec) { case 0x003a: /* Service Action or Error Recovery Successful */ @@ -286,6 +286,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) break; } pci_dev_put(pdev); +no_pdev: + zpci_zdev_put(zdev); } void zpci_event_error(void *data) @@ -314,6 +316,7 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); + bool existing_zdev = !!zdev; enum zpci_state state; zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", @@ -378,6 +381,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (existing_zdev) + zpci_zdev_put(zdev); } void zpci_event_availability(void *data) diff --git a/arch/sh/include/asm/user.h b/arch/sh/include/asm/user.h index 7dfd3f6461e6..12ea0f3f4419 100644 --- a/arch/sh/include/asm/user.h +++ b/arch/sh/include/asm/user.h @@ -52,10 +52,4 @@ struct user { char u_comm[32]; /* user command name */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_DATA_START_ADDR (u.start_data) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) - #endif /* __ASM_SH_USER_H */ diff --git a/arch/sh/kernel/syscalls/Makefile b/arch/sh/kernel/syscalls/Makefile index 6713c65a25e1..b265e4bc16c2 100644 --- a/arch/sh/kernel/syscalls/Makefile +++ b/arch/sh/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/sparc/kernel/syscalls/Makefile b/arch/sparc/kernel/syscalls/Makefile index d63f18dd058d..8440c16dfb22 100644 --- a/arch/sparc/kernel/syscalls/Makefile +++ b/arch/sparc/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 6ead1e240457..8ca67a692683 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -224,7 +224,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -247,6 +247,7 @@ void mconsole_stop(struct mc_request *req) } os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c index 5b5b64cb1071..3c62ae81df62 100644 --- a/arch/um/drivers/port_user.c +++ b/arch/um/drivers/port_user.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <errno.h> #include <termios.h> #include <unistd.h> @@ -167,14 +168,29 @@ static void port_pre_exec(void *arg) int port_connection(int fd, int *socket, int *pid_out) { int new, err; - char *argv[] = { "/usr/sbin/in.telnetd", "-L", + char *env; + char *argv[] = { "in.telnetd", "-L", OS_LIB_PATH "/uml/port-helper", NULL }; struct port_pre_exec_data data; + if ((env = getenv("UML_PORT_HELPER"))) + argv[2] = env; + new = accept(fd, NULL, 0); if (new < 0) return -errno; + err = os_access(argv[2], X_OK); + if (err < 0) { + printk(UM_KERN_ERR "port_connection : error accessing port-helper " + "executable at %s: %s\n", argv[2], strerror(-err)); + if (env == NULL) + printk(UM_KERN_ERR "Set UML_PORT_HELPER environment " + "variable to path to uml-utilities port-helper " + "binary\n"); + goto out_close; + } + err = os_pipe(socket, 0, 0); if (err < 0) goto out_close; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 69d2d0049a61..b03269faef71 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1526,13 +1526,19 @@ static void do_io(struct io_thread_req *req, struct io_desc *desc) } break; case REQ_OP_DISCARD: - case REQ_OP_WRITE_ZEROES: n = os_falloc_punch(req->fds[bit], off, len); if (n) { req->error = map_error(-n); return; } break; + case REQ_OP_WRITE_ZEROES: + n = os_falloc_zeroes(req->fds[bit], off, len); + if (n) { + req->error = map_error(-n); + return; + } + break; default: WARN_ON_ONCE(1); req->error = BLK_STS_NOTSUPP; diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 4fc1a5d70dcf..1d6f6a66766c 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -67,6 +67,7 @@ static LIST_HEAD(vector_devices); static int driver_registered; static void vector_eth_configure(int n, struct arglist *def); +static int vector_mmsg_rx(struct vector_private *vp, int budget); /* Argument accessors to set variables (and/or set default values) * mtu, buffer sizing, default headroom, etc @@ -77,7 +78,6 @@ static void vector_eth_configure(int n, struct arglist *def); #define DEFAULT_VECTOR_SIZE 64 #define TX_SMALL_PACKET 128 #define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1) -#define MAX_ITERATIONS 64 static const struct { const char string[ETH_GSTRING_LEN]; @@ -458,7 +458,6 @@ static int vector_send(struct vector_queue *qi) vp->estats.tx_queue_running_average = (vp->estats.tx_queue_running_average + result) >> 1; } - netif_trans_update(qi->dev); netif_wake_queue(qi->dev); /* if TX is busy, break out of the send loop, * poll write IRQ will reschedule xmit for us @@ -470,8 +469,6 @@ static int vector_send(struct vector_queue *qi) } } spin_unlock(&qi->head_lock); - } else { - tasklet_schedule(&vp->tx_poll); } return queue_depth; } @@ -608,7 +605,7 @@ out_fail: /* * We do not use the RX queue as a proper wraparound queue for now - * This is not necessary because the consumption via netif_rx() + * This is not necessary because the consumption via napi_gro_receive() * happens in-line. While we can try using the return code of * netif_rx() for flow control there are no drivers doing this today. * For this RX specific use we ignore the tail/head locks and @@ -896,7 +893,7 @@ static int vector_legacy_rx(struct vector_private *vp) skb->protocol = eth_type_trans(skb, skb->dev); vp->dev->stats.rx_bytes += skb->len; vp->dev->stats.rx_packets++; - netif_rx(skb); + napi_gro_receive(&vp->napi, skb); } else { dev_kfree_skb_irq(skb); } @@ -955,7 +952,7 @@ drop: * mmsg vector matched to an skb vector which we prepared earlier. */ -static int vector_mmsg_rx(struct vector_private *vp) +static int vector_mmsg_rx(struct vector_private *vp, int budget) { int packet_count, i; struct vector_queue *qi = vp->rx_queue; @@ -972,6 +969,9 @@ static int vector_mmsg_rx(struct vector_private *vp) /* Fire the Lazy Gun - get as many packets as we can in one go. */ + if (budget > qi->max_depth) + budget = qi->max_depth; + packet_count = uml_vector_recvmmsg( vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); @@ -1021,7 +1021,7 @@ static int vector_mmsg_rx(struct vector_private *vp) */ vp->dev->stats.rx_bytes += skb->len; vp->dev->stats.rx_packets++; - netif_rx(skb); + napi_gro_receive(&vp->napi, skb); } else { /* Overlay header too short to do anything - discard. * We can actually keep this skb and reuse it, @@ -1044,23 +1044,6 @@ static int vector_mmsg_rx(struct vector_private *vp) return packet_count; } -static void vector_rx(struct vector_private *vp) -{ - int err; - int iter = 0; - - if ((vp->options & VECTOR_RX) > 0) - while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS)) - iter++; - else - while (((err = vector_legacy_rx(vp)) > 0) && (iter < MAX_ITERATIONS)) - iter++; - if ((err != 0) && net_ratelimit()) - netdev_err(vp->dev, "vector_rx: error(%d)\n", err); - if (iter == MAX_ITERATIONS) - netdev_err(vp->dev, "vector_rx: device stuck, remote end may have closed the connection\n"); -} - static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct vector_private *vp = netdev_priv(dev); @@ -1085,25 +1068,15 @@ static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_sent_queue(vp->dev, skb->len); queue_depth = vector_enqueue(vp->tx_queue, skb); - /* if the device queue is full, stop the upper layers and - * flush it. - */ - - if (queue_depth >= vp->tx_queue->max_depth - 1) { - vp->estats.tx_kicks++; - netif_stop_queue(dev); - vector_send(vp->tx_queue); - return NETDEV_TX_OK; - } - if (netdev_xmit_more()) { + if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) { mod_timer(&vp->tl, vp->coalesce); return NETDEV_TX_OK; + } else { + queue_depth = vector_send(vp->tx_queue); + if (queue_depth > 0) + napi_schedule(&vp->napi); } - if (skb->len < TX_SMALL_PACKET) { - vp->estats.tx_kicks++; - vector_send(vp->tx_queue); - } else - tasklet_schedule(&vp->tx_poll); + return NETDEV_TX_OK; } @@ -1114,7 +1087,7 @@ static irqreturn_t vector_rx_interrupt(int irq, void *dev_id) if (!netif_running(dev)) return IRQ_NONE; - vector_rx(vp); + napi_schedule(&vp->napi); return IRQ_HANDLED; } @@ -1133,8 +1106,7 @@ static irqreturn_t vector_tx_interrupt(int irq, void *dev_id) * tweaking the IRQ mask less costly */ - if (vp->in_write_poll) - tasklet_schedule(&vp->tx_poll); + napi_schedule(&vp->napi); return IRQ_HANDLED; } @@ -1161,7 +1133,8 @@ static int vector_net_close(struct net_device *dev) um_free_irq(vp->tx_irq, dev); vp->tx_irq = 0; } - tasklet_kill(&vp->tx_poll); + napi_disable(&vp->napi); + netif_napi_del(&vp->napi); if (vp->fds->rx_fd > 0) { if (vp->bpf) uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); @@ -1193,15 +1166,32 @@ static int vector_net_close(struct net_device *dev) return 0; } -/* TX tasklet */ - -static void vector_tx_poll(struct tasklet_struct *t) +static int vector_poll(struct napi_struct *napi, int budget) { - struct vector_private *vp = from_tasklet(vp, t, tx_poll); + struct vector_private *vp = container_of(napi, struct vector_private, napi); + int work_done = 0; + int err; + bool tx_enqueued = false; - vp->estats.tx_kicks++; - vector_send(vp->tx_queue); + if ((vp->options & VECTOR_TX) != 0) + tx_enqueued = (vector_send(vp->tx_queue) > 0); + if ((vp->options & VECTOR_RX) > 0) + err = vector_mmsg_rx(vp, budget); + else { + err = vector_legacy_rx(vp); + if (err > 0) + err = 1; + } + if (err > 0) + work_done += err; + + if (tx_enqueued || err > 0) + napi_schedule(napi); + if (work_done < budget) + napi_complete_done(napi, work_done); + return work_done; } + static void vector_reset_tx(struct work_struct *work) { struct vector_private *vp = @@ -1265,6 +1255,9 @@ static int vector_net_open(struct net_device *dev) goto out_close; } + netif_napi_add(vp->dev, &vp->napi, vector_poll, get_depth(vp->parsed)); + napi_enable(&vp->napi); + /* READ IRQ */ err = um_request_irq( irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd, @@ -1306,15 +1299,15 @@ static int vector_net_open(struct net_device *dev) uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); netif_start_queue(dev); + vector_reset_stats(vp); /* clear buffer - it can happen that the host side of the interface * is full when we get here. In this case, new data is never queued, * SIGIOs never arrive, and the net never works. */ - vector_rx(vp); + napi_schedule(&vp->napi); - vector_reset_stats(vp); vdevice = find_device(vp->unit); vdevice->opened = 1; @@ -1543,15 +1536,16 @@ static const struct net_device_ops vector_netdev_ops = { #endif }; - static void vector_timer_expire(struct timer_list *t) { struct vector_private *vp = from_timer(vp, t, tl); vp->estats.tx_kicks++; - vector_send(vp->tx_queue); + napi_schedule(&vp->napi); } + + static void vector_eth_configure( int n, struct arglist *def @@ -1634,7 +1628,6 @@ static void vector_eth_configure( }); dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); - tasklet_setup(&vp->tx_poll, vector_tx_poll); INIT_WORK(&vp->reset_tx, vector_reset_tx); timer_setup(&vp->tl, vector_timer_expire, 0); diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h index 8fff93a75a92..2a1fa8e0f3e1 100644 --- a/arch/um/drivers/vector_kern.h +++ b/arch/um/drivers/vector_kern.h @@ -14,6 +14,7 @@ #include <linux/ctype.h> #include <linux/workqueue.h> #include <linux/interrupt.h> + #include "vector_user.h" /* Queue structure specially adapted for multiple enqueue/dequeue @@ -72,6 +73,7 @@ struct vector_private { struct list_head list; spinlock_t lock; struct net_device *dev; + struct napi_struct napi ____cacheline_aligned; int unit; @@ -115,7 +117,6 @@ struct vector_private { spinlock_t stats_lock; - struct tasklet_struct tx_poll; bool rexmit_scheduled; bool opened; bool in_write_poll; diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index e4ffeb9a1fa4..c650e428432b 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -771,7 +771,7 @@ int uml_vector_detach_bpf(int fd, void *bpf) printk(KERN_ERR BPF_DETACH_FAIL, prog->len, prog->filter, fd, -errno); return err; } -void *uml_vector_default_bpf(void *mac) +void *uml_vector_default_bpf(const void *mac) { struct sock_filter *bpf; uint32_t *mac1 = (uint32_t *)(mac + 2); diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h index d29d5fdd98fa..3a73d17a0161 100644 --- a/arch/um/drivers/vector_user.h +++ b/arch/um/drivers/vector_user.h @@ -97,7 +97,7 @@ extern int uml_vector_recvmmsg( unsigned int vlen, unsigned int flags ); -extern void *uml_vector_default_bpf(void *mac); +extern void *uml_vector_default_bpf(const void *mac); extern void *uml_vector_user_bpf(char *filename); extern int uml_vector_attach_bpf(int fd, void *bpf); extern int uml_vector_detach_bpf(int fd, void *bpf); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index b08bd2966253..f1f3f52f1e9c 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += softirq_stack.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h -generic-y += word-at-a-time.h generic-y += kprobes.h generic-y += mm_hooks.h generic-y += vga.h diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h index f512704a9ec7..22b39de73c24 100644 --- a/arch/um/include/asm/xor.h +++ b/arch/um/include/asm/xor.h @@ -4,8 +4,10 @@ #ifdef CONFIG_64BIT #undef CONFIG_X86_32 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_sse_pf64)) #else #define CONFIG_X86_32 1 +#define TT_CPU_INF_XOR_DEFAULT (AVX_SELECT(&xor_block_8regs)) #endif #include <asm/cpufeature.h> @@ -16,7 +18,7 @@ #undef XOR_SELECT_TEMPLATE /* pick an arbitrary one - measuring isn't possible with inf-cpu */ #define XOR_SELECT_TEMPLATE(x) \ - (time_travel_mode == TT_MODE_INFCPU ? &xor_block_8regs : NULL) + (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x)) #endif #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 00214059d9ec..fafde1d5416e 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -168,6 +168,7 @@ extern unsigned os_major(unsigned long long dev); extern unsigned os_minor(unsigned long long dev); extern unsigned long long os_makedev(unsigned major, unsigned minor); extern int os_falloc_punch(int fd, unsigned long long offset, int count); +extern int os_falloc_zeroes(int fd, unsigned long long offset, int count); extern int os_eventfd(unsigned int initval, int flags); extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, unsigned int fds_num); diff --git a/arch/um/kernel/dtb.c b/arch/um/kernel/dtb.c index ca69d72025f3..484141b06938 100644 --- a/arch/um/kernel/dtb.c +++ b/arch/um/kernel/dtb.c @@ -25,8 +25,8 @@ void uml_dtb_init(void) return; } - unflatten_device_tree(); early_init_fdt_scan_reserved_mem(); + unflatten_device_tree(); } static int __init uml_dtb_setup(char *line, int *add) diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index e4421dbc4c36..fc4450db59bd 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -625,6 +625,15 @@ int os_falloc_punch(int fd, unsigned long long offset, int len) return n; } +int os_falloc_zeroes(int fd, unsigned long long offset, int len) +{ + int n = fallocate(fd, FALLOC_FL_ZERO_RANGE|FALLOC_FL_KEEP_SIZE, offset, len); + + if (n < 0) + return -errno; + return n; +} + int os_eventfd(unsigned int initval, int flags) { int fd = eventfd(initval, flags); diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 32e88baf18dd..b459745f52e2 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -4,6 +4,7 @@ */ #include <stdlib.h> +#include <string.h> #include <unistd.h> #include <errno.h> #include <sched.h> @@ -99,6 +100,10 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) CATCH_EINTR(waitpid(pid, NULL, __WALL)); } + if (ret < 0) + printk(UM_KERN_ERR "run_helper : failed to exec %s on host: %s\n", + argv[0], strerror(-ret)); + out_free2: kfree(data.buf); out_close: diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index 6c5041c5560b..4d5591d96d8c 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -18,12 +18,6 @@ static timer_t event_high_res_timer = 0; -static inline long long timeval_to_ns(const struct timeval *tv) -{ - return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + - tv->tv_usec * UM_NSEC_PER_USEC; -} - static inline long long timespec_to_ns(const struct timespec *ts) { return ((long long) ts->tv_sec * UM_NSEC_PER_SEC) + ts->tv_nsec; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 442a426e8a68..b0142e01002e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -223,6 +223,7 @@ config X86 select HAVE_KPROBES_ON_FTRACE select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES + select HAVE_RETHOOK select HAVE_KVM select HAVE_LIVEPATCH if X86_64 select HAVE_MIXED_BREAKPOINTS_REGS diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 946f74dd6fba..259383e1ad44 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -172,7 +172,7 @@ SYM_FUNC_START(chacha_2block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -438,7 +438,7 @@ SYM_FUNC_START(chacha_4block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 71fae5a09e56..2077ce7a5647 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -297,7 +297,7 @@ ___ $code.=<<___; mov \$1,%eax .Lno_key: - ret + RET ___ &end_function("poly1305_init_x86_64"); @@ -373,7 +373,7 @@ $code.=<<___; .cfi_adjust_cfa_offset -48 .Lno_data: .Lblocks_epilogue: - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_x86_64"); @@ -399,7 +399,7 @@ $code.=<<___; mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_x86_64"); if ($avx) { @@ -429,7 +429,7 @@ ___ &poly1305_iteration(); $code.=<<___; pop $ctx - ret + RET .size __poly1305_block,.-__poly1305_block .type __poly1305_init_avx,\@abi-omnipotent @@ -594,7 +594,7 @@ __poly1305_init_avx: lea -48-64($ctx),$ctx # size [de-]optimization pop %rbp - ret + RET .size __poly1305_init_avx,.-__poly1305_init_avx ___ @@ -747,7 +747,7 @@ $code.=<<___; .cfi_restore %rbp .Lno_data_avx: .Lblocks_avx_epilogue: - ret + RET .cfi_endproc .align 32 @@ -1452,7 +1452,7 @@ $code.=<<___ if (!$win64); ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ &end_function("poly1305_blocks_avx"); @@ -1508,7 +1508,7 @@ $code.=<<___; mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET ___ &end_function("poly1305_emit_avx"); @@ -1675,7 +1675,7 @@ $code.=<<___; .cfi_restore %rbp .Lno_data_avx2$suffix: .Lblocks_avx2_epilogue$suffix: - ret + RET .cfi_endproc .align 32 @@ -2201,7 +2201,7 @@ $code.=<<___ if (!$win64); ___ $code.=<<___; vzeroupper - ret + RET .cfi_endproc ___ if($avx > 2 && $avx512) { @@ -2792,7 +2792,7 @@ $code.=<<___ if (!$win64); .cfi_def_cfa_register %rsp ___ $code.=<<___; - ret + RET .cfi_endproc ___ @@ -2893,7 +2893,7 @@ $code.=<<___ if ($flavour =~ /elf32/); ___ $code.=<<___; mov \$1,%eax - ret + RET .size poly1305_init_base2_44,.-poly1305_init_base2_44 ___ { @@ -3010,7 +3010,7 @@ poly1305_blocks_vpmadd52: jnz .Lblocks_vpmadd52_4x .Lno_data_vpmadd52: - ret + RET .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52 ___ } @@ -3451,7 +3451,7 @@ poly1305_blocks_vpmadd52_4x: vzeroall .Lno_data_vpmadd52_4x: - ret + RET .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x ___ } @@ -3824,7 +3824,7 @@ $code.=<<___; vzeroall .Lno_data_vpmadd52_8x: - ret + RET .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x ___ } @@ -3861,7 +3861,7 @@ poly1305_emit_base2_44: mov %rax,0($mac) # write result mov %rcx,8($mac) - ret + RET .size poly1305_emit_base2_44,.-poly1305_emit_base2_44 ___ } } } @@ -3916,7 +3916,7 @@ xor128_encrypt_n_pad: .Ldone_enc: mov $otp,%rax - ret + RET .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad .globl xor128_decrypt_n_pad @@ -3967,7 +3967,7 @@ xor128_decrypt_n_pad: .Ldone_dec: mov $otp,%rax - ret + RET .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad ___ } @@ -4109,7 +4109,7 @@ avx_handler: pop %rbx pop %rdi pop %rsi - ret + RET .size avx_handler,.-avx_handler .section .pdata diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S index 71e6aae23e17..b12b9efb5ec5 100644 --- a/arch/x86/crypto/sm3-avx-asm_64.S +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -513,5 +513,5 @@ SYM_FUNC_START(sm3_transform_avx) movq %rbp, %rsp; popq %rbp; - ret; + RET; SYM_FUNC_END(sm3_transform_avx) diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 7f3886eeb2ff..eca5d6eff132 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -3,8 +3,7 @@ out := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm # Create output directory if not already present -_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ - $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') +$(shell mkdir -p $(out) $(uapi)) syscall32 := $(src)/syscall_32.tbl syscall64 := $(src)/syscall_64.tbl diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4138939532c6..d23e80a56eb8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -249,6 +249,7 @@ enum x86_intercept_stage; #define PFERR_SGX_BIT 15 #define PFERR_GUEST_FINAL_BIT 32 #define PFERR_GUEST_PAGE_BIT 33 +#define PFERR_IMPLICIT_ACCESS_BIT 48 #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) @@ -259,6 +260,7 @@ enum x86_intercept_stage; #define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) #define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ @@ -430,7 +432,7 @@ struct kvm_mmu { void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t gva_or_gpa, u32 access, + gpa_t gva_or_gpa, u64 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); @@ -512,6 +514,7 @@ struct kvm_pmu { u64 global_ctrl_mask; u64 global_ovf_ctrl_mask; u64 reserved_bits; + u64 raw_event_mask; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; @@ -1040,14 +1043,16 @@ struct kvm_x86_msr_filter { struct msr_bitmap_range ranges[16]; }; -#define APICV_INHIBIT_REASON_DISABLE 0 -#define APICV_INHIBIT_REASON_HYPERV 1 -#define APICV_INHIBIT_REASON_NESTED 2 -#define APICV_INHIBIT_REASON_IRQWIN 3 -#define APICV_INHIBIT_REASON_PIT_REINJ 4 -#define APICV_INHIBIT_REASON_X2APIC 5 -#define APICV_INHIBIT_REASON_BLOCKIRQ 6 -#define APICV_INHIBIT_REASON_ABSENT 7 +enum kvm_apicv_inhibit { + APICV_INHIBIT_REASON_DISABLE, + APICV_INHIBIT_REASON_HYPERV, + APICV_INHIBIT_REASON_NESTED, + APICV_INHIBIT_REASON_IRQWIN, + APICV_INHIBIT_REASON_PIT_REINJ, + APICV_INHIBIT_REASON_X2APIC, + APICV_INHIBIT_REASON_BLOCKIRQ, + APICV_INHIBIT_REASON_ABSENT, +}; struct kvm_arch { unsigned long n_used_mmu_pages; @@ -1401,7 +1406,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - bool (*check_apicv_inhibit_reasons)(ulong bit); + bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); @@ -1585,7 +1590,7 @@ void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_init_vm(struct kvm *kvm); +int kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); @@ -1795,11 +1800,22 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, bool kvm_apicv_activated(struct kvm *kvm); void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); -void kvm_request_apicv_update(struct kvm *kvm, bool activate, - unsigned long bit); +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set); + +static inline void kvm_set_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, true); +} -void __kvm_request_apicv_update(struct kvm *kvm, bool activate, - unsigned long bit); +static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason) +{ + kvm_set_or_clear_apicv_inhibit(kvm, reason, false); +} int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 7eb2df5417fb..f70a5108d464 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -221,8 +221,14 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) +#define SVM_TSC_RATIO_RSVD 0xffffff0000000000ULL +#define SVM_TSC_RATIO_MIN 0x0000000000000001ULL +#define SVM_TSC_RATIO_MAX 0x000000ffffffffffULL +#define SVM_TSC_RATIO_DEFAULT 0x0100000000ULL + + /* AVIC */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFFULL) #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) @@ -230,9 +236,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) -#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFF) +#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL) + +#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) -#define AVIC_DOORBELL_PHYSICAL_ID_MASK (0xFF) +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL #define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 2a1f8734416d..7cede4dc21f0 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,7 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> -#include <linux/kprobes.h> +#include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -16,7 +16,7 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; -#ifdef CONFIG_KRETPROBES +#if defined(CONFIG_RETHOOK) struct llist_node *kr_cur; #endif bool error; @@ -104,19 +104,18 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, #endif static inline -unsigned long unwind_recover_kretprobe(struct unwind_state *state, - unsigned long addr, unsigned long *addr_p) +unsigned long unwind_recover_rethook(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) { -#ifdef CONFIG_KRETPROBES - return is_kretprobe_trampoline(addr) ? - kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) : - addr; -#else - return addr; +#ifdef CONFIG_RETHOOK + if (is_rethook_trampoline(addr)) + return rethook_find_ret_addr(state->task, (unsigned long)addr_p, + &state->kr_cur); #endif + return addr; } -/* Recover the return address modified by kretprobe and ftrace_graph. */ +/* Recover the return address modified by rethook and ftrace_graph. */ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) @@ -125,7 +124,7 @@ unsigned long unwind_recover_ret_addr(struct unwind_state *state, ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); - return unwind_recover_kretprobe(state, ret, addr_p); + return unwind_recover_rethook(state, ret, addr_p); } /* diff --git a/arch/x86/include/asm/user_32.h b/arch/x86/include/asm/user_32.h index d72c3d66e94f..8963915e533f 100644 --- a/arch/x86/include/asm/user_32.h +++ b/arch/x86/include/asm/user_32.h @@ -124,9 +124,5 @@ struct user{ char u_comm[32]; /* User command that was responsible */ int u_debugreg[8]; }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_32_H */ diff --git a/arch/x86/include/asm/user_64.h b/arch/x86/include/asm/user_64.h index db909923611c..1dd10f07ccd6 100644 --- a/arch/x86/include/asm/user_64.h +++ b/arch/x86/include/asm/user_64.h @@ -130,9 +130,5 @@ struct user { unsigned long error_code; /* CPU error code or 0 */ unsigned long fault_address; /* CR3 or 0 */ }; -#define NBPG PAGE_SIZE -#define UPAGES 1 -#define HOST_TEXT_START_ADDR (u.start_code) -#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) #endif /* _ASM_X86_USER_64_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6462e3dd98f4..c41ef42adbe8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 7d3a2e2daf01..c993521d4933 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -6,6 +6,7 @@ #include <asm/asm.h> #include <asm/frame.h> +#include <asm/insn.h> #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 8ef933c03afa..7c4ab8870da4 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -811,18 +811,6 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs, = (regs->flags & X86_EFLAGS_IF); } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long *sara = stack_addr(regs); - - ri->ret_addr = (kprobe_opcode_t *) *sara; - ri->fp = sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { @@ -1023,101 +1011,6 @@ int kprobe_int3_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_int3_handler); -/* - * When a retprobed function returns, this code saves registers and - * calls trampoline_handler() runs, which calls the kretprobe's handler. - */ -asm( - ".text\n" - ".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" -#ifdef CONFIG_X86_64 - ANNOTATE_NOENDBR - /* Push a fake return address to tell the unwinder it's a kretprobe. */ - " pushq $__kretprobe_trampoline\n" - UNWIND_HINT_FUNC - /* Save the 'sp - 8', this will be fixed later. */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rdi\n" - " call trampoline_handler\n" - RESTORE_REGS_STRING - /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ - " addq $8, %rsp\n" - " popfq\n" -#else - /* Push a fake return address to tell the unwinder it's a kretprobe. */ - " pushl $__kretprobe_trampoline\n" - UNWIND_HINT_FUNC - /* Save the 'sp - 4', this will be fixed later. */ - " pushl %esp\n" - " pushfl\n" - SAVE_REGS_STRING - " movl %esp, %eax\n" - " call trampoline_handler\n" - RESTORE_REGS_STRING - /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ - " addl $4, %esp\n" - " popfl\n" -#endif - ASM_RET - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n" -); -NOKPROBE_SYMBOL(__kretprobe_trampoline); -/* - * __kretprobe_trampoline() skips updating frame pointer. The frame pointer - * saved in trampoline_handler() points to the real caller function's - * frame pointer. Thus the __kretprobe_trampoline() doesn't have a - * standard stack frame with CONFIG_FRAME_POINTER=y. - * Let's mark it non-standard function. Anyway, FP unwinder can correctly - * unwind without the hint. - */ -STACK_FRAME_NON_STANDARD_FP(__kretprobe_trampoline); - -/* This is called from kretprobe_trampoline_handler(). */ -void arch_kretprobe_fixup_return(struct pt_regs *regs, - kprobe_opcode_t *correct_ret_addr) -{ - unsigned long *frame_pointer = ®s->sp + 1; - - /* Replace fake return address with real one. */ - *frame_pointer = (unsigned long)correct_ret_addr; -} - -/* - * Called from __kretprobe_trampoline - */ -__used __visible void trampoline_handler(struct pt_regs *regs) -{ - unsigned long *frame_pointer; - - /* fixup registers */ - regs->cs = __KERNEL_CS; -#ifdef CONFIG_X86_32 - regs->gs = 0; -#endif - regs->ip = (unsigned long)&__kretprobe_trampoline; - regs->orig_ax = ~0UL; - regs->sp += sizeof(long); - frame_pointer = ®s->sp + 1; - - /* - * The return address at 'frame_pointer' is recovered by the - * arch_kretprobe_fixup_return() which called from the - * kretprobe_trampoline_handler(). - */ - kretprobe_trampoline_handler(regs, frame_pointer); - - /* - * Copy FLAGS to 'pt_regs::sp' so that __kretprobe_trapmoline() - * can do RET right after POPF. - */ - regs->sp = regs->flags; -} -NOKPROBE_SYMBOL(trampoline_handler); - int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index b4a54a52aa59..e6b8c5362b94 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -106,7 +106,8 @@ asm ( ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ + " pushq $" __stringify(__KERNEL_DS) "\n" + /* Save the 'sp - 8', this will be fixed later. */ " pushq %rsp\n" " pushfq\n" ".global optprobe_template_clac\n" @@ -121,14 +122,17 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 - /* Move flags to rsp */ + /* Copy 'regs->flags' into 'regs->ss'. */ " movq 18*8(%rsp), %rdx\n" - " movq %rdx, 19*8(%rsp)\n" + " movq %rdx, 20*8(%rsp)\n" RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" + /* Skip 'regs->flags' and 'regs->sp'. */ + " addq $16, %rsp\n" + /* And pop flags register from 'regs->ss'. */ " popfq\n" #else /* CONFIG_X86_32 */ + " pushl %ss\n" + /* Save the 'sp - 4', this will be fixed later. */ " pushl %esp\n" " pushfl\n" ".global optprobe_template_clac\n" @@ -142,12 +146,13 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 - /* Move flags into esp */ + /* Copy 'regs->flags' into 'regs->ss'. */ " movl 14*4(%esp), %edx\n" - " movl %edx, 15*4(%esp)\n" + " movl %edx, 16*4(%esp)\n" RESTORE_REGS_STRING - /* Skip flags entry */ - " addl $4, %esp\n" + /* Skip 'regs->flags' and 'regs->sp'. */ + " addl $8, %esp\n" + /* And pop flags register from 'regs->ss'. */ " popfl\n" #endif ".global optprobe_template_end\n" @@ -179,6 +184,8 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) kprobes_inc_nmissed_count(&op->kp); } else { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* Adjust stack pointer */ + regs->sp += sizeof(long); /* Save skipped registers */ regs->cs = __KERNEL_CS; #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 79e0b8d63ffa..a22deb58f86d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -517,7 +517,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; - } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, diff --git a/arch/x86/kernel/rethook.c b/arch/x86/kernel/rethook.c new file mode 100644 index 000000000000..8a1c0111ae79 --- /dev/null +++ b/arch/x86/kernel/rethook.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * x86 implementation of rethook. Mostly copied from arch/x86/kernel/kprobes/core.c. + */ +#include <linux/bug.h> +#include <linux/rethook.h> +#include <linux/kprobes.h> +#include <linux/objtool.h> + +#include "kprobes/common.h" + +__visible void arch_rethook_trampoline_callback(struct pt_regs *regs); + +#ifndef ANNOTATE_NOENDBR +#define ANNOTATE_NOENDBR +#endif + +/* + * When a target function returns, this code saves registers and calls + * arch_rethook_trampoline_callback(), which calls the rethook handler. + */ +asm( + ".text\n" + ".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" +#ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* This is only jumped from ret instruction */ + /* Push a fake return address to tell the unwinder it's a rethook. */ + " pushq $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + " pushq $" __stringify(__KERNEL_DS) "\n" + /* Save the 'sp - 16', this will be fixed later. */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rdi\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->ss'. */ + " addq $16, %rsp\n" + " popfq\n" +#else + /* Push a fake return address to tell the unwinder it's a rethook. */ + " pushl $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + " pushl %ss\n" + /* Save the 'sp - 8', this will be fixed later. */ + " pushl %esp\n" + " pushfl\n" + SAVE_REGS_STRING + " movl %esp, %eax\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->ss'. */ + " addl $8, %esp\n" + " popfl\n" +#endif + ASM_RET + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n" +); +NOKPROBE_SYMBOL(arch_rethook_trampoline); + +/* + * Called from arch_rethook_trampoline + */ +__used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + unsigned long *frame_pointer; + + /* fixup registers */ + regs->cs = __KERNEL_CS; +#ifdef CONFIG_X86_32 + regs->gs = 0; +#endif + regs->ip = (unsigned long)&arch_rethook_trampoline; + regs->orig_ax = ~0UL; + regs->sp += 2*sizeof(long); + frame_pointer = (long *)(regs + 1); + + /* + * The return address at 'frame_pointer' is recovered by the + * arch_rethook_fixup_return() which called from this + * rethook_trampoline_handler(). + */ + rethook_trampoline_handler(regs, (unsigned long)frame_pointer); + + /* + * Copy FLAGS to 'pt_regs::ss' so that arch_rethook_trapmoline() + * can do RET right after POPF. + */ + *(unsigned long *)®s->ss = regs->flags; +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* + * arch_rethook_trampoline() skips updating frame pointer. The frame pointer + * saved in arch_rethook_trampoline_callback() points to the real caller + * function's frame pointer. Thus the arch_rethook_trampoline() doesn't have + * a standard stack frame with CONFIG_FRAME_POINTER=y. + * Let's mark it non-standard function. Anyway, FP unwinder can correctly + * unwind without the hint. + */ +STACK_FRAME_NON_STANDARD_FP(arch_rethook_trampoline); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + unsigned long *frame_pointer = (void *)(regs + 1); + + /* Replace fake return address with real one. */ + *frame_pointer = correct_ret_addr; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + unsigned long *stack = (unsigned long *)regs->sp; + + rh->ret_addr = stack[0]; + rh->frame = regs->sp; + + /* Replace the return addr with trampoline addr */ + stack[0] = (unsigned long) arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 2de3c8c5eba9..794fdef2501a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -550,15 +550,15 @@ bool unwind_next_frame(struct unwind_state *state) } /* * There is a small chance to interrupt at the entry of - * __kretprobe_trampoline() where the ORC info doesn't exist. - * That point is right after the RET to __kretprobe_trampoline() + * arch_rethook_trampoline() where the ORC info doesn't exist. + * That point is right after the RET to arch_rethook_trampoline() * which was modified return address. - * At that point, the @addr_p of the unwind_recover_kretprobe() + * At that point, the @addr_p of the unwind_recover_rethook() * (this has to point the address of the stack entry storing * the modified return address) must be "SP - (a stack entry)" * because SP is incremented by the RET. */ - state->ip = unwind_recover_kretprobe(state, state->ip, + state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; @@ -573,7 +573,7 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } /* See UNWIND_HINT_TYPE_REGS case comment. */ - state->ip = unwind_recover_kretprobe(state, state->ip, + state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); if (state->full_regs) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a00cd97b2623..b24ca7f4ed7c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -735,6 +735,7 @@ static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, if (function > READ_ONCE(max_cpuid_80000000)) return entry; } + break; default: break; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f9c00c89829b..89b11e7dca8a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3540,8 +3540,10 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt) { u64 tsc_aux = 0; - if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + if (!ctxt->ops->guest_has_rdpid(ctxt)) return emulate_ud(ctxt); + + ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux); ctxt->dst.val = tsc_aux; return X86EMUL_CONTINUE; } @@ -3642,7 +3644,7 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); - r = ctxt->ops->set_msr(ctxt, msr_index, msr_data); + r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data); if (r == X86EMUL_IO_NEEDED) return r; @@ -3659,7 +3661,7 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) u64 msr_data; int r; - r = ctxt->ops->get_msr(ctxt, msr_index, &msr_data); + r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data); if (r == X86EMUL_IO_NEEDED) return r; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a32f54ab84a2..123b677111c5 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -122,9 +122,13 @@ static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, else hv->synic_auto_eoi_used--; - __kvm_request_apicv_update(vcpu->kvm, - !hv->synic_auto_eoi_used, - APICV_INHIBIT_REASON_HYPERV); + /* + * Inhibit APICv if any vCPU is using SynIC's AutoEOI, which relies on + * the hypervisor to manually inject IRQs. + */ + __kvm_set_or_clear_apicv_inhibit(vcpu->kvm, + APICV_INHIBIT_REASON_HYPERV, + !!hv->synic_auto_eoi_used); up_write(&vcpu->kvm->arch.apicv_update_lock); } @@ -239,7 +243,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic); int ret; - if (!synic->active && !host) + if (!synic->active && (!host || data)) return 1; trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); @@ -285,6 +289,9 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, case HV_X64_MSR_EOM: { int i; + if (!synic->active) + break; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) kvm_hv_notify_acked_sint(vcpu, i); break; @@ -449,6 +456,9 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) struct kvm_lapic_irq irq; int ret, vector; + if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm)) + return -EINVAL; + if (sint >= ARRAY_SIZE(synic->sint)) return -EINVAL; @@ -661,7 +671,7 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || config)) return 1; if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode && @@ -690,7 +700,7 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer); struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu); - if (!synic->active && !host) + if (!synic->active && (!host || count)) return 1; trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id, diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 0b65a764ed3a..1c83076091af 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -305,15 +305,13 @@ void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject) * So, deactivate APICv when PIT is in reinject mode. */ if (reinject) { - kvm_request_apicv_update(kvm, false, - APICV_INHIBIT_REASON_PIT_REINJ); + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ); /* The initial state is preserved while ps->reinject == 0. */ kvm_pit_reset_reinject(pit); kvm_register_irq_ack_notifier(kvm, &ps->irq_ack_notifier); kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); } else { - kvm_request_apicv_update(kvm, true, - APICV_INHIBIT_REASON_PIT_REINJ); + kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ); kvm_unregister_irq_ack_notifier(kvm, &ps->irq_ack_notifier); kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); } diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 840ddb4a9302..8dff25d267b7 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -210,6 +210,8 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); + int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); + int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); @@ -226,6 +228,7 @@ struct x86_emulate_ops { bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt); bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt); + bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 80a2020c4db4..66b0eb0bda94 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1024,6 +1024,10 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, *r = -1; if (irq->shorthand == APIC_DEST_SELF) { + if (KVM_BUG_ON(!src, kvm)) { + *r = 0; + return true; + } *r = kvm_apic_set_irq(src->vcpu, irq, dest_map); return true; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index bf8dbc4bb12a..e6cae6f22683 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -214,27 +214,27 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, */ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned pte_access, unsigned pte_pkey, - unsigned pfec) + u64 access) { - int cpl = static_call(kvm_x86_get_cpl)(vcpu); + /* strip nested paging fault error codes */ + unsigned int pfec = access; unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu); /* - * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1. + * For explicit supervisor accesses, SMAP is disabled if EFLAGS.AC = 1. + * For implicit supervisor accesses, SMAP cannot be overridden. * - * If CPL = 3, SMAP applies to all supervisor-mode data accesses - * (these are implicit supervisor accesses) regardless of the value - * of EFLAGS.AC. + * SMAP works on supervisor accesses only, and not_smap can + * be set or not set when user access with neither has any bearing + * on the result. * - * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving - * the result in X86_EFLAGS_AC. We then insert it in place of - * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec, - * but it will be one in index if SMAP checks are being overridden. - * It is important to keep this branchless. + * We put the SMAP checking bit in place of the PFERR_RSVD_MASK bit; + * this bit will always be zero in pfec, but it will be one in index + * if SMAP checks are being disabled. */ - unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC); - int index = (pfec >> 1) + - (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); + u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; + bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; + int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1; bool fault = (mmu->permissions[index] >> pte_access) & 1; u32 errcode = PFERR_PRESENT_MASK; @@ -317,12 +317,12 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count) atomic64_add(count, &kvm->stat.pages[level - 1]); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access, struct x86_exception *exception); static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t gpa, u32 access, + gpa_t gpa, u64 access, struct x86_exception *exception) { if (mmu != &vcpu->arch.nested_mmu) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 51671cb34fb6..8f19ea752704 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2696,8 +2696,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, if (*sptep == spte) { ret = RET_PF_SPURIOUS; } else { - trace_kvm_mmu_set_spte(level, gfn, sptep); flush |= mmu_spte_update(sptep, spte); + trace_kvm_mmu_set_spte(level, gfn, sptep); } if (wrprot) { @@ -3703,7 +3703,7 @@ void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu) } static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t vaddr, u32 access, + gpa_t vaddr, u64 access, struct x86_exception *exception) { if (exception) @@ -4591,11 +4591,11 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept) * - X86_CR4_SMAP is set in CR4 * - A user page is accessed * - The access is not a fetch - * - Page fault in kernel mode - * - if CPL = 3 or X86_EFLAGS_AC is clear + * - The access is supervisor mode + * - If implicit supervisor access or X86_EFLAGS_AC is clear * - * Here, we cover the first three conditions. - * The fourth is computed dynamically in permission_fault(); + * Here, we cover the first four conditions. + * The fifth is computed dynamically in permission_fault(); * PFERR_RSVD_MASK bit will be set in PFEC if the access is * *not* subject to SMAP restrictions. */ @@ -5768,17 +5768,24 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, kvm_mmu_zap_all_fast(kvm); } -void kvm_mmu_init_vm(struct kvm *kvm) +int kvm_mmu_init_vm(struct kvm *kvm) { struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + int r; + INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); + INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - kvm_mmu_init_tdp_mmu(kvm); + r = kvm_mmu_init_tdp_mmu(kvm); + if (r < 0) + return r; node->track_write = kvm_mmu_pte_write; node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; kvm_page_track_register_notifier(kvm, node); + return 0; } void kvm_mmu_uninit_vm(struct kvm *kvm) @@ -5842,8 +5849,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (is_tdp_mmu_enabled(kvm)) { for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, gfn_start, - gfn_end, flush); + flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start, + gfn_end, true, flush); } if (flush) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 252c77805eb9..01fee5f67ac3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,36 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + signed char r; - return (ret != orig_pte); + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "=q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "=q" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif + + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, @@ -339,7 +333,7 @@ static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu, */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t addr, u32 access) + gpa_t addr, u64 access) { int ret; pt_element_t pte; @@ -347,7 +341,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gfn_t table_gfn; u64 pt_access, pte_access; unsigned index, accessed_dirty, pte_pkey; - unsigned nested_access; + u64 nested_access; gpa_t pte_gpa; bool have_ad; int offset; @@ -540,7 +534,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gpa_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u64 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -988,7 +982,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) /* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t addr, u32 access, + gpa_t addr, u64 access, struct x86_exception *exception) { struct guest_walker walker; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index e7e7876251b3..d71d177ae6b8 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -14,21 +14,24 @@ static bool __read_mostly tdp_mmu_enabled = true; module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644); /* Initializes the TDP MMU for the VM, if enabled. */ -bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) +int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { + struct workqueue_struct *wq; + if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled)) - return false; + return 0; + + wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); + if (!wq) + return -ENOMEM; /* This should not be changed for the lifetime of the VM. */ kvm->arch.tdp_mmu_enabled = true; - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); - kvm->arch.tdp_mmu_zap_wq = - alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); - - return true; + kvm->arch.tdp_mmu_zap_wq = wq; + return 1; } /* Arbitrarily returns true so that this may be used in if statements. */ @@ -906,10 +909,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) } /* - * Tears down the mappings for the range of gfns, [start, end), and frees the - * non-root pages mapping GFNs strictly within that range. Returns true if - * SPTEs have been cleared and a TLB flush is needed before releasing the - * MMU lock. + * Zap leafs SPTEs for the range of gfns, [start, end). Returns true if SPTEs + * have been cleared and a TLB flush is needed before releasing the MMU lock. * * If can_yield is true, will release the MMU lock and reschedule if the * scheduler needs the CPU or there is contention on the MMU lock. If this @@ -917,42 +918,25 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp) * the caller must ensure it does not supply too large a GFN range, or the * operation can cause a soft lockup. */ -static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, - gfn_t start, gfn_t end, bool can_yield, bool flush) +static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root, + gfn_t start, gfn_t end, bool can_yield, bool flush) { - bool zap_all = (start == 0 && end >= tdp_mmu_max_gfn_host()); struct tdp_iter iter; - /* - * No need to try to step down in the iterator when zapping all SPTEs, - * zapping the top-level non-leaf SPTEs will recurse on their children. - */ - int min_level = zap_all ? root->role.level : PG_LEVEL_4K; - end = min(end, tdp_mmu_max_gfn_host()); lockdep_assert_held_write(&kvm->mmu_lock); rcu_read_lock(); - for_each_tdp_pte_min_level(iter, root, min_level, start, end) { + for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) { if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, false)) { flush = false; continue; } - if (!is_shadow_present_pte(iter.old_spte)) - continue; - - /* - * If this is a non-last-level SPTE that covers a larger range - * than should be zapped, continue, and zap the mappings at a - * lower level, except when zapping all SPTEs. - */ - if (!zap_all && - (iter.gfn < start || - iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && + if (!is_shadow_present_pte(iter.old_spte) || !is_last_spte(iter.old_spte, iter.level)) continue; @@ -960,17 +944,13 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, flush = true; } - /* - * Need to flush before releasing RCU. TODO: do it only if intermediate - * page tables were zapped; there is no need to flush under RCU protection - * if no 'struct kvm_mmu_page' is freed. - */ - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, start, end - start); - rcu_read_unlock(); - return false; + /* + * Because this flow zaps _only_ leaf SPTEs, the caller doesn't need + * to provide RCU protection as no 'struct kvm_mmu_page' will be freed. + */ + return flush; } /* @@ -979,13 +959,13 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, * SPTEs have been cleared and a TLB flush is needed before releasing the * MMU lock. */ -bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, - gfn_t end, bool can_yield, bool flush) +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, + bool can_yield, bool flush) { struct kvm_mmu_page *root; for_each_tdp_mmu_root_yield_safe(kvm, root, as_id) - flush = zap_gfn_range(kvm, root, start, end, can_yield, flush); + flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush); return flush; } @@ -1233,8 +1213,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range, bool flush) { - return __kvm_tdp_mmu_zap_gfn_range(kvm, range->slot->as_id, range->start, - range->end, range->may_block, flush); + return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start, + range->end, range->may_block, flush); } typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter, diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 5e5ef2576c81..c163f7cc23ca 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -15,14 +15,8 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared); -bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, +bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end, bool can_yield, bool flush); -static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, - gfn_t start, gfn_t end, bool flush) -{ - return __kvm_tdp_mmu_zap_gfn_range(kvm, as_id, start, end, true, flush); -} - bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp); void kvm_tdp_mmu_zap_all(struct kvm *kvm); void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm); @@ -72,7 +66,7 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, u64 addr, u64 *spte); #ifdef CONFIG_X86_64 -bool kvm_mmu_init_tdp_mmu(struct kvm *kvm); +int kvm_mmu_init_tdp_mmu(struct kvm *kvm); void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; } @@ -93,7 +87,7 @@ static inline bool is_tdp_mmu(struct kvm_mmu *mmu) return sp && is_tdp_mmu_page(sp) && sp->root_count; } #else -static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; } +static inline int kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return 0; } static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {} static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; } static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index b1a02993782b..eca39f56c231 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -96,8 +96,7 @@ static void kvm_perf_overflow(struct perf_event *perf_event, static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, bool exclude_user, - bool exclude_kernel, bool intr, - bool in_tx, bool in_tx_cp) + bool exclude_kernel, bool intr) { struct perf_event *event; struct perf_event_attr attr = { @@ -116,16 +115,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, attr.sample_period = get_sample_period(pmc, pmc->counter); - if (in_tx) - attr.config |= HSW_IN_TX; - if (in_tx_cp) { + if ((attr.config & HSW_IN_TX_CHECKPOINTED) && + guest_cpuid_is_intel(pmc->vcpu)) { /* * HSW_IN_TX_CHECKPOINTED is not supported with nonzero * period. Just clear the sample period so at least * allocating the counter doesn't fail. */ attr.sample_period = 0; - attr.config |= HSW_IN_TX_CHECKPOINTED; } event = perf_event_create_kernel_counter(&attr, -1, current, @@ -185,6 +182,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) u32 type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; + struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu); bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) @@ -221,7 +219,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) } if (type == PERF_TYPE_RAW) - config = eventsel & AMD64_RAW_EVENT_MASK; + config = eventsel & pmu->raw_event_mask; if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) return; @@ -232,9 +230,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) pmc_reprogram_counter(pmc, type, config, !(eventsel & ARCH_PERFMON_EVENTSEL_USR), !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT, - (eventsel & HSW_IN_TX), - (eventsel & HSW_IN_TX_CHECKPOINTED)); + eventsel & ARCH_PERFMON_EVENTSEL_INT); } EXPORT_SYMBOL_GPL(reprogram_gp_counter); @@ -270,7 +266,7 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx) kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc), !(en_field & 0x2), /* exclude user */ !(en_field & 0x1), /* exclude kernel */ - pmi, false, false); + pmi); } EXPORT_SYMBOL_GPL(reprogram_fixed_counter); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index b37b353ec086..a1cf9c31273b 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -726,7 +726,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; - int idx, ret = -EINVAL; + int idx, ret = 0; if (!kvm_arch_has_assigned_device(kvm) || !irq_remapping_cap(IRQ_POSTING_CAP)) @@ -737,7 +737,13 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, idx = srcu_read_lock(&kvm->irq_srcu); irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); - WARN_ON(guest_irq >= irq_rt->nr_rt_entries); + + if (guest_irq >= irq_rt->nr_rt_entries || + hlist_empty(&irq_rt->map[guest_irq])) { + pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n", + guest_irq, irq_rt->nr_rt_entries); + goto out; + } hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { struct vcpu_data vcpu_info; @@ -822,7 +828,7 @@ out: return ret; } -bool avic_check_apicv_inhibit_reasons(ulong bit) +bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | @@ -833,7 +839,7 @@ bool avic_check_apicv_inhibit_reasons(ulong bit) BIT(APICV_INHIBIT_REASON_X2APIC) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ); - return supported & BIT(bit); + return supported & BIT(reason); } diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index d4de52409335..24eb935b6f85 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -262,12 +262,10 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - if (data == pmc->eventsel) - return 0; - if (!(data & pmu->reserved_bits)) { + data &= ~pmu->reserved_bits; + if (data != pmc->eventsel) reprogram_gp_counter(pmc, data); - return 0; - } + return 0; } return 1; @@ -284,6 +282,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xfffffff000280000ull; + pmu->raw_event_mask = AMD64_RAW_EVENT_MASK; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0884c3414a1b..bd4c64b362d2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -62,20 +62,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 -#define SVM_FEATURE_LBRV (1 << 1) -#define SVM_FEATURE_SVML (1 << 2) -#define SVM_FEATURE_TSC_RATE (1 << 4) -#define SVM_FEATURE_VMCB_CLEAN (1 << 5) -#define SVM_FEATURE_FLUSH_ASID (1 << 6) -#define SVM_FEATURE_DECODE_ASSIST (1 << 7) -#define SVM_FEATURE_PAUSE_FILTER (1 << 10) - #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) -#define TSC_RATIO_RSVD 0xffffff0000000000ULL -#define TSC_RATIO_MIN 0x0000000000000001ULL -#define TSC_RATIO_MAX 0x000000ffffffffffULL - static bool erratum_383_found __read_mostly; u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; @@ -87,7 +75,6 @@ u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; static uint64_t osvw_len = 4, osvw_status; static DEFINE_PER_CPU(u64, current_tsc_ratio); -#define TSC_RATIO_DEFAULT 0x0100000000ULL static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ @@ -480,7 +467,7 @@ static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (tsc_scaling) - wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); + wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT); cpu_svm_disable(); @@ -526,8 +513,8 @@ static int svm_hardware_enable(void) * Set the default value, even if we don't use TSC scaling * to avoid having stale value in the msr */ - wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); - __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); + wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT); + __this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT); } @@ -2723,7 +2710,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; } - if (data & TSC_RATIO_RSVD) + if (data & SVM_TSC_RATIO_RSVD) return 1; svm->tsc_ratio_msr = data; @@ -2918,7 +2905,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu) * In this case AVIC was temporarily disabled for * requesting the IRQ window and we have to re-enable it. */ - kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN); + kvm_clear_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN); ++vcpu->stat.irq_window_exits; return 1; @@ -3516,7 +3503,7 @@ static void svm_enable_irq_window(struct kvm_vcpu *vcpu) * via AVIC. In such case, we need to temporarily disable AVIC, * and fallback to injecting IRQ via V_IRQ. */ - kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN); + kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_IRQWIN); svm_set_vintr(svm); } } @@ -3948,6 +3935,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_cpuid_entry2 *best; + struct kvm *kvm = vcpu->kvm; vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3974,16 +3962,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) * is exposed to the guest, disable AVIC. */ if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC)) - kvm_request_apicv_update(vcpu->kvm, false, - APICV_INHIBIT_REASON_X2APIC); + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_X2APIC); /* * Currently, AVIC does not work with nested virtualization. * So, we disable AVIC when cpuid for SVM is set in the L1 guest. */ if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM)) - kvm_request_apicv_update(vcpu->kvm, false, - APICV_INHIBIT_REASON_NESTED); + kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_NESTED); } init_vmcb_after_set_cpuid(vcpu); } @@ -4766,10 +4752,10 @@ static __init int svm_hardware_setup(void) } else { pr_info("TSC scaling supported\n"); kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; - kvm_tsc_scaling_ratio_frac_bits = 32; } } + kvm_max_tsc_scaling_ratio = SVM_TSC_RATIO_MAX; + kvm_tsc_scaling_ratio_frac_bits = 32; tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e37bb3508cfa..f77a7d2d39dd 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -22,6 +22,8 @@ #include <asm/svm.h> #include <asm/sev-common.h> +#include "kvm_cache_regs.h" + #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) #define IOPM_SIZE PAGE_SIZE * 3 @@ -569,17 +571,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ -#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) -#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 -#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) - -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) -#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) -#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) -#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) - -#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL - int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -592,7 +583,7 @@ void __avic_vcpu_put(struct kvm_vcpu *vcpu); void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); -bool avic_check_apicv_inhibit_reasons(ulong bit); +bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason); void avic_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); void avic_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); bool avic_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 98aa981c04ec..8cdc62c74a96 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -4,7 +4,6 @@ */ #include <linux/kvm_host.h> -#include "kvm_cache_regs.h" #include <asm/mshyperv.h> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 193f5ba930d1..e3a24b8f04be 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1339,23 +1339,25 @@ TRACE_EVENT(kvm_hv_stimer_cleanup, __entry->vcpu_id, __entry->timer_index) ); -TRACE_EVENT(kvm_apicv_update_request, - TP_PROTO(bool activate, unsigned long bit), - TP_ARGS(activate, bit), +TRACE_EVENT(kvm_apicv_inhibit_changed, + TP_PROTO(int reason, bool set, unsigned long inhibits), + TP_ARGS(reason, set, inhibits), TP_STRUCT__entry( - __field(bool, activate) - __field(unsigned long, bit) + __field(int, reason) + __field(bool, set) + __field(unsigned long, inhibits) ), TP_fast_assign( - __entry->activate = activate; - __entry->bit = bit; + __entry->reason = reason; + __entry->set = set; + __entry->inhibits = inhibits; ), - TP_printk("%s bit=%lu", - __entry->activate ? "activate" : "deactivate", - __entry->bit) + TP_printk("%s reason=%u, inhibits=0x%lx", + __entry->set ? "set" : "cleared", + __entry->reason, __entry->inhibits) ); TRACE_EVENT(kvm_apicv_accept_irq, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 0684e519181b..bc3f8512bb64 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -389,6 +389,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct kvm_pmc *pmc; u32 msr = msr_info->index; u64 data = msr_info->data; + u64 reserved_bits; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -443,7 +444,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) return 0; - if (!(data & pmu->reserved_bits)) { + reserved_bits = pmu->reserved_bits; + if ((pmc->idx == 2) && + (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED)) + reserved_bits ^= HSW_IN_TX_CHECKPOINTED; + if (!(data & reserved_bits)) { reprogram_gp_counter(pmc, data); return 0; } @@ -485,6 +490,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->raw_event_mask = X86_RAW_EVENT_MASK; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry || !vcpu->kvm->arch.enable_pmu) @@ -533,8 +539,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) entry = kvm_find_cpuid_entry(vcpu, 7, 0); if (entry && (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) && - (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) - pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED; + (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) { + pmu->reserved_bits ^= HSW_IN_TX; + pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); + } bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e8963f5af618..04d170c4b61e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2866,21 +2866,17 @@ static void enter_rmode(struct kvm_vcpu *vcpu) int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmx_uret_msr *msr = vmx_find_uret_msr(vmx, MSR_EFER); /* Nothing to do if hardware doesn't support EFER. */ - if (!msr) + if (!vmx_find_uret_msr(vmx, MSR_EFER)) return 0; vcpu->arch.efer = efer; - if (efer & EFER_LMA) { - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); - msr->data = efer; - } else { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); + if (efer & EFER_LMA) + vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE); + else + vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE); - msr->data = efer & ~EFER_LME; - } vmx_setup_uret_msrs(vmx); return 0; } @@ -2906,7 +2902,6 @@ static void enter_lmode(struct kvm_vcpu *vcpu) static void exit_lmode(struct kvm_vcpu *vcpu) { - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } @@ -7705,14 +7700,14 @@ static void vmx_hardware_unsetup(void) free_kvm_area(); } -static bool vmx_check_apicv_inhibit_reasons(ulong bit) +static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | BIT(APICV_INHIBIT_REASON_HYPERV) | BIT(APICV_INHIBIT_REASON_BLOCKIRQ); - return supported & BIT(bit); + return supported & BIT(reason); } static struct kvm_x86_ops vmx_x86_ops __initdata = { @@ -7980,12 +7975,11 @@ static __init int hardware_setup(void) if (!enable_apicv) vmx_x86_ops.sync_pir_to_irr = NULL; - if (cpu_has_vmx_tsc_scaling()) { + if (cpu_has_vmx_tsc_scaling()) kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; - kvm_tsc_scaling_ratio_frac_bits = 48; - } + kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; + kvm_tsc_scaling_ratio_frac_bits = 48; kvm_has_bus_lock_exit = cpu_has_vmx_bus_lock_detection(); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02cf0a7e1d14..0c0ca599a353 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1748,9 +1748,6 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, { struct msr_data msr; - if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return KVM_MSR_RET_FILTERED; - switch (index) { case MSR_FS_BASE: case MSR_GS_BASE: @@ -1832,9 +1829,6 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, struct msr_data msr; int ret; - if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return KVM_MSR_RET_FILTERED; - switch (index) { case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) @@ -1871,6 +1865,20 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, return ret; } +static int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data) +{ + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) + return KVM_MSR_RET_FILTERED; + return kvm_get_msr_ignored_check(vcpu, index, data, false); +} + +static int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data) +{ + if (!kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) + return KVM_MSR_RET_FILTERED; + return kvm_set_msr_ignored_check(vcpu, index, data, false); +} + int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { return kvm_get_msr_ignored_check(vcpu, index, data, false); @@ -1953,7 +1961,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu) u64 data; int r; - r = kvm_get_msr(vcpu, ecx, &data); + r = kvm_get_msr_with_filter(vcpu, ecx, &data); if (!r) { trace_kvm_msr_read(ecx, data); @@ -1978,7 +1986,7 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) u64 data = kvm_read_edx_eax(vcpu); int r; - r = kvm_set_msr(vcpu, ecx, data); + r = kvm_set_msr_with_filter(vcpu, ecx, data); if (!r) { trace_kvm_msr_write(ecx, data); @@ -5938,7 +5946,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; - kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); + kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT); r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -6335,7 +6343,7 @@ set_identity_unlock: /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; - kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT); + kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_ABSENT); create_irqchip_unlock: mutex_unlock(&kvm->lock); break; @@ -6726,7 +6734,7 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, static_call(kvm_x86_get_segment)(vcpu, var, seg); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -6746,7 +6754,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); @@ -6756,7 +6764,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_FETCH_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } @@ -6766,7 +6774,7 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_WRITE_MASK; return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); } @@ -6782,7 +6790,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, } static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, u32 access, + struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -6819,7 +6827,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; unsigned offset; int ret; @@ -6844,7 +6852,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception) { - u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; + u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; /* * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED @@ -6863,9 +6871,11 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt, struct x86_exception *exception, bool system) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - u32 access = 0; + u64 access = 0; - if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3) + if (system) + access |= PFERR_IMPLICIT_ACCESS; + else if (static_call(kvm_x86_get_cpl)(vcpu) == 3) access |= PFERR_USER_MASK; return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); @@ -6881,7 +6891,7 @@ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, } static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, u32 access, + struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; @@ -6915,9 +6925,11 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v bool system) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - u32 access = PFERR_WRITE_MASK; + u64 access = PFERR_WRITE_MASK; - if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3) + if (system) + access |= PFERR_IMPLICIT_ACCESS; + else if (static_call(kvm_x86_get_cpl)(vcpu) == 3) access |= PFERR_USER_MASK; return kvm_write_guest_virt_helper(addr, val, bytes, vcpu, @@ -6984,7 +6996,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, bool write) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0) + u64 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0) | (write ? PFERR_WRITE_MASK : 0); /* @@ -7627,13 +7639,13 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, return; } -static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 *pdata) +static int emulator_get_msr_with_filter(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_get_msr(vcpu, msr_index, pdata); + r = kvm_get_msr_with_filter(vcpu, msr_index, pdata); if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0, complete_emulated_rdmsr, r)) { @@ -7644,13 +7656,13 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, return r; } -static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 data) +static int emulator_set_msr_with_filter(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int r; - r = kvm_set_msr(vcpu, msr_index, data); + r = kvm_set_msr_with_filter(vcpu, msr_index, data); if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data, complete_emulated_msr_access, r)) { @@ -7661,6 +7673,18 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return r; } +static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 *pdata) +{ + return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); +} + +static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, + u32 msr_index, u64 data) +{ + return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); +} + static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); @@ -7724,6 +7748,11 @@ static bool emulator_guest_has_fxsr(struct x86_emulate_ctxt *ctxt) return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_FXSR); } +static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt) +{ + return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID); +} + static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) { return kvm_register_read_raw(emul_to_vcpu(ctxt), reg); @@ -7794,6 +7823,8 @@ static const struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .get_smbase = emulator_get_smbase, .set_smbase = emulator_set_smbase, + .set_msr_with_filter = emulator_set_msr_with_filter, + .get_msr_with_filter = emulator_get_msr_with_filter, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .check_pmc = emulator_check_pmc, @@ -7806,6 +7837,7 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_long_mode = emulator_guest_has_long_mode, .guest_has_movbe = emulator_guest_has_movbe, .guest_has_fxsr = emulator_guest_has_fxsr, + .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .exiting_smm = emulator_exiting_smm, @@ -9058,15 +9090,29 @@ bool kvm_apicv_activated(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_apicv_activated); + +static void set_or_clear_apicv_inhibit(unsigned long *inhibits, + enum kvm_apicv_inhibit reason, bool set) +{ + if (set) + __set_bit(reason, inhibits); + else + __clear_bit(reason, inhibits); + + trace_kvm_apicv_inhibit_changed(reason, set, *inhibits); +} + static void kvm_apicv_init(struct kvm *kvm) { + unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons; + init_rwsem(&kvm->arch.apicv_update_lock); - set_bit(APICV_INHIBIT_REASON_ABSENT, - &kvm->arch.apicv_inhibit_reasons); + set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true); + if (!enable_apicv) - set_bit(APICV_INHIBIT_REASON_DISABLE, - &kvm->arch.apicv_inhibit_reasons); + set_or_clear_apicv_inhibit(inhibits, + APICV_INHIBIT_REASON_ABSENT, true); } static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) @@ -9740,24 +9786,21 @@ out: } EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); -void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set) { unsigned long old, new; lockdep_assert_held_write(&kvm->arch.apicv_update_lock); - if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(bit)) + if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason)) return; old = new = kvm->arch.apicv_inhibit_reasons; - if (activate) - __clear_bit(bit, &new); - else - __set_bit(bit, &new); + set_or_clear_apicv_inhibit(&new, reason, set); if (!!old != !!new) { - trace_kvm_apicv_update_request(activate, bit); /* * Kick all vCPUs before setting apicv_inhibit_reasons to avoid * false positives in the sanity check WARN in svm_vcpu_run(). @@ -9776,20 +9819,22 @@ void __kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) unsigned long gfn = gpa_to_gfn(APIC_DEFAULT_PHYS_BASE); kvm_zap_gfn_range(kvm, gfn, gfn+1); } - } else + } else { kvm->arch.apicv_inhibit_reasons = new; + } } -void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) +void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, + enum kvm_apicv_inhibit reason, bool set) { if (!enable_apicv) return; down_write(&kvm->arch.apicv_update_lock); - __kvm_request_apicv_update(kvm, activate, bit); + __kvm_set_or_clear_apicv_inhibit(kvm, reason, set); up_write(&kvm->arch.apicv_update_lock); } -EXPORT_SYMBOL_GPL(kvm_request_apicv_update); +EXPORT_SYMBOL_GPL(kvm_set_or_clear_apicv_inhibit); static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { @@ -10937,7 +10982,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) { - bool inhibit = false; + bool set = false; struct kvm_vcpu *vcpu; unsigned long i; @@ -10945,11 +10990,11 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) { - inhibit = true; + set = true; break; } } - __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ); + __kvm_set_or_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_BLOCKIRQ, set); up_write(&kvm->arch.apicv_update_lock); } @@ -11557,10 +11602,8 @@ int kvm_arch_hardware_setup(void *opaque) u64 max = min(0x7fffffffULL, __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); kvm_max_guest_tsc_khz = max; - - kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; } - + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; kvm_init_msr_list(); return 0; } @@ -11629,12 +11672,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ret = kvm_page_track_init(kvm); if (ret) - return ret; + goto out; + + ret = kvm_mmu_init_vm(kvm); + if (ret) + goto out_page_track; INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); - INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); - INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -11666,10 +11710,14 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_apicv_init(kvm); kvm_hv_init_vm(kvm); - kvm_mmu_init_vm(kvm); kvm_xen_init_vm(kvm); return static_call(kvm_x86_vm_init)(kvm); + +out_page_track: + kvm_page_track_cleanup(kvm); +out: + return ret; } int kvm_arch_post_init_vm(struct kvm *kvm) @@ -12593,7 +12641,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; struct x86_exception fault; - u32 access = error_code & + u64 access = error_code & (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK); if (!(error_code & PFERR_PRESENT_MASK) || @@ -12933,7 +12981,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log); -EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 4aa0f2b31665..bf6cc25eee76 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -39,8 +39,8 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) } do { - ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true, - gpa, PAGE_SIZE, false); + ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN, + gpa, PAGE_SIZE); if (ret) goto out; @@ -1025,8 +1025,7 @@ static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, - PAGE_SIZE, false); + rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index 1f8a8f895173..50734a23034c 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -93,7 +93,6 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) buff += 8; } if (len & 7) { -#ifdef CONFIG_DCACHE_WORD_ACCESS unsigned int shift = (8 - (len & 7)) * 8; unsigned long trail; @@ -103,31 +102,6 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) "adcq $0,%[res]" : [res] "+r" (temp64) : [trail] "r" (trail)); -#else - if (len & 4) { - asm("addq %[val],%[res]\n\t" - "adcq $0,%[res]" - : [res] "+r" (temp64) - : [val] "r" ((u64)*(u32 *)buff) - : "memory"); - buff += 4; - } - if (len & 2) { - asm("addq %[val],%[res]\n\t" - "adcq $0,%[res]" - : [res] "+r" (temp64) - : [val] "r" ((u64)*(u16 *)buff) - : "memory"); - buff += 2; - } - if (len & 1) { - asm("addq %[val],%[res]\n\t" - "adcq $0,%[res]" - : [res] "+r" (temp64) - : [val] "r" ((u64)*(u8 *)buff) - : "memory"); - } -#endif } result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); if (unlikely(odd)) { diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index ead7e5b3a975..1bcd42c53039 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -9,6 +9,7 @@ endmenu config UML_X86 def_bool y select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 + select DCACHE_WORD_ACCESS config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h index 48d6cd12f8a5..b6b997225841 100644 --- a/arch/x86/um/shared/sysdep/syscalls_64.h +++ b/arch/x86/um/shared/sysdep/syscalls_64.h @@ -10,13 +10,12 @@ #include <linux/msg.h> #include <linux/shm.h> -typedef long syscall_handler_t(void); +typedef long syscall_handler_t(long, long, long, long, long, long); extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ - (((long (*)(long, long, long, long, long, long)) \ - (*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ + (((*sys_call_table[syscall]))(UPT_SYSCALL_ARG1(®s->regs), \ UPT_SYSCALL_ARG2(®s->regs), \ UPT_SYSCALL_ARG3(®s->regs), \ UPT_SYSCALL_ARG4(®s->regs), \ diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index fe5323f0c42d..27b29ae6c471 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -12,7 +12,6 @@ #include <asm/prctl.h> /* XXX This should get the constants from libc */ #include <registers.h> #include <os.h> -#include <registers.h> long arch_prctl(struct task_struct *task, int option, unsigned long __user *arg2) diff --git a/arch/xtensa/kernel/syscalls/Makefile b/arch/xtensa/kernel/syscalls/Makefile index 6713c65a25e1..b265e4bc16c2 100644 --- a/arch/xtensa/kernel/syscalls/Makefile +++ b/arch/xtensa/kernel/syscalls/Makefile @@ -2,8 +2,7 @@ kapi := arch/$(SRCARCH)/include/generated/asm uapi := arch/$(SRCARCH)/include/generated/uapi/asm -_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \ - $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +$(shell mkdir -p $(uapi) $(kapi)) syscall := $(src)/syscall.tbl syshdr := $(srctree)/scripts/syscallhdr.sh |