diff options
author | Catalin Marinas <catalin.marinas@arm.com> | 2021-08-31 09:10:00 +0100 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2021-08-31 09:10:00 +0100 |
commit | 65266a7c6abfa1ad915a362c41bf38576607f1f9 (patch) | |
tree | 046d86fc88c50fcdeaddb9ab4c709ae0878b1e3f /arch | |
parent | 1a7f67e618d42e9870dcd9fb0c7b2682d71fd631 (diff) | |
parent | 702f43872665e3b1cc6fdb77d238533274fc9d18 (diff) | |
download | linux-65266a7c6abfa1ad915a362c41bf38576607f1f9.tar.bz2 |
Merge remote-tracking branch 'tip/sched/arm64' into for-next/core
* tip/sched/arm64: (785 commits)
Documentation: arm64: describe asymmetric 32-bit support
arm64: Remove logic to kill 32-bit tasks on 64-bit-only cores
arm64: Hook up cmdline parameter to allow mismatched 32-bit EL0
arm64: Advertise CPUs capable of running 32-bit applications in sysfs
arm64: Prevent offlining first CPU with 32-bit EL0 on mismatched system
arm64: exec: Adjust affinity for compat tasks with mismatched 32-bit EL0
arm64: Implement task_cpu_possible_mask()
sched: Introduce dl_task_check_affinity() to check proposed affinity
sched: Allow task CPU affinity to be restricted on asymmetric systems
sched: Split the guts of sched_setaffinity() into a helper function
sched: Introduce task_struct::user_cpus_ptr to track requested affinity
sched: Reject CPU affinity changes based on task_cpu_possible_mask()
cpuset: Cleanup cpuset_cpus_allowed_fallback() use in select_fallback_rq()
cpuset: Honour task_cpu_possible_mask() in guarantee_online_cpus()
cpuset: Don't use the cpu_possible_mask as a last resort for cgroup v1
sched: Introduce task_cpu_possible_mask() to limit fallback rq selection
sched: Cgroup SCHED_IDLE support
sched/topology: Skip updating masks for non-online nodes
Linux 5.14-rc6
lib: use PFN_PHYS() in devmem_is_allowed()
...
Diffstat (limited to 'arch')
139 files changed, 826 insertions, 437 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..6c50877841df 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,7 +14,6 @@ config ALPHA select PCI_SYSCALL if PCI select HAVE_AOUT select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE @@ -532,7 +531,7 @@ config SMP will run faster if you say N here. See also the SMP-HOWTO available at - <http://www.tldp.org/docs.html#howto>. + <https://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 00266e6e1b71..b4faba2432d5 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -23,7 +23,7 @@ #include "ksize.h" extern unsigned long switch_to_osf_pal(unsigned long nr, - struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa, unsigned long *vptb); extern void move_stack(unsigned long new_stack); diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 43af71835adf..90a2b341e9c0 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -200,7 +200,7 @@ extern char _end; START_ADDR KSEG address of the entry point of kernel code. ZERO_PGE KSEG address of page full of zeroes, but - upon entry to kerne cvan be expected + upon entry to kernel, it can be expected to hold the parameter list and possible INTRD information. diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index d65192202703..325d4dd4f904 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...) __attribute__ ((format (printf, 1, 2))); /* - * gzip delarations + * gzip declarations */ #define OF(args) args #define STATIC static diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index dd2dd9f0861f..7f1ca30b115b 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y CONFIG_ALPHA_LEGACY_START_ADDRESS=y CONFIG_MATHEMU=y CONFIG_CRYPTO_HMAC=y +CONFIG_DEVTMPFS=y diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h index 5159ba259d65..ae645959018a 100644 --- a/arch/alpha/include/asm/compiler.h +++ b/arch/alpha/include/asm/compiler.h @@ -4,15 +4,4 @@ #include <uapi/asm/compiler.h> -/* Some idiots over in <linux/compiler.h> thought inline should imply - always_inline. This breaks stuff. We'll include this file whenever - we run into such problems. */ - -#include <linux/compiler.h> -#undef inline -#undef __inline__ -#undef __inline -#undef __always_inline -#define __always_inline inline __attribute__((always_inline)) - #endif /* __ALPHA_COMPILER_H */ diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h index 11c688c1d7ec..f21babaeed85 100644 --- a/arch/alpha/include/asm/syscall.h +++ b/arch/alpha/include/asm/syscall.h @@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_ALPHA; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + #endif /* _ASM_ALPHA_SYSCALL_H */ diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d5367a1c6300..d31167e3269c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, return -EFAULT; state = ¤t_thread_info()->ieee_state; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); /* Update the real fpcr. */ @@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, state = ¤t_thread_info()->ieee_state; exc &= IEEE_STATUS_MASK; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ swcr = (*state & IEEE_SW_MASK) | exc; *state |= exc; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index e7a59d927d78..efcf7321701b 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags) * Check that CPU performance counters are supported. * - currently support EV67 and later CPUs. * - actually some later revisions of the EV6 have the same PMC model as the - * EV67 but we don't do suffiently deep CPU detection to detect them. + * EV67 but we don't do sufficiently deep CPU detection to detect them. * Bad luck to the very few people who might have one, I guess. */ static int supported_cpu(void) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index ef0c08ed0481..a5123ea426ce 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ childstack->r10 = kthread_arg; - childregs->hae = alpha_mv.hae_cache, + childregs->hae = alpha_mv.hae_cache; childti->pcb.usp = 0; return 0; } diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 7d56c217b235..b4fbbba30aa2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -319,18 +319,19 @@ setup_memory(void *kernel_end) i, cluster->usage, cluster->start_pfn, cluster->start_pfn + cluster->numpages); - /* Bit 0 is console/PALcode reserved. Bit 1 is - non-volatile memory -- we might want to mark - this for later. */ - if (cluster->usage & 3) - continue; - end = cluster->start_pfn + cluster->numpages; if (end > max_low_pfn) max_low_pfn = end; memblock_add(PFN_PHYS(cluster->start_pfn), cluster->numpages << PAGE_SHIFT); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + memblock_reserve(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); } /* diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 4b2575f936d4..cb64e4797d2a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -582,7 +582,7 @@ void smp_send_stop(void) { cpumask_t to_whom; - cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_copy(&to_whom, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 53adf43dcd44..96fd6ff3fe81 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -212,7 +212,7 @@ nautilus_init_pci(void) /* Use default IO. */ pci_add_resource(&bridge->windows, &ioport_resource); - /* Irongate PCI memory aperture, calculate requred size before + /* Irongate PCI memory aperture, calculate required size before setting it up. */ pci_add_resource(&bridge->windows, &irongate_mem); diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 921d4b6e4d95..5398f982bdd1 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, long error; /* Check the UAC bits to decide what the user wants us to do - with the unaliged access. */ + with the unaligned access. */ if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { if (__ratelimit(&ratelimit)) { diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index d568cd9a3e43..f7cef66af88d 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc); long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); long do_alpha_fp_emul(unsigned long); -int init_module(void) +static int alpha_fp_emul_init_module(void) { save_emul_imprecise = alpha_fp_emul_imprecise; save_emul = alpha_fp_emul; @@ -73,12 +73,14 @@ int init_module(void) alpha_fp_emul = do_alpha_fp_emul; return 0; } +module_init(alpha_fp_emul_init_module); -void cleanup_module(void) +static void alpha_fp_emul_cleanup_module(void) { alpha_fp_emul_imprecise = save_emul_imprecise; alpha_fp_emul = save_emul; } +module_exit(alpha_fp_emul_cleanup_module); #undef alpha_fp_emul_imprecise #define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise @@ -401,3 +403,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } + +EXPORT_SYMBOL(__udiv_qrnnd); diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..b5bf68e74732 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,7 +409,7 @@ choice help Depending on the configuration, CPU can contain DSP registers (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). - Bellow is options describing how to handle these registers in + Below are options describing how to handle these registers in interrupt entry / exit and in context switch. config ARC_DSP_NONE diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h index 69debd77cd04..0b485800a392 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -24,7 +24,7 @@ */ static inline __sum16 csum_fold(__wsum s) { - unsigned r = s << 16 | s >> 16; /* ror */ + unsigned int r = s << 16 | s >> 16; /* ror */ s = ~s; s -= r; return s >> 16; diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 30b9ae511ea9..e1971d34ef30 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = { #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xffff -static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index c67c0f0f5f77..ec640219d989 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) void fpu_init_task(struct pt_regs *regs) { + const unsigned int fwe = 0x80000000; + /* default rounding mode */ write_aux_reg(ARC_REG_FPU_CTRL, 0x100); - /* set "Write enable" to allow explicit write to exception flags */ - write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); } void fpu_save_restore(struct task_struct *prev, struct task_struct *next) { struct arc_fpu *save = &prev->thread.fpu; struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); save->status = read_aux_reg(ARC_REG_FPU_STATUS); write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); - write_aux_reg(ARC_REG_FPU_STATUS, restore->status); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); } #endif diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 47bab67f8649..9e28058cdba8 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table *table, { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; - unsigned n; + unsigned int n; const u32 *fde; struct { u8 version; @@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; uleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; sleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, static signed fde_pointer_type(const u32 *cie) { const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = *ptr; + unsigned int version = *ptr; if (*++ptr) { const char *aug; @@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame) const u8 *ptr = NULL, *end = NULL; unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; + unsigned int i; signed ptrType = -1; uleb128_t retAddrReg = 0; const struct unwind_table *table; diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index e2146a8da195..529ae50f9fe2 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 82f908fa5676..2fb7012c3246 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -95,7 +95,6 @@ config ARM select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) - select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE bool "FootBridge" select CPU_SA110 select FOOTBRIDGE - select HAVE_IDE select NEED_MACH_IO_H if !MMU select NEED_MACH_MEMORY_H help @@ -430,7 +428,6 @@ config ARCH_PXA select GENERIC_IRQ_MULTI_HANDLER select GPIO_PXA select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select PLAT_PXA select SPARSE_IRQ @@ -446,7 +443,6 @@ config ARCH_RPC select ARM_HAS_SG_CHAIN select CPU_SA110 select FIQ - select HAVE_IDE select HAVE_PATA_PLATFORM select ISA_DMA_API select LEGACY_TIMER_TICK @@ -469,7 +465,6 @@ config ARCH_SA1100 select CPU_SA1100 select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select ISA select NEED_MACH_MEMORY_H @@ -505,7 +500,6 @@ config ARCH_OMAP1 select GENERIC_IRQ_CHIP select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select HAVE_LEGACY_CLK select IRQ_DOMAIN select NEED_MACH_IO_H if PCCARD diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 40ef3973f2a9..ba58e6b0da1d 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1595,7 +1595,7 @@ compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; clocks = <&dcan1_fck>; - clock-name = "fck"; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index aae0af10a5b1..2aa75abf85a9 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -582,7 +582,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index f98691ae4415..d3082b9774e4 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -388,13 +388,13 @@ pinctrl_power_button: powerbutgrp { fsl,pins = < - MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 + MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 >; }; pinctrl_power_out: poweroutgrp { fsl,pins = < - MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 + MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 0ad8ccde0cf8..f86efd0ccc40 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -54,7 +54,13 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; phy-mode = "rgmii-id"; - phy-reset-duration = <2>; + + /* + * The PHY seems to require a long-enough reset duration to avoid + * some rare issues where the PHY gets stuck in an inconsistent and + * non-functional state at boot-up. 10ms proved to be fine . + */ + phy-reset-duration = <10>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi index a0545431b3dc..9f1e38282bee 100644 --- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi @@ -43,6 +43,7 @@ assigned-clock-rates = <0>, <198000000>; cap-power-off-card; keep-power-in-suspend; + max-frequency = <25000000>; mmc-pwrseq = <&wifi_pwrseq>; no-1-8-v; non-removable; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 45435bb88c89..373984c130e0 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -30,14 +30,6 @@ regulator-max-microvolt = <5000000>; }; - vdds_1v8_main: fixedregulator-vdds_1v8_main { - compatible = "regulator-fixed"; - regulator-name = "vdds_1v8_main"; - vin-supply = <&smps7_reg>; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - }; - vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -487,6 +479,7 @@ regulator-boot-on; }; + vdds_1v8_main: smps7_reg: smps7 { /* VDDS_1v8_OMAP over VDDS_1v8_MAIN */ regulator-name = "smps7"; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index c9b906432341..1815361fe73c 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index c5ea08fec535..6cf1c8b4c6e2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -37,7 +37,7 @@ poll-interval = <20>; /* - * The EXTi IRQ line 3 is shared with touchscreen and ethernet, + * The EXTi IRQ line 3 is shared with ethernet, * so mark this as polled GPIO key. */ button-0 { @@ -47,6 +47,16 @@ }; /* + * The EXTi IRQ line 6 is shared with touchscreen, + * so mark this as polled GPIO key. + */ + button-1 { + label = "TA2-GPIO-B"; + linux,code = <KEY_B>; + gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; + }; + + /* * The EXTi IRQ line 0 is shared with PMIC, * so mark this as polled GPIO key. */ @@ -60,13 +70,6 @@ gpio-keys { compatible = "gpio-keys"; - button-1 { - label = "TA2-GPIO-B"; - linux,code = <KEY_B>; - gpios = <&gpiod 6 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = <KEY_D>; @@ -82,6 +85,7 @@ label = "green:led5"; gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; + status = "disabled"; }; led-1 { @@ -185,8 +189,8 @@ touchscreen@38 { compatible = "edt,edt-ft5406"; reg = <0x38>; - interrupt-parent = <&gpiog>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ + interrupt-parent = <&gpioc>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; /* GPIO E */ }; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index 2af0a6752674..8c41f819f776 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -12,6 +12,8 @@ aliases { ethernet0 = ðernet0; ethernet1 = &ksz8851; + rtc0 = &hwrtc; + rtc1 = &rtc; }; memory@c0000000 { @@ -138,6 +140,7 @@ reset-gpios = <&gpioh 3 GPIO_ACTIVE_LOW>; reset-assert-us = <500>; reset-deassert-us = <500>; + smsc,disable-energy-detect; interrupt-parent = <&gpioi>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; }; @@ -248,7 +251,7 @@ /delete-property/dmas; /delete-property/dma-names; - rtc@32 { + hwrtc: rtc@32 { compatible = "microcrystal,rv8803"; reg = <0x32>; }; diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index de11030748d0..1d3aef84287d 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO select RESET_CONTROLLER - select HAVE_IDE select PINCTRL_SINGLE if ARCH_DAVINCI diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index f0a073a71401..13f3068e9845 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -68,7 +68,6 @@ void imx_set_cpu_arg(int cpu, u32 arg); void v7_secondary_startup(void); void imx_scu_map_io(void); void imx_smp_prepare(void); -void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); #else static inline void imx_scu_map_io(void) {} static inline void imx_smp_prepare(void) {} @@ -81,6 +80,7 @@ void imx_gpc_mask_all(void); void imx_gpc_restore_all(void); void imx_gpc_hwirq_mask(unsigned int hwirq); void imx_gpc_hwirq_unmask(unsigned int hwirq); +void imx_gpcv2_set_core1_pdn_pup_by_software(bool pdn); void imx_anatop_init(void); void imx_anatop_pre_suspend(void); void imx_anatop_post_resume(void); diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 0dfd0ae7a63d..af12668d0bf5 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -103,6 +103,7 @@ struct mmdc_pmu { struct perf_event *mmdc_events[MMDC_NUM_COUNTERS]; struct hlist_node node; struct fsl_mmdc_devtype_data *devtype_data; + struct clk *mmdc_ipg_clk; }; /* @@ -462,11 +463,14 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); + iounmap(pmu_mmdc->mmdc_base); + clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk); kfree(pmu_mmdc); return 0; } -static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base) +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base, + struct clk *mmdc_ipg_clk) { struct mmdc_pmu *pmu_mmdc; char *name; @@ -494,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b } mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); + pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; if (mmdc_num == 0) name = "mmdc"; else @@ -529,7 +534,7 @@ pmu_free: #else #define imx_mmdc_remove NULL -#define imx_mmdc_perf_init(pdev, mmdc_base) 0 +#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0 #endif static int imx_mmdc_probe(struct platform_device *pdev) @@ -567,7 +572,13 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - return imx_mmdc_perf_init(pdev, mmdc_base); + err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk); + if (err) { + iounmap(mmdc_base); + clk_disable_unprepare(mmdc_ipg_clk); + } + + return err; } int imx_mmdc_get_ddr_type(void) diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index bf14d65120b9..34a1c7742088 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -91,6 +91,7 @@ config MACH_IXDP465 config MACH_GORAMO_MLR bool "GORAMO Multi Link Router" + depends on IXP4XX_PCI_LEGACY help Say 'Y' here if you want your kernel to support GORAMO MultiLink router. diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 65934b2924fb..12b26e04686f 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3776,6 +3776,7 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) struct omap_hwmod_ocp_if *oi; struct clockdomain *clkdm; struct clk_hw_omap *clk; + struct clk_hw *hw; if (!oh) return NULL; @@ -3792,7 +3793,14 @@ struct powerdomain *omap_hwmod_get_pwrdm(struct omap_hwmod *oh) c = oi->_clk; } - clk = to_clk_hw_omap(__clk_get_hw(c)); + hw = __clk_get_hw(c); + if (!hw) + return NULL; + + clk = to_clk_hw_omap(hw); + if (!clk) + return NULL; + clkdm = clk->clkdm; if (!clkdm) return NULL; diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index d23970bd638d..f70fb9c4b0cb 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag) fallthrough; /* ??? */ case 256: vram_size += PAGE_SIZE * 256; + break; default: break; } diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 897634d0a67c..a951276f0547 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1602,6 +1602,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b5b13a932561..fdcd54d39c1e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1800,11 +1800,11 @@ config RANDOMIZE_BASE If unsure, say N. config RANDOMIZE_MODULE_REGION_FULL - bool "Randomize the module region over a 4 GB range" + bool "Randomize the module region over a 2 GB range" depends on RANDOMIZE_BASE default y help - Randomizes the location of the module region inside a 4 GB window + Randomizes the location of the module region inside a 2 GB window covering the core kernel. This way, it is less likely for modules to leak information about the location of core kernel data structures but it does imply that function calls between modules and the core @@ -1812,7 +1812,10 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the - core kernel, so branch relocations are always in range. + core kernel, so branch relocations are almost always in range unless + ARM64_MODULE_PLTS is enabled and the region is exhausted. In this + particular case of region exhaustion, modules might be able to fall + back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index ded13230f901..8b0fd9d92390 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -21,19 +21,11 @@ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) - ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) -$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum) - else + ifeq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) - ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) -$(warning LSE atomics not supported by binutils) - endif -endif - cc_has_k_constraint := $(call try-run,echo \ 'int main(void) { \ asm volatile("and w0, w0, %w0" :: "K" (4294967295)); \ @@ -179,6 +171,17 @@ vdso_install: archprepare: $(Q)$(MAKE) $(build)=arch/arm64/tools kapi +ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) + ifneq ($(CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419),y) + @echo "warning: ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum" >&2 + endif +endif +ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS),y) + ifneq ($(CONFIG_ARM64_LSE_ATOMICS),y) + @echo "warning: LSE atomics not supported by binutils" >&2 + endif +endif + # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts index dd764b720fb0..f6a79c8080d1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var2.dts @@ -54,6 +54,7 @@ &mscc_felix_port0 { label = "swp0"; + managed = "in-band-status"; phy-handle = <&phy0>; phy-mode = "sgmii"; status = "okay"; @@ -61,6 +62,7 @@ &mscc_felix_port1 { label = "swp1"; + managed = "in-band-status"; phy-handle = <&phy1>; phy-mode = "sgmii"; status = "okay"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b2e3e5d2a108..343ecf0e8973 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -66,7 +66,7 @@ }; }; - sysclk: clock-sysclk { + sysclk: sysclk { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index ce2bcddf396f..a05b1ab2dd12 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -19,6 +19,8 @@ aliases { spi0 = &spi0; ethernet1 = ð1; + mmc0 = &sdhci0; + mmc1 = &sdhci1; }; chosen { @@ -119,6 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; clock-frequency = <100000>; + /delete-property/ mrvl,i2c-fast-mode; status = "okay"; rtc@6f { diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 076d5efc4c3d..5ba7a4519b95 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1840,7 +1840,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE1R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE1W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE1>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE1 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14120000 { @@ -1890,7 +1894,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE2AR &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE2AW &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE2>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE2 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14140000 { @@ -1940,7 +1948,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE3R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE3W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE3>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE3 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14160000 { @@ -1990,7 +2002,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@14180000 { @@ -2040,7 +2056,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie@141a0000 { @@ -2094,7 +2114,11 @@ interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; - interconnect-names = "read", "write"; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14160000 { @@ -2127,6 +2151,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE4R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE4W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE4>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE4 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@14180000 { @@ -2159,6 +2191,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE0R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE0W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE0>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE0 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; pcie_ep@141a0000 { @@ -2194,6 +2234,14 @@ nvidia,aspm-cmrt-us = <60>; nvidia,aspm-pwr-on-t-us = <20>; nvidia,aspm-l0s-entrance-latency-us = <3>; + + interconnects = <&mc TEGRA194_MEMORY_CLIENT_PCIE5R &emc>, + <&mc TEGRA194_MEMORY_CLIENT_PCIE5W &emc>; + interconnect-names = "dma-mem", "write"; + iommus = <&smmu TEGRA194_SID_PCIE5>; + iommu-map = <0x0 &smmu TEGRA194_SID_PCIE5 0x1000>; + iommu-map-mask = <0x0>; + dma-coherent; }; sram@40000000 { diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 8d1c8dcb87fd..97932fbf973d 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -213,10 +213,8 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; /* AArch32 EABI. */ #define EF_ARM_EABI_MASK 0xff000000 -#define compat_elf_check_arch(x) (system_supports_32bit_el0() && \ - ((x)->e_machine == EM_ARM) && \ - ((x)->e_flags & EF_ARM_EABI_MASK)) - +int compat_elf_check_arch(const struct elf32_hdr *); +#define compat_elf_check_arch compat_elf_check_arch #define compat_start_thread compat_start_thread /* * Unlike the native SET_PERSONALITY macro, the compat version maintains diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index eeb210997149..f4ba93d4ffeb 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -231,6 +231,19 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, update_saved_ttbr0(tsk, next); } +static inline const struct cpumask * +task_cpu_possible_mask(struct task_struct *p) +{ + if (!static_branch_unlikely(&arm64_mismatched_32bit_el0)) + return cpu_possible_mask; + + if (!is_compat_thread(task_thread_info(p))) + return cpu_possible_mask; + + return system_32bit_el0_cpumask(); +} +#define task_cpu_possible_mask task_cpu_possible_mask + void verify_cpu_asid_bits(void); void post_ttbr_update_workaround(void); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index e58bca832dff..41b332c054ab 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -320,7 +320,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->regs[0]; + unsigned long val = regs->regs[0]; + + /* + * Audit currently uses regs_return_value() instead of + * syscall_get_return_value(). Apply the same sign-extension here until + * audit is updated to use syscall_get_return_value(). + */ + if (compat_user_mode(regs)) + val = sign_extend64(val, 31); + + return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 1801399204d7..8aebc00c1718 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -35,7 +35,7 @@ struct stack_info { * accounting information necessary for robust unwinding. * * @fp: The fp value in the frame record (or the real fp) - * @pc: The fp value in the frame record (or the real lr) + * @pc: The lr value in the frame record (or the real lr) * * @stacks_done: Stacks which have been entirely unwound, for which it is no * longer valid to unwind to. diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index cfc0672013f6..03e20895453a 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -29,22 +29,23 @@ static inline void syscall_rollback(struct task_struct *task, regs->regs[0] = regs->orig_x0; } - -static inline long syscall_get_error(struct task_struct *task, - struct pt_regs *regs) +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) { - unsigned long error = regs->regs[0]; + unsigned long val = regs->regs[0]; if (is_compat_thread(task_thread_info(task))) - error = sign_extend64(error, 31); + val = sign_extend64(val, 31); - return IS_ERR_VALUE(error) ? error : 0; + return val; } -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) { - return regs->regs[0]; + unsigned long error = syscall_get_return_value(task, regs); + + return IS_ERR_VALUE(error) ? error : 0; } static inline void syscall_set_return_value(struct task_struct *task, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 23b1e3d83603..b2770d753ba3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -67,6 +67,7 @@ #include <linux/crash_dump.h> #include <linux/sort.h> #include <linux/stop_machine.h> +#include <linux/sysfs.h> #include <linux/types.h> #include <linux/minmax.h> #include <linux/mm.h> @@ -1321,6 +1322,31 @@ const struct cpumask *system_32bit_el0_cpumask(void) return cpu_possible_mask; } +static int __init parse_32bit_el0_param(char *str) +{ + allow_mismatched_32bit_el0 = true; + return 0; +} +early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param); + +static ssize_t aarch32_el0_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct cpumask *mask = system_32bit_el0_cpumask(); + + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask)); +} +static const DEVICE_ATTR_RO(aarch32_el0); + +static int __init aarch32_el0_sysfs_init(void) +{ + if (!allow_mismatched_32bit_el0) + return 0; + + return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0); +} +device_initcall(aarch32_el0_sysfs_init); + static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) { if (!has_cpuid_feature(entry, scope)) @@ -2902,15 +2928,38 @@ void __init setup_cpu_features(void) static int enable_mismatched_32bit_el0(unsigned int cpu) { + /* + * The first 32-bit-capable CPU we detected and so can no longer + * be offlined by userspace. -1 indicates we haven't yet onlined + * a 32-bit-capable CPU. + */ + static int lucky_winner = -1; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); if (cpu_32bit) { cpumask_set_cpu(cpu, cpu_32bit_el0_mask); static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); - setup_elf_hwcaps(compat_elf_hwcaps); } + if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit) + return 0; + + if (lucky_winner >= 0) + return 0; + + /* + * We've detected a mismatch. We need to keep one of our CPUs with + * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting + * every CPU in the system for a 32-bit task. + */ + lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask, + cpu_active_mask); + get_cpu_device(lucky_winner)->offline_disabled = true; + setup_elf_hwcaps(compat_elf_hwcaps); + pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", + cpu, lucky_winner); return 0; } diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index cfa2cfde3019..418b2bba1521 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -162,7 +162,9 @@ u64 __init kaslr_early_init(void) * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, * _stext) . This guarantees that the resulting region still * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers. + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. */ module_range = MODULES_VSIZE - (u64)(_etext - _stext); module_alloc_base = (u64)_etext + offset - MODULES_VSIZE; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5464d575192b..2bd270cd603e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -21,6 +21,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/nospec.h> +#include <linux/sched.h> #include <linux/stddef.h> #include <linux/sysctl.h> #include <linux/unistd.h> @@ -472,15 +473,6 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, write_sysreg(val, cntkctl_el1); } -static void compat_thread_switch(struct task_struct *next) -{ - if (!is_compat_thread(task_thread_info(next))) - return; - - if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) - set_tsk_thread_flag(next, TIF_NOTIFY_RESUME); -} - /* * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore * this function must be called with preemption disabled and the update to @@ -515,7 +507,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); ptrauth_thread_switch_user(next); - compat_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -576,6 +567,28 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } +#ifdef CONFIG_COMPAT +int compat_elf_check_arch(const struct elf32_hdr *hdr) +{ + if (!system_supports_32bit_el0()) + return false; + + if ((hdr)->e_machine != EM_ARM) + return false; + + if (!((hdr)->e_flags & EF_ARM_EABI_MASK)) + return false; + + /* + * Prevent execve() of a 32-bit program from a deadline task + * if the restricted affinity mask would be inadmissible on an + * asymmetric system. + */ + return !static_branch_unlikely(&arm64_mismatched_32bit_el0) || + !dl_task_check_affinity(current, system_32bit_el0_cpumask()); +} +#endif + /* * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. */ @@ -585,8 +598,20 @@ void arch_setup_new_exec(void) if (is_compat_task()) { mmflags = MMCF_AARCH32; + + /* + * Restrict the CPU affinity mask for a 32-bit task so that + * it contains only 32-bit-capable CPUs. + * + * From the perspective of the task, this looks similar to + * what would happen if the 64-bit-only CPUs were hot-unplugged + * at the point of execve(), although we try a bit harder to + * honour the cpuset hierarchy. + */ if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + force_compatible_cpus_allowed_ptr(current); + } else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) { + relax_compatible_cpus_allowed_ptr(current); } current->mm->context.flags = mmflags; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 956e2df27180..e26196a33cf4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1867,7 +1867,7 @@ void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (flags & _TIF_SYSCALL_TRACEPOINT) - trace_sys_exit(regs, regs_return_value(regs)); + trace_sys_exit(regs, syscall_get_return_value(current, regs)); if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e93ffd7d38e1..fb54fb76e17f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include <asm/unistd.h> #include <asm/fpsimd.h> #include <asm/ptrace.h> +#include <asm/syscall.h> #include <asm/signal32.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -895,7 +896,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } @@ -916,19 +917,6 @@ static void do_signal(struct pt_regs *regs) restore_saved_sigmask(); } -static bool cpu_affinity_invalid(struct pt_regs *regs) -{ - if (!compat_user_mode(regs)) - return false; - - /* - * We're preemptible, but a reschedule will cause us to check the - * affinity again. - */ - return !cpumask_test_cpu(raw_smp_processor_id(), - system_32bit_el0_cpumask()); -} - void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { do { @@ -955,19 +943,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) if (thread_flags & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); - - /* - * If we reschedule after checking the affinity - * then we must ensure that TIF_NOTIFY_RESUME - * is set so that we check the affinity again. - * Since tracehook_notify_resume() clears the - * flag, ensure that the compiler doesn't move - * it after the affinity check. - */ - barrier(); - - if (cpu_affinity_invalid(regs)) - force_sig(SIGKILL); } if (thread_flags & _TIF_FOREIGN_FPSTATE) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index b83c8d911930..8982a2b78acf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -218,7 +218,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, +noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 263d6c1a525f..50a0f1a38e84 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -54,10 +54,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - if (is_compat_task()) - ret = lower_32_bits(ret); - - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), @@ -115,7 +112,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * syscall. do_notify_resume() will send a signal to userspace * before the syscall is restarted. */ - regs->regs[0] = -ERESTARTNOINTR; + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); return; } @@ -136,7 +133,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * anyway. */ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..0ca72f5cda41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.return_nisv_io_abort_to_user = true; break; case KVM_CAP_ARM_MTE: - if (!system_supports_mte() || kvm->created_vcpus) - return -EINVAL; - r = 0; - kvm->arch.mte_enabled = true; + mutex_lock(&kvm->lock); + if (!system_supports_mte() || kvm->created_vcpus) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mte_enabled = true; + } + mutex_unlock(&kvm->lock); break; default: r = -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..a6ce991b1467 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; - if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) return false; if (r1.start != r2.start) return false; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3155c9e778f0..0625bf2353c2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_PFNMAP); + shared = (vma->vm_flags & VM_SHARED); switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e62..c30baa0499fc 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -44,7 +44,6 @@ config H8300_H8MAX bool "H8MAX" select H83069 select RAMKERNEL - select HAVE_IDE help H8MAX Evaluation Board Support More Information. (Japanese Only) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cf425c2c63af..4993c7ac7ff6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,7 +25,6 @@ config IA64 select HAVE_ASM_MODVERSIONS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_EXIT_THREAD - select HAVE_IDE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 96989ad46f66..d632a1d576f9 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,6 @@ config M68K select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED select HAVE_FUTEX_CMPXCHG if MMU && FUTEX - select HAVE_IDE select HAVE_MOD_ARCH_SPECIFIC select HAVE_UID16 select MMU_GATHER_NO_RANGE if MMU diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 2c4d2ca2f20d..485375112e28 100644 --- a/arch/m68k/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK); DEFINE_CLK(sys, "sys.0", MCF_BUSCLK); static struct clk_lookup m525x_clk_lookup[] = { - CLKDEV_INIT(NULL, "pll.0", &pll), + CLKDEV_INIT(NULL, "pll.0", &clk_pll), CLKDEV_INIT(NULL, "sys.0", &clk_sys), CLKDEV_INIT("mcftmr.0", NULL, &clk_sys), CLKDEV_INIT("mcftmr.1", NULL, &clk_sys), diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..6dfb27d531dd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -71,7 +71,6 @@ config MIPS select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 4e942b7ef022..653befc1b176 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -321,7 +321,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 4b2567d6b2df..c7925d0e9874 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -58,15 +58,20 @@ do { \ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *pmd = NULL; + pmd_t *pmd; struct page *pg; - pg = alloc_pages(GFP_KERNEL | __GFP_ACCOUNT, PMD_ORDER); - if (pg) { - pgtable_pmd_page_ctor(pg); - pmd = (pmd_t *)page_address(pg); - pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); + pg = alloc_pages(GFP_KERNEL_ACCOUNT, PMD_ORDER); + if (!pg) + return NULL; + + if (!pgtable_pmd_page_ctor(pg)) { + __free_pages(pg, PMD_ORDER); + return NULL; } + + pmd = (pmd_t *)page_address(pg); + pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table); return pmd; } diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index ee7471984fe7..4ffbcc58c6f6 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -48,7 +48,8 @@ static struct plat_serial8250_port uart8250_data[] = { .mapbase = 0x1f000900, /* The CBUS UART */ .irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2, .uartclk = 3686400, /* Twice the usual clk! */ - .iotype = UPIO_MEM32, + .iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? + UPIO_MEM32BE : UPIO_MEM32, .flags = CBUS_UART_FLAGS, .regshift = 3, }, diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 939dd06764bc..3a73e9375712 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ jeq_common: } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..4f8c1fbf8f2f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -3,7 +3,6 @@ config PARISC def_bool y select ARCH_32BIT_OFF_T if !64BIT select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_IDE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..663766fbf505 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -220,7 +220,6 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bdf7d274ac..6b800d3e2681 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -583,6 +583,9 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +/* irq.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ); + void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4982f3711fc3..2b3278534bc1 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -52,7 +52,7 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -extern void do_IRQ(struct pt_regs *regs); +void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..14422e851494 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,22 @@ struct pt_regs unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; #endif +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + struct { /* Must be a multiple of 16 bytes */ + unsigned long mas0; + unsigned long mas1; + unsigned long mas2; + unsigned long mas3; + unsigned long mas6; + unsigned long mas7; + unsigned long srr0; + unsigned long srr1; + unsigned long csrr0; + unsigned long csrr1; + unsigned long dsrr0; + unsigned long dsrr1; + }; +#endif }; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..5bee245d832b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,24 +309,21 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#if defined(CONFIG_PPC32) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); - DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + STACK_PT_REGS_OFFSET(MAS0, mas0); /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ - DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); - DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); - DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); - DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); - DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); - DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); - DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); - DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); - DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); - DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); - DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); - DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); -#endif + STACK_PT_REGS_OFFSET(MMUCR, mas0); + STACK_PT_REGS_OFFSET(MAS1, mas1); + STACK_PT_REGS_OFFSET(MAS2, mas2); + STACK_PT_REGS_OFFSET(MAS3, mas3); + STACK_PT_REGS_OFFSET(MAS6, mas6); + STACK_PT_REGS_OFFSET(MAS7, mas7); + STACK_PT_REGS_OFFSET(_SRR0, srr0); + STACK_PT_REGS_OFFSET(_SRR1, srr1); + STACK_PT_REGS_OFFSET(_CSRR0, csrr0); + STACK_PT_REGS_OFFSET(_CSRR1, csrr1); + STACK_PT_REGS_OFFSET(_DSRR0, dsrr0); + STACK_PT_REGS_OFFSET(_DSRR1, dsrr1); #endif /* About the CPU features table */ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 764edd860ed4..68e5c0a7e99d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 prepare_transfer_to_handler - lwz r5, _DSISR(r11) + lwz r5, _DSISR(r1) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f bl do_page_fault diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 87b806e8eded..e5503420b6c6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -168,20 +168,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx -#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) - #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ slwi r8,r8,2; \ addis r8,r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ lis r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #endif /* @@ -208,7 +206,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\ beq 1f; \ /* COMING FROM USER MODE */ \ stw r9,_CCR(r11); /* save CR */\ @@ -516,24 +514,5 @@ label: bl kernel_fp_unavailable_exception; \ b interrupt_return -#else /* __ASSEMBLY__ */ -struct exception_regs { - unsigned long mas0; - unsigned long mas1; - unsigned long mas2; - unsigned long mas3; - unsigned long mas6; - unsigned long mas7; - unsigned long srr0; - unsigned long srr1; - unsigned long csrr0; - unsigned long csrr1; - unsigned long dsrr0; - unsigned long dsrr1; -}; - -/* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) - #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 91e63eac4e8f..551b653228c4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -750,7 +750,7 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } -DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; @@ -774,6 +774,11 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) set_irq_regs(old_regs); } +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +{ + __do_IRQ(regs); +} + static void *__init alloc_vm_stack(void) { return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cbc28d1a2e1b..7a7cd6bda53e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -292,7 +292,8 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) return 0; /* diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5ff0e55d0db1..defecb3b1b15 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1167,7 +1167,7 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (smp_ops->cpu_offline_self) + if (smp_ops && smp_ops->cpu_offline_self) c->hotpluggable = 1; #endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..c487ba5a6e11 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -586,7 +586,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) - do_IRQ(regs); + __do_IRQ(regs); #endif old_regs = set_irq_regs(regs); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..d56254f05e17 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1104,7 +1104,7 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) _exception(SIGTRAP, regs, TRAP_UNK, 0); } -DEFINE_INTERRUPT_HANDLER(single_step_exception) +static void __single_step_exception(struct pt_regs *regs) { clear_single_step(regs); clear_br_trace(regs); @@ -1121,6 +1121,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); } +DEFINE_INTERRUPT_HANDLER(single_step_exception) +{ + __single_step_exception(regs); +} + /* * After we have successfully emulated an instruction, we have to * check if the instruction was being single-stepped, and if so, @@ -1130,7 +1135,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) static void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) - single_step_exception(regs); + __single_step_exception(regs); } static inline int __parse_fpscr(unsigned long fpscr) diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 34bb1583fc0c..beb12cbc8c29 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -738,6 +738,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index de8595880fee..b87a63dba9c8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -628,6 +628,12 @@ emit_clear: break; /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..0dfaa6ab44cc 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; @@ -539,9 +539,10 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if * H_CPU_BEHAV_FAVOUR_SECURITY is. */ - if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) { security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); - else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 0; + } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) pseries_security_flavor = 1; else pseries_security_flavor = 2; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..943fd30095af 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain; static struct xive_ipi_desc { unsigned int irq; char name[16]; + atomic_t started; } *xive_ipis; /* @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = { .alloc = xive_ipi_irq_domain_alloc, }; -static int __init xive_request_ipi(void) +static int __init xive_init_ipis(void) { struct fwnode_handle *fwnode; struct irq_domain *ipi_domain; @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void) struct xive_ipi_desc *xid = &xive_ipis[node]; struct xive_ipi_alloc_info info = { node }; - /* Skip nodes without CPUs */ - if (cpumask_empty(cpumask_of_node(node))) - continue; - /* * Map one IPI interrupt per node for all cpus of that node. * Since the HW interrupt number doesn't have any meaning, @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void) xid->irq = ret; snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); - - ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); - - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } return ret; @@ -1178,6 +1170,22 @@ out: return ret; } +static int __init xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; +} + static int xive_setup_cpu_ipi(unsigned int cpu) { unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu) if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8fcceb8eda07..4f7b70ae7c31 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -492,10 +492,16 @@ config CC_HAVE_STACKPROTECTOR_TLS config STACKPROTECTOR_PER_TASK def_bool y + depends on !GCC_PLUGIN_RANDSTRUCT depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + config PHYS_RAM_BASE hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED default "0x80000000" help This is the physical address of RAM in the system. It has to be @@ -508,6 +514,7 @@ config XIP_KERNEL # This prevents XIP from being enabled by all{yes,mod}config, which # fail to build since XIP doesn't support large kernels. depends on !COMPILE_TEST + select PHYS_RAM_BASE_FIXED help Execute-In-Place allows the kernel to run from non-volatile storage directly addressable by the CPU, such as NOR flash. This saves RAM diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index b1c3c596578f..2e4ea84f27e7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -24,7 +24,7 @@ memory@80000000 { device_type = "memory"; - reg = <0x0 0x80000000 0x2 0x00000000>; + reg = <0x0 0x80000000 0x4 0x00000000>; }; soc { diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cca8764aed83..b0ca5058e7ae 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -103,6 +103,7 @@ struct kernel_mapping { }; extern struct kernel_mapping kernel_map; +extern phys_addr_t phys_ram_base; #ifdef CONFIG_64BIT #define is_kernel_mapping(x) \ @@ -113,9 +114,9 @@ extern struct kernel_mapping kernel_map; #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = y; \ - (_y >= CONFIG_PHYS_RAM_BASE) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d3081e4d9600..3397ddac1a30 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_KEXEC -AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax +AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif extra-y += head.o diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ac7593607fa6..315db3d0229b 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,7 +27,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == current) { + } else if (task == NULL || task == current) { fp = (unsigned long)__builtin_frame_address(1); sp = (unsigned long)__builtin_frame_address(0); pc = (unsigned long)__builtin_return_address(0); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a14bf3910eec..7cb4f391d106 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -36,6 +36,9 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +phys_addr_t phys_ram_base __ro_after_init; +EXPORT_SYMBOL(phys_ram_base); + #ifdef CONFIG_XIP_KERNEL extern char _xiprom[], _exiprom[]; #endif @@ -160,7 +163,7 @@ static void __init setup_bootmem(void) phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t dram_end; + phys_addr_t phys_ram_end; #ifdef CONFIG_XIP_KERNEL vmlinux_start = __pa_symbol(&_sdata); @@ -181,9 +184,12 @@ static void __init setup_bootmem(void) #endif memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - dram_end = memblock_end_of_DRAM(); + phys_ram_end = memblock_end_of_DRAM(); #ifndef CONFIG_64BIT +#ifndef CONFIG_XIP_KERNEL + phys_ram_base = memblock_start_of_DRAM(); +#endif /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -191,15 +197,15 @@ static void __init setup_bootmem(void) * if end of dram is equal to maximum addressable memory. For 64-bit * kernel, this problem can't happen here as the end of the virtual * address space is occupied by the kernel mapping then this check must - * be done in create_kernel_page_table. + * be done as soon as the kernel mapping base address is determined. */ max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (dram_end - 1)) + if (max_mapped_addr == (phys_ram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); #endif - min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_low_pfn = max_pfn = PFN_DOWN(dram_end); + min_low_pfn = PFN_UP(phys_ram_base); + max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -558,6 +564,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); + phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 81de865f4c7c..e6497424cbf6 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 87e3bf5b9086..3af4131c22c7 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -939,6 +939,10 @@ out_be: emit_ld(rd, 0, RV_REG_T1, ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9b4473f76e56..161a9e12bfb8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -445,15 +445,15 @@ struct kvm_vcpu_stat { u64 instruction_sigp_init_cpu_reset; u64 instruction_sigp_cpu_reset; u64 instruction_sigp_unknown; - u64 diagnose_10; - u64 diagnose_44; - u64 diagnose_9c; - u64 diagnose_9c_ignored; - u64 diagnose_9c_forward; - u64 diagnose_258; - u64 diagnose_308; - u64 diagnose_500; - u64 diagnose_other; + u64 instruction_diagnose_10; + u64 instruction_diagnose_44; + u64 instruction_diagnose_9c; + u64 diag_9c_ignored; + u64 diag_9c_forward; + u64 instruction_diagnose_258; + u64 instruction_diagnose_308; + u64 instruction_diagnose_500; + u64 instruction_diagnose_other; u64 pfault_sync; }; diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 02c146f9e5cd..807fa9da1e72 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; - vcpu->stat.diagnose_10++; + vcpu->stat.instruction_diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) @@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", vcpu->run->s.regs.gprs[rx]); - vcpu->stat.diagnose_258++; + vcpu->stat.instruction_diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); - vcpu->stat.diagnose_44++; + vcpu->stat.instruction_diagnose_44++; kvm_vcpu_on_spin(vcpu, true); return 0; } @@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - vcpu->stat.diagnose_9c++; + vcpu->stat.instruction_diagnose_9c++; /* yield to self */ if (tid == vcpu->vcpu_id) @@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); - vcpu->stat.diagnose_9c_forward++; + vcpu->stat.diag_9c_forward++; return 0; } @@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) return 0; no_yield: VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); - vcpu->stat.diagnose_9c_ignored++; + vcpu->stat.diag_9c_ignored++; return 0; } @@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); - vcpu->stat.diagnose_308++; + vcpu->stat.instruction_diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; - vcpu->stat.diagnose_500++; + vcpu->stat.instruction_diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) @@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: - vcpu->stat.diagnose_other++; + vcpu->stat.instruction_diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b655a7d82bf0..4527ac7b5961 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), - STATS_DESC_COUNTER(VCPU, diagnose_10), - STATS_DESC_COUNTER(VCPU, diagnose_44), - STATS_DESC_COUNTER(VCPU, diagnose_9c), - STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored), - STATS_DESC_COUNTER(VCPU, diagnose_9c_forward), - STATS_DESC_COUNTER(VCPU, diagnose_258), - STATS_DESC_COUNTER(VCPU, diagnose_308), - STATS_DESC_COUNTER(VCPU, diagnose_500), - STATS_DESC_COUNTER(VCPU, diagnose_other), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), + STATS_DESC_COUNTER(VCPU, diag_9c_ignored), + STATS_DESC_COUNTER(VCPU, diag_9c_forward), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2ae419f5115a..88419263a89a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1154,6 +1154,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, } break; /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..b683b69a4556 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -39,7 +39,6 @@ config SUPERH select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FTRACE_MCOUNT_RECORD select HAVE_HW_BREAKPOINT - select HAVE_IDE if HAS_IOPORT_MAP select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..f0c0f955e169 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -19,7 +19,6 @@ config SPARC select OF select OF_PROMTREE select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_SECCOMP if SPARC64 diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 4b8d3c65d266..9a2f20cbd48b 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..88fb922c23a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -202,7 +202,6 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1eb45139fcc6..3092fbf9dbe4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void) return; for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { - /* Metrics and fake events don't have corresponding HW counters. */ - if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) - continue; - else if (i >= INTEL_PMC_IDX_FIXED) + if (i >= INTEL_PMC_IDX_FIXED) { + /* Metrics and fake events don't have corresponding HW counters. */ + if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + continue; + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); - else + } else { wrmsrl(x86_pmu_event_addr(i), 0); + } } bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fca7a6e2242f..ac6fd2dabf6a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ static int intel_pmu_handle_irq(struct pt_regs *regs) { - struct cpu_hw_events *cpuc; + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool late_ack = hybrid_bit(cpuc->pmu, late_ack); + bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack); int loops; u64 status; int handled; int pmu_enabled; - cpuc = this_cpu_ptr(&cpu_hw_events); - /* * Save the PMU state. * It needs to be restored when leaving the handler. */ pmu_enabled = cpuc->enabled; /* - * No known reason to not always do late ACK, - * but just in case do it opt-in. + * In general, the early ACK is only applied for old platforms. + * For the big core starts from Haswell, the late ACK should be + * applied. + * For the small core after Tremont, we have to do the ACK right + * before re-enabling counters, which is in the middle of the + * NMI handler. */ - if (!x86_pmu.late_ack) + if (!late_ack && !mid_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); cpuc->enabled = 0; @@ -2958,6 +2962,8 @@ again: goto again; done: + if (mid_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); /* Only restore PMU state when it's active. See x86_pmu_disable(). */ cpuc->enabled = pmu_enabled; if (pmu_enabled) @@ -2969,7 +2975,7 @@ done: * have been reset. This avoids spurious NMIs on * Haswell CPUs. */ - if (x86_pmu.late_ack) + if (late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.late_ack = true; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; x86_pmu.pebs_block = true; @@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; pmu->max_pebs_events = x86_pmu.max_pebs_events; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 2bf1c7ea2758..e3ac05c97b5e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -656,6 +656,10 @@ struct x86_hybrid_pmu { struct event_constraint *event_constraints; struct event_constraint *pebs_constraints; struct extra_reg *extra_regs; + + unsigned int late_ack :1, + mid_ack :1, + enabled_ack :1; }; static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) @@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ })) +#define hybrid_bit(_pmu, _field) \ +({ \ + bool __Fp = x86_pmu._field; \ + \ + if (is_hybrid() && (_pmu)) \ + __Fp = hybrid_pmu(_pmu)->_field; \ + \ + __Fp; \ +}) + enum hybrid_pmu_type { hybrid_big = 0x40, hybrid_small = 0x20, @@ -755,6 +769,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, + mid_ack :1, enabled_ack :1; /* * sysfs attrs @@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..af6ce8d4c86a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1038,6 +1038,13 @@ struct kvm_arch { struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. + */ + spinlock_t mmu_unsync_pages_lock; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5c691a3208b..39224e035e47 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static inline void init_IO_APIC_traps(void) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 44ebe25e7703..dbacb9ec8843 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * The quirk bit is not set in this case. * - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ if (!irqd_msi_nomask_quirk(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || cfg->dest_apicid == old_cfg.dest_apicid) { irq_msi_update_msg(irqd, cfg); return ret; @@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = msi_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, @@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = { .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static int dmar_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f07c10b87a87..57e4bb695ff9 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -285,15 +285,14 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >>= shift; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; + return tval; } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: @@ -305,12 +304,6 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) case QOS_L3_MBM_LOCAL_EVENT_ID: m = &rr->d->mbm_local[rmid]; break; - default: - /* - * Code would never reach here because - * an invalid event id would fail the __rmid_read. - */ - return -EINVAL; } if (rr->first) { @@ -361,23 +354,29 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; + u64 ret_val; rdtgrp = rr->rgrp; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); /* - * For Ctrl groups read data from child monitor groups. + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. */ head = &rdtgrp->mon.crdtgrp_list; if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; + if (__mon_event_count(entry->mon.rmid, rr) == 0) + ret_val = 0; } } + + /* Report error if none of rmid_reads are successful */ + if (ret_val) + rr->val = ret_val; } /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 08651a4e6aa0..42fc41dd0e1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = { .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 739be5da3bca..fe03bd978761 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_mmu_after_set_cpuid(vcpu); } -static int is_efer_nx(void) -{ - return host_efer & EFER_NX; -} - -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_cpuid_entry2 *e, *entry; - - entry = NULL; - for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { - e = &vcpu->arch.cpuid_entries[i]; - if (e->function == 0x80000001) { - entry = e; - break; - } - } - if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) { - cpuid_entry_clear(entry, X86_FEATURE_NX); - printk(KERN_INFO "kvm: guest NX capability removed\n"); - } -} - int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_entries = e2; vcpu->arch.cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); kvm_update_cpuid_runtime(vcpu); kvm_vcpu_after_set_cpuid(vcpu); @@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) void kvm_set_cpu_caps(void) { - unsigned int f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 unsigned int f_gbpages = F(GBPAGES); unsigned int f_lm = F(LM); @@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void) F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* Reserved */ | - f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) ); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..41d2a53c5dea 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1933,7 +1933,7 @@ ret_success: void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *entry; - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct kvm_vcpu_hv *hv_vcpu; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0); if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); @@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + kvm_hv_hypercall_read_xmm(&hc); + } + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 698969e18fe3..ff005fe738a4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 660401700075..11e4065e1617 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID]; + u8 vectors[KVM_MAX_VCPU_ID + 1]; }; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..47b765270239 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); @@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *sp; + bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page @@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) if (sp->unsync) continue; + /* + * TDP MMU page faults require an additional spinlock as they + * run with mmu_lock held for read, not write, and the unsync + * logic is not thread safe. Take the spinklock regardless of + * the MMU type to avoid extra conditionals/parameters, there's + * no meaningful penalty if mmu_lock is held for write. + */ + if (!locked) { + locked = true; + spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock); + + /* + * Recheck after taking the spinlock, a different vCPU + * may have since marked the page unsync. A false + * positive on the unprotected check above is not + * possible as clearing sp->unsync _must_ hold mmu_lock + * for write, i.e. unsync cannot transition from 0->1 + * while this CPU holds mmu_lock for read (or write). + */ + if (READ_ONCE(sp->unsync)) + continue; + } + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } + if (locked) + spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible @@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm) { struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); + if (!kvm_mmu_init_tdp_mmu(kvm)) /* * No smp_load/store wrappers needed here as we are in diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..d80cb122b5f3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; + WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); - kvm_lockdep_assert_mmu_lock_held(kvm, shared); if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) @@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, list_del_rcu(&root->link); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared); + zap_gfn_range(kvm, root, 0, -1ull, false, false, shared); call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); } @@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush, bool shared) { + gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT); + bool zap_all = (start == 0 && end >= max_gfn_host); struct tdp_iter iter; + /* + * No need to try to step down in the iterator when zapping all SPTEs, + * zapping the top-level non-leaf SPTEs will recurse on their children. + */ + int min_level = zap_all ? root->role.level : PG_LEVEL_4K; + + /* + * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will + * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF, + * and so KVM will never install a SPTE for such addresses. + */ + end = min(end, max_gfn_host); + kvm_lockdep_assert_mmu_lock_held(kvm, shared); rcu_read_lock(); - tdp_root_for_each_pte(iter, root, start, end) { + for_each_tdp_pte_min_level(iter, root->spt, root->role.level, + min_level, start, end) { retry: if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) { @@ -744,9 +759,10 @@ retry: /* * If this is a non-last-level SPTE that covers a larger range * than should be zapped, continue, and zap the mappings at a - * lower level. + * lower level, except when zapping all SPTEs. */ - if ((iter.gfn < start || + if (!zap_all && + (iter.gfn < start || iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && !is_last_spte(iter.old_spte, iter.level)) continue; @@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush = false; int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush, false); if (flush) @@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm, */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); struct kvm_mmu_page *next_root; struct kvm_mmu_page *root; bool flush = false; @@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) rcu_read_unlock(); - flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush, - true); + flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true); /* * Put the reference acquired in diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1d01da64c333..a8ad78a2faa1 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -646,7 +646,7 @@ out: void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = svm->vmcb01.ptr; bool activated = kvm_vcpu_apicv_active(vcpu); if (!enable_apicv) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3bd09c50c98b..61738ff8ef33 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id, * avic_physical_id. */ - WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK); + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); /* Copied from vmcb01. msrpm_base can be overwritten later. */ svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl; @@ -702,8 +702,8 @@ out: } /* Copy state save area fields which are handled by VMRUN */ -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save) +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save) { to_save->es = from_save->es; to_save->cs = from_save->cs; @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; @@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 664d20f0689c..e8ccab50ebf6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) goto error_free_vmsa_page; } - svm_vcpu_init_msrpm(vcpu, svm->msrpm); - svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_switch_vmcb(svm, &svm->vmcb01); init_vmcb(vcpu); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); + svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; @@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; - /* The following fields are ignored when AVIC is enabled */ - WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + /* + * The following fields are ignored when AVIC is enabled + */ + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); + svm_set_intercept(svm, INTERCEPT_VINTR); /* @@ -2147,11 +2150,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - nested_svm_vmloadsave(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; - } else - nested_svm_vmloadsave(svm->vmcb, vmcb12); + } else { + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); + } kvm_vcpu_unmap(vcpu, &map, true); @@ -4344,8 +4348,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); - svm_copy_vmrun_state(&svm->vmcb01.ptr->save, - map_save.hva + 0x400); + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); kvm_vcpu_unmap(vcpu, &map_save, true); } @@ -4393,8 +4397,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) &map_save) == -EINVAL) return 1; - svm_copy_vmrun_state(map_save.hva + 0x400, - &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); kvm_vcpu_unmap(vcpu, &map_save, true); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7e2090752d8f..bd0fe94c2920 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save); -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save); +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 9b9a55abc29f..c53b8bf8d013 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( * as we mark it dirty unconditionally towards end of vcpu * init phase. */ - if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && + if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..b3f77d18eb5a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) vcpu_put(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + +static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, + gpa_t addr) +{ + uint i; + struct kvm_mmu_root_info *cached_root; + + WARN_ON_ONCE(!mmu_is_nested(vcpu)); + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + cached_root = &vcpu->arch.mmu->prev_roots[i]; + + if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, + eptp)) + vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa); + } +} + static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; - } else if (fault->error_code & PFERR_RSVD_MASK) - vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; - else - vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + } else { + if (fault->error_code & PFERR_RSVD_MASK) + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + else + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + + /* + * Although the caller (kvm_inject_emulated_page_fault) would + * have already synced the faulting address in the shadow EPT + * tables for the current EPTP12, we also need to sync it for + * any other cached EPTP02s based on the same EP4TA, since the + * TLB associates mappings to the EP4TA rather than the full EPTP. + */ + nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer, + fault->address); + } nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; @@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return nested_vmx_succeed(vcpu); } -#define EPTP_PA_MASK GENMASK_ULL(51, 12) - -static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) -{ - return VALID_PAGE(root_hpa) && - ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); -} - /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { @@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, if (is_nmi(intr_info)) return true; else if (is_page_fault(intr_info)) - return vcpu->arch.apf.host_apf_flags || !enable_ept; + return vcpu->arch.apf.host_apf_flags || + vmx_need_pf_intercept(vcpu); else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..17a1cb4b059d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow) static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) { - return vmx->secondary_exec_control & + return secondary_exec_controls_get(vmx) & SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4fd10604f72..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; if (data & 0x1) { vcpu->arch.apf.pageready_pending = false; @@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; msr_info->data = 0; @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4b951458c9fc..16d76f814e9b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3da88ded6ee3..3bfda5f502cb 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index fd1ab80be0de..a4cf678cf5c8 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -10,6 +10,7 @@ BEGIN { /^GNU objdump/ { verstr = "" + gsub(/\(.*\)/, ""); for (i = 3; i <= NF; i++) if (match($(i), "^[0-9]")) { verstr = $(i); diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 04c5a44b9682..9ba700dc47de 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -57,12 +57,12 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { [S_REL] = "^(__init_(begin|end)|" "__x86_cpu_dev_(start|end)|" - "(__parainstructions|__alt_instructions)(|_end)|" - "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" + "(__parainstructions|__alt_instructions)(_end)?|" + "(__iommu_table|__apicdrivers|__smp_locks)(_end)?|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl)|" - "__(start|stop)___kcrctab(|_gpl)|" + "__(start|stop)___ksymtab(_gpl)?|" + "__(start|stop)___kcrctab(_gpl)?|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..3878880469d1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS config XTENSA_PLATFORM_XT2000 bool "XT2000" - select HAVE_IDE help XT2000 is the name of Tensilica's feature-rich emulation platform. This hardware is capable of running a full Linux distribution. |