summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig9
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/events/intel/lbr.c19
-rw-r--r--arch/x86/include/asm/cpufeatures.h3
-rw-r--r--arch/x86/include/asm/fpu/api.h2
-rw-r--r--arch/x86/include/asm/mwait.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/sev.h7
-rw-r--r--arch/x86/include/asm/special_insns.h9
-rw-r--r--arch/x86/include/asm/tlb.h3
-rw-r--r--arch/x86/include/asm/tlbflush.h1
-rw-r--r--arch/x86/kernel/alternative.c4
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c47
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h2
-rw-r--r--arch/x86/kernel/cpu/vmware.c4
-rw-r--r--arch/x86/kernel/fpu/core.c14
-rw-r--r--arch/x86/kernel/pmem.c7
-rw-r--r--arch/x86/kernel/process.c44
-rw-r--r--arch/x86/kernel/sev-shared.c25
-rw-r--r--arch/x86/kernel/sev.c17
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/pkeys.c15
-rw-r--r--arch/x86/mm/tlb.c31
27 files changed, 230 insertions, 79 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e58798f636d4..52a7f91527fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,7 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
@@ -2473,7 +2474,7 @@ config RETHUNK
bool "Enable return-thunks"
depends on RETPOLINE && CC_HAS_RETURN_THUNK
select OBJTOOL if HAVE_OBJTOOL
- default y
+ default y if X86_64
help
Compile the kernel with the return-thunks compiler option to guard
against kernel-to-user data leaks by avoiding return speculation.
@@ -2482,21 +2483,21 @@ config RETHUNK
config CPU_UNRET_ENTRY
bool "Enable UNRET on kernel entry"
- depends on CPU_SUP_AMD && RETHUNK
+ depends on CPU_SUP_AMD && RETHUNK && X86_64
default y
help
Compile the kernel with support for the retbleed=unret mitigation.
config CPU_IBPB_ENTRY
bool "Enable IBPB on kernel entry"
- depends on CPU_SUP_AMD
+ depends on CPU_SUP_AMD && X86_64
default y
help
Compile the kernel with support for the retbleed=ibpb mitigation.
config CPU_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
- depends on CPU_SUP_INTEL
+ depends on CPU_SUP_INTEL && X86_64
default y
help
Compile the kernel with support for the spectre_v2=ibrs mitigation.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f40dad30d50..7854685c5f25 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,6 +27,7 @@ RETHUNK_CFLAGS := -mfunction-return=thunk-extern
RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
endif
+export RETHUNK_CFLAGS
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 13179f31fe10..4f70fb6c2c1e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -278,9 +278,9 @@ enum {
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
@@ -288,7 +288,7 @@ enum {
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
@@ -300,7 +300,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && x86_pmu.lbr_has_tsx;
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -1609,9 +1609,6 @@ void intel_pmu_lbr_init_hsw(void)
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
@@ -1702,7 +1699,11 @@ void intel_pmu_lbr_init(void)
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
- fallthrough;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 00f5227c8459..5fe7f6c8a7a4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,7 +219,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
+#define X86_FEATURE_ZEN (7*32+28) /* "" CPU based on Zen microarchitecture */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -302,6 +302,7 @@
#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 6b0f31fb53f7..503a577814b2 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -164,4 +164,6 @@ static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
/* prctl */
extern long fpu_xstate_prctl(int option, unsigned long arg2);
+extern void fpu_idle_fpregs(void);
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 29dd27b5a339..3a8fdf881313 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -13,6 +13,7 @@
#define MWAIT_SUBSTATE_SIZE 4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
+#define MWAIT_C1_SUBSTATE_MASK 0xf0
#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 42c09bc8ae18..cba942006ffe 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -309,6 +309,8 @@ do { \
alternative_msr_write(MSR_IA32_SPEC_CTRL, \
spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 19514524f0f8..4a23e52fe0ee 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -72,7 +72,6 @@ static inline u64 lower_bits(u64 val, unsigned int bits)
struct real_mode_header;
enum stack_type;
-struct ghcb;
/* Early IDT entry points for #VC handler */
extern void vc_no_ghcb(void);
@@ -156,11 +155,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- bool set_ghcb_msr,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2);
+
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 45b18eb94fa1..35f709f619fb 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -295,6 +295,15 @@ static inline int enqcmds(void __iomem *dst, const void *src)
return 0;
}
+static inline void tile_release(void)
+{
+ /*
+ * Instruction opcode for TILERELEASE; supported in binutils
+ * version >= 2.36.
+ */
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0");
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 1bfe979bb9bc..580636cdc257 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -2,9 +2,6 @@
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4af5579c7ef7..cda3118f3b27 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -16,6 +16,7 @@
void __flush_tlb_all(void);
#define TLB_FLUSH_ALL -1UL
+#define TLB_GENERATION_INVALID 0
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d6858533e6e5..62f6b8b7c4a5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -555,7 +555,9 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
dest = addr + insn.length + insn.immediate.value;
if (__static_call_fixup(addr, op, dest) ||
- WARN_ON_ONCE(dest != &__x86_return_thunk))
+ WARN_ONCE(dest != &__x86_return_thunk,
+ "missing return thunk: %pS-%pS: %*ph",
+ addr, dest, 5, addr))
continue;
DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 190e0f763375..4266b64631a4 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -19,17 +19,23 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480
#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630
+#define PCI_DEVICE_ID_AMD_17H_MA0H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M10H_ROOT 0x14a4
+#define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8
+#define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
+#define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4 0x1728
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F4 0x14b1
#define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5
#define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
+#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
+#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -41,8 +47,11 @@ static const struct pci_device_id amd_root_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_ROOT) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
{}
};
@@ -61,12 +70,15 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
{}
};
@@ -81,6 +93,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_MA0H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index aa34f908c39f..6761668100b9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -975,6 +975,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; }
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
@@ -1415,6 +1416,8 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_IBRS:
setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
break;
case SPECTRE_V2_LFENCE:
@@ -1516,7 +1519,17 @@ static void __init spectre_v2_select_mitigation(void)
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ boot_cpu_has(X86_FEATURE_IBPB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+ if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
+ pr_info("Enabling Speculation Barrier for firmware calls\n");
+ }
+
+ } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fd5dead8371c..663f6e6dd288 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -682,9 +682,9 @@ static void init_intel(struct cpuinfo_x86 *c)
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
- if (!(l1 & (1<<11)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_BTS);
- if (!(l1 & (1<<12)))
+ if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
set_cpu_cap(c, X86_FEATURE_PEBS);
}
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ffb3233..12cf2e7ca33c 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -33,6 +33,8 @@
#include "internal.h"
+static bool hw_injection_possible = true;
+
/*
* Collect all the MCi_XXX settings
*/
@@ -339,6 +341,8 @@ static int __set_inj(const char *buf)
for (i = 0; i < N_INJ_TYPES; i++) {
if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ if (i > SW_INJ && !hw_injection_possible)
+ continue;
inj_type = i;
return 0;
}
@@ -717,11 +721,54 @@ static void __init debugfs_init(void)
&i_mce, dfs_fls[i].fops);
}
+static void check_hw_inj_possible(void)
+{
+ int cpu;
+ u8 bank;
+
+ /*
+ * This behavior exists only on SMCA systems though its not directly
+ * related to SMCA.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_SMCA))
+ return;
+
+ cpu = get_cpu();
+
+ for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
+ u64 status = MCI_STATUS_VAL, ipid;
+
+ /* Check whether bank is populated */
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ if (!ipid)
+ continue;
+
+ toggle_hw_mce_inject(cpu, true);
+
+ wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+
+ if (!status) {
+ hw_injection_possible = false;
+ pr_warn("Platform does not allow *hardware* error injection."
+ "Try using APEI EINJ instead.\n");
+ }
+
+ toggle_hw_mce_inject(cpu, false);
+
+ break;
+ }
+
+ put_cpu();
+}
+
static int __init inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;
+ check_hw_inj_possible();
+
debugfs_init();
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 4ae0e603f7fa..7e03f5b7f6bd 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- if (mce_flags.smca) {
+ if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
switch (reg) {
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index c04b933f48d3..02039ec3597d 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -476,8 +476,8 @@ static bool __init vmware_legacy_x2apic_available(void)
{
uint32_t eax, ebx, ecx, edx;
VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
- return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
- (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
+ return !(eax & BIT(VMWARE_CMD_VCPU_RESERVED)) &&
+ (eax & BIT(VMWARE_CMD_LEGACY_X2APIC));
}
#ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0531d6a06df5..3b28c5b25e12 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -851,3 +851,17 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
*/
return 0;
}
+
+/*
+ * Initialize register state that may prevent from entering low-power idle.
+ * This function will be invoked from the cpuidle driver only when needed.
+ */
+void fpu_idle_fpregs(void)
+{
+ /* Note: AMX_TILE being enabled implies XGETBV1 support */
+ if (cpu_feature_enabled(X86_FEATURE_AMX_TILE) &&
+ (xfeatures_in_use() & XFEATURE_MASK_XTILE)) {
+ tile_release();
+ fpregs_deactivate(&current->thread.fpu);
+ }
+}
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 6b07faaa1579..23154d24b117 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -27,6 +27,11 @@ static __init int register_e820_pmem(void)
* simply here to trigger the module to load on demand.
*/
pdev = platform_device_alloc("e820_pmem", -1);
- return platform_device_add(pdev);
+
+ rc = platform_device_add(pdev);
+ if (rc)
+ platform_device_put(pdev);
+
+ return rc;
}
device_initcall(register_e820_pmem);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d456ce21c255..58a6ea472db9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -810,24 +810,43 @@ static void amd_e400_idle(void)
}
/*
- * Intel Core2 and older machines prefer MWAIT over HALT for C1.
- * We can't rely on cpuidle installing MWAIT, because it will not load
- * on systems that support only C1 -- so the boot default must be MWAIT.
+ * Prefer MWAIT over HALT if MWAIT is supported, MWAIT_CPUID leaf
+ * exists and whenever MONITOR/MWAIT extensions are present there is at
+ * least one C1 substate.
*
- * Some AMD machines are the opposite, they depend on using HALT.
- *
- * So for default C1, which is used during boot until cpuidle loads,
- * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ * Do not prefer MWAIT if MONITOR instruction has a bug or idle=nomwait
+ * is passed to kernel commandline parameter.
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ u32 eax, ebx, ecx, edx;
+
+ /* User has disallowed the use of MWAIT. Fallback to HALT */
+ if (boot_option_idle_override == IDLE_NOMWAIT)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
+ /* MWAIT is not supported on this platform. Fallback to HALT */
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
return 0;
- return 1;
+ /* Monitor has a bug. Fallback to HALT */
+ if (boot_cpu_has_bug(X86_BUG_MONITOR))
+ return 0;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ /*
+ * If MWAIT extensions are not available, it is safe to use MWAIT
+ * with EAX=0, ECX=0.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED))
+ return 1;
+
+ /*
+ * If MWAIT extensions are available, there should be at least one
+ * MWAIT C1 substate present.
+ */
+ return (edx & MWAIT_C1_SUBSTATE_MASK);
}
/*
@@ -932,9 +951,8 @@ static int __init idle_setup(char *str)
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
- * it means that mwait will be disabled for CPU C2/C3
- * states. In such case it won't touch the variable
- * of boot_option_idle_override.
+ * it means that mwait will be disabled for CPU C1/C2/C3
+ * states.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index b478edf43bec..3a5b0c9c4fcc 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -219,9 +219,10 @@ static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt
return ES_VMM_ERROR;
}
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
- struct es_em_ctxt *ctxt, u64 exit_code,
- u64 exit_info_1, u64 exit_info_2)
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
ghcb->protocol_version = ghcb_version;
@@ -231,14 +232,7 @@ enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
- /*
- * Hyper-V unenlightened guests use a paravisor for communicating and
- * GHCB pages are being allocated and set up by that paravisor. Linux
- * should not change the GHCB page's physical address.
- */
- if (set_ghcb_msr)
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
+ sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
return verify_exception_info(ghcb, ctxt);
@@ -795,7 +789,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
*/
sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
ghcb_set_sw_scratch(ghcb, sw_scratch);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
exit_info_1, exit_info_2);
if (ret != ES_OK)
return ret;
@@ -837,8 +831,7 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rax(ghcb, rax);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
- SVM_EXIT_IOIO, exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
if (ret != ES_OK)
return ret;
@@ -894,7 +887,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
/* xgetbv will cause #GP - use reset value for xcr0 */
ghcb_set_xcr0(ghcb, 1);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
if (ret != ES_OK)
return ret;
@@ -919,7 +912,7 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
enum es_result ret;
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
if (ret != ES_OK)
return ret;
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c05f0124c410..63dc626627a0 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -786,7 +786,7 @@ static int vmgexit_psc(struct snp_psc_desc *desc)
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
* Page State Change VMGEXIT can pass error code through
@@ -1212,8 +1212,7 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rdx(ghcb, regs->dx);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
- exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
if ((ret == ES_OK) && (!exit_info_1)) {
regs->ax = ghcb->save.rax;
@@ -1452,7 +1451,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
/*
@@ -1628,7 +1627,7 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
/* Using a value of 0 for ExitInfo1 means RAX holds the value */
ghcb_set_rax(ghcb, val);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1658,7 +1657,7 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
@@ -1667,7 +1666,7 @@ static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt
ghcb_set_rcx(ghcb, ctxt->regs->cx);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
if (ret != ES_OK)
return ret;
@@ -1708,7 +1707,7 @@ static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
if (x86_platform.hyper.sev_es_hcall_prepare)
x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
if (ret != ES_OK)
return ret;
@@ -2197,7 +2196,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
ghcb_set_rbx(ghcb, input->data_npages);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 143e37298d8a..e5fa335a4ea7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6029,6 +6029,11 @@ split_irqchip_unlock:
r = 0;
break;
case KVM_CAP_X86_USER_SPACE_MSR:
+ r = -EINVAL;
+ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER))
+ break;
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
@@ -6183,6 +6188,9 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
+ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 57ba5502aecf..82a042c03824 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -856,7 +856,7 @@ int devmem_is_allowed(unsigned long pagenr)
/*
* This must follow RAM test, since System RAM is considered a
- * restricted resource under CONFIG_STRICT_IOMEM.
+ * restricted resource under CONFIG_STRICT_DEVMEM.
*/
if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
/* Low 1MB bypasses iomem restrictions. */
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index e44e938885b7..7418c367e328 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -110,7 +110,7 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
return vma_pkey(vma);
}
-#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
+#define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
/*
* Make the default PKRU value (at execve() time) as restrictive
@@ -118,11 +118,14 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
- PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
- PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
- PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
- PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
+u32 init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
+ PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
+ PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
+ PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
+ PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
+ PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
+ PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
+ PKRU_AD_MASK(15);
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d400b6d9d246..c1e31e9a85d7 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
const struct flush_tlb_info *f = info;
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
- u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
bool local = smp_processor_id() == f->initiating_cpu;
unsigned long nr_invalidate = 0;
+ u64 mm_tlb_gen;
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
return;
}
+ if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
+ f->new_tlb_gen <= local_tlb_gen)) {
+ /*
+ * The TLB is already up to date in respect to f->new_tlb_gen.
+ * While the core might be still behind mm_tlb_gen, checking
+ * mm_tlb_gen unnecessarily would have negative caching effects
+ * so avoid it.
+ */
+ return;
+ }
+
+ /*
+ * Defer mm_tlb_gen reading as long as possible to avoid cache
+ * contention.
+ */
+ mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
+
if (unlikely(local_tlb_gen == mm_tlb_gen)) {
/*
* There's nothing to do: we're already up to date. This can
@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
/* Partial flush */
unsigned long addr = f->start;
+ /* Partial flush cannot have invalid generations */
+ VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
+
+ /* Partial flush must have valid mm */
+ VM_WARN_ON(f->mm == NULL);
+
nr_invalidate = (f->end - f->start) >> f->stride_shift;
while (addr < f->end) {
@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
struct flush_tlb_info *info;
preempt_disable();
- info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+ info = get_flush_tlb_info(NULL, start, end, 0, false,
+ TLB_GENERATION_INVALID);
on_each_cpu(do_kernel_range_flush, info, 1);
@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
int cpu = get_cpu();
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
+ info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+ TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling