diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/bugs.c | 51 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 72 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 8 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/memset_64.S | 4 |
10 files changed, 140 insertions, 62 deletions
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 714233cee0b5..3115caa7d7d0 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -290,6 +290,9 @@ static void __init uv_stringify(int len, char *to, char *from) { /* Relies on 'to' being NULL chars so result will be NULL terminated */ strncpy(to, from, len-1); + + /* Trim trailing spaces */ + (void)strim(to); } /* Find UV arch type entry in UVsystab */ @@ -366,7 +369,7 @@ static int __init early_get_arch_type(void) return ret; } -static int __init uv_set_system_type(char *_oem_id) +static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) { /* Save OEM_ID passed from ACPI MADT */ uv_stringify(sizeof(oem_id), oem_id, _oem_id); @@ -386,13 +389,23 @@ static int __init uv_set_system_type(char *_oem_id) /* (Not hubless), not a UV */ return 0; + /* Is UV hubless system */ + uv_hubless_system = 0x01; + + /* UV5 Hubless */ + if (strncmp(uv_archtype, "NSGI5", 5) == 0) + uv_hubless_system |= 0x20; + /* UV4 Hubless: CH */ - if (strncmp(uv_archtype, "NSGI4", 5) == 0) - uv_hubless_system = 0x11; + else if (strncmp(uv_archtype, "NSGI4", 5) == 0) + uv_hubless_system |= 0x10; /* UV3 Hubless: UV300/MC990X w/o hub */ else - uv_hubless_system = 0x9; + uv_hubless_system |= 0x8; + + /* Copy APIC type */ + uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id); pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n", oem_id, oem_table_id, uv_system_type, uv_hubless_system); @@ -456,7 +469,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id) uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; /* If not UV, return. */ - if (likely(uv_set_system_type(_oem_id) == 0)) + if (uv_set_system_type(_oem_id, _oem_table_id) == 0) return 0; /* Save and Decode OEM Table ID */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3f0db463f96..581fb7223ad0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { @@ -1261,16 +1269,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; + /* - * Indirect branch speculation is always disabled in strict - * mode. It can neither be enabled if it was force-disabled - * by a previous prctl call. + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + if (!is_spec_ib_user_controlled() || task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1283,10 +1301,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1351,20 +1369,17 @@ static int ib_prctl_get(struct task_struct *task) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) - return PR_SPEC_DISABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || - spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - } else + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else return PR_SPEC_NOT_AFFECTED; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..d50041f570e8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return 0; } +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + + /* + * save the feature bitmap to avoid cpuid lookup for every PV + * operation + */ + if (best) + vcpu->arch.pv_cpuid.features = best->eax; +} + void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - /* - * save the feature bitmap to avoid cpuid lookup for every PV - * operation - */ - if (best) - vcpu->arch.pv_cpuid.features = best->eax; - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = kvm_find_cpuid_entry(vcpu, 0x1, 0); if (best) @@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; + kvm_update_pv_runtime(vcpu); + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index bf8577947ed2..f7a6e8f83783 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1f96adff8dc4..5bb1939b65d8 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { - desc = desc->more; + while (desc->sptes[PTE_LIST_EXT-1]) { count += PTE_LIST_EXT; - } - if (desc->sptes[PTE_LIST_EXT-1]) { - desc->more = mmu_alloc_pte_list_desc(vcpu); + + if (!desc->more) { + desc->more = mmu_alloc_pte_list_desc(vcpu); + desc = desc->more; + break; + } desc = desc->more; } for (i = 0; desc->sptes[i]; ++i) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f5ede41bf9e6..447edc0d1d5a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,11 +255,10 @@ static struct kmem_cache *x86_emulator_cache; /* * When called, it means the previous get/set msr reached an invalid msr. - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want - * to fail the caller. + * Return true if we want to ignore/silent this failed msr access. */ -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, - u64 data, bool write) +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, + u64 data, bool write) { const char *op = write ? "wrmsr" : "rdmsr"; @@ -268,11 +267,11 @@ static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", op, msr, data); /* Mask the error */ - return 0; + return true; } else { kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", op, msr, data); - return -ENOENT; + return false; } } @@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) if (r == KVM_MSR_RET_INVALID) { /* Unconditionally clear the output for simplicity */ *data = 0; - r = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + r = 0; } if (r) @@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, struct msr_data msr; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return -EPERM; + return KVM_MSR_RET_FILTERED; switch (index) { case MSR_FS_BASE: @@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, int ret = __kvm_set_msr(vcpu, index, data, host_initiated); if (ret == KVM_MSR_RET_INVALID) - ret = kvm_msr_ignored_check(vcpu, index, data, true); + if (kvm_msr_ignored_check(vcpu, index, data, true)) + ret = 0; return ret; } @@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, int ret; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return -EPERM; + return KVM_MSR_RET_FILTERED; msr.index = index; msr.host_initiated = host_initiated; @@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, if (ret == KVM_MSR_RET_INVALID) { /* Unconditionally clear *data for simplicity */ *data = 0; - ret = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + ret = 0; } return ret; @@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case -ENOENT: + case KVM_MSR_RET_INVALID: return KVM_MSR_EXIT_REASON_UNKNOWN; - case -EPERM: + case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; default: return KVM_MSR_EXIT_REASON_INVAL; @@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, struct kvm_arch *ka = &vcpu->kvm->arch; if (vcpu->vcpu_id == 0 && !host_initiated) { - if (ka->boot_vcpu_runs_old_kvmclock && old_msr) + if (ka->boot_vcpu_runs_old_kvmclock != old_msr) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ka->boot_vcpu_runs_old_kvmclock = old_msr; @@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Values other than LBR and BTF are vendor-specific, thus reserved and should throw a #GP */ return 1; - } - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", - __func__, data); + } else if (report_ignored_msrs) + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", + __func__, data); break; case 0x200 ... 0x2ff: return kvm_mtrr_set_msr(vcpu, msr, data); @@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.efer; break; case MSR_KVM_WALL_CLOCK: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->kvm->arch.wall_clock; + break; case MSR_KVM_WALL_CLOCK_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->kvm->arch.wall_clock; break; case MSR_KVM_SYSTEM_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->arch.time; + break; case MSR_KVM_SYSTEM_TIME_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = vcpu->arch.apf.msr_en_val; break; case MSR_KVM_ASYNC_PF_INT: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) + return 1; + msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME)) + return 1; + msr_info->data = vcpu->arch.st.msr_val; break; case MSR_KVM_PV_EOI_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) + return 1; + msr_info->data = vcpu->arch.pv_eoi.msr_val; break; case MSR_KVM_POLL_CONTROL: + if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) + return 1; + msr_info->data = vcpu->arch.msr_kvm_poll_control; break; case MSR_IA32_P5_MC_ADDR: @@ -4575,6 +4611,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; + if (vcpu->arch.pv_cpuid.enforce) + kvm_update_pv_runtime(vcpu); return 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3900ab0c6004..e7ca622a468f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); -#define KVM_MSR_RET_INVALID 2 +/* + * Internal error codes that are used to indicate that MSR emulation encountered + * an error that should result in #GP in the guest, unless userspace + * handles it. + */ +#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ +#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 037faac46b0c..1e299ac73c86 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. * @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 7ff00ea64e4f..41902fe8b859 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,9 +24,7 @@ * Output: * rax: dest */ -.weak memmove - -SYM_FUNC_START_ALIAS(memmove) +SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9ff15ee404a4..0bfd26e4ca9e 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -6,8 +6,6 @@ #include <asm/alternative-asm.h> #include <asm/export.h> -.weak memset - /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is @@ -19,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended |