Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 213
1 files changed, 127 insertions, 86 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef5102f80497..6069af86da3b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -311,13 +311,13 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
 	u64 new_state = msr_info->data &
 		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
-		0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
+		(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
 
+	if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
+		return 1;
 	if (!msr_info->host_initiated &&
-	    ((msr_info->data & reserved_bits) != 0 ||
-	     new_state == X2APIC_ENABLE ||
-	     (new_state == MSR_IA32_APICBASE_ENABLE &&
+	    ((new_state == MSR_IA32_APICBASE_ENABLE &&
 	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
 	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
 	      old_state == 0)))
@@ -390,15 +390,28 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-	if (!vcpu->arch.exception.pending) {
+	if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
 	queue:
 		if (has_error && !is_protmode(vcpu))
 			has_error = false;
-		vcpu->arch.exception.pending = true;
+		if (reinject) {
+			/*
+			 * On vmentry, vcpu->arch.exception.pending is only
+			 * true if an event injection was blocked by
+			 * nested_run_pending. In that case, however,
+			 * vcpu_enter_guest requests an immediate exit,
+			 * and the guest shouldn't proceed far enough to
+			 * need reinjection.
+			 */
+			WARN_ON_ONCE(vcpu->arch.exception.pending);
+			vcpu->arch.exception.injected = true;
+		} else {
+			vcpu->arch.exception.pending = true;
+			vcpu->arch.exception.injected = false;
+		}
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
 		vcpu->arch.exception.error_code = error_code;
-		vcpu->arch.exception.reinject = reinject;
 		return;
 	}
 
@@ -413,8 +426,13 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 	class2 = exception_class(nr);
 	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
 		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
-		/* generate double fault per SDM Table 5-5 */
+		/*
+		 * Generate double fault per SDM Table 5-5. Set
+		 * exception.pending = true so that the double fault
+		 * can trigger a nested vmexit.
+		 */
 		vcpu->arch.exception.pending = true;
+		vcpu->arch.exception.injected = false;
 		vcpu->arch.exception.has_error_code = true;
 		vcpu->arch.exception.nr = DF_VECTOR;
 		vcpu->arch.exception.error_code = 0;
@@ -755,19 +773,22 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
-	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
+		return 1;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
-	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
 		return 1;
 
-	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
-	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
 		return 1;
 
-	if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
 		return 1;
 
 	if (is_long_mode(vcpu)) {
@@ -780,7 +801,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return 1;
 
 	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
-		if (!guest_cpuid_has_pcid(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
 			return 1;
 
 		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
@@ -814,10 +835,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 0;
 	}
 
-	if (is_long_mode(vcpu)) {
-		if (cr3 & CR3_L_MODE_RESERVED_BITS)
-			return 1;
-	} else if (is_pae(vcpu) && is_paging(vcpu) &&
+	if (is_long_mode(vcpu) &&
+	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+		return 1;
+	else if (is_pae(vcpu) && is_paging(vcpu) &&
 		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
@@ -884,7 +905,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 {
 	u64 fixed = DR6_FIXED_1;
 
-	if (!guest_cpuid_has_rtm(vcpu))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
 		fixed |= DR6_RTM;
 	return fixed;
 }
@@ -994,6 +1015,7 @@ static u32 emulated_msrs[] = {
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
 	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
+	HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
 	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
 	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
 	HV_X64_MSR_RESET,
@@ -1022,21 +1044,11 @@ bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 	if (efer & efer_reserved_bits)
 		return false;
 
-	if (efer & EFER_FFXSR) {
-		struct kvm_cpuid_entry2 *feat;
-
-		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
+	if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
 		return false;
-	}
 
-	if (efer & EFER_SVME) {
-		struct kvm_cpuid_entry2 *feat;
-
-		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
-		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
+	if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
 		return false;
-	}
 
 	return true;
 }
@@ -1084,7 +1096,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	case MSR_KERNEL_GS_BASE:
 	case MSR_CSTAR:
 	case MSR_LSTAR:
-		if (is_noncanonical_address(msr->data))
+		if (is_noncanonical_address(msr->data, vcpu))
 			return 1;
 		break;
 	case MSR_IA32_SYSENTER_EIP:
@@ -1101,7 +1113,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * value, and that something deterministic happens if the guest
 		 * invokes 64-bit SYSENTER.
 		 */
-		msr->data = get_canonical(msr->data);
+		msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
 	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
@@ -1534,8 +1546,9 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
 	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
 
-	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+	if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
 		update_ia32_tsc_adjust_msr(vcpu, offset);
+
 	kvm_vcpu_write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
@@ -2185,7 +2198,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		kvm_set_lapic_tscdeadline_msr(vcpu, data);
 		break;
 	case MSR_IA32_TSC_ADJUST:
-		if (guest_cpuid_has_tsc_adjust(vcpu)) {
+		if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
 			if (!msr_info->host_initiated) {
 				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				adjust_tsc_offset_guest(vcpu, adj);
@@ -2307,12 +2320,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		vcpu->arch.osvw.length = data;
 		break;
 	case MSR_AMD64_OSVW_STATUS:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		vcpu->arch.osvw.status = data;
 		break;
@@ -2537,12 +2550,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = 0xbe702111;
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
			return 1;
 		msr_info->data = vcpu->arch.osvw.length;
 		break;
 	case MSR_AMD64_OSVW_STATUS:
-		if (!guest_cpuid_has_osvw(vcpu))
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
 			return 1;
 		msr_info->data = vcpu->arch.osvw.status;
 		break;
@@ -2882,6 +2895,10 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	int idx;
+
+	if (vcpu->preempted)
+		vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
+
 	/*
 	 * Disable page faults because we're in atomic context here.
 	 * kvm_write_guest_offset_cached() would call might_fault()
@@ -3074,8 +3091,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 					       struct kvm_vcpu_events *events)
 {
 	process_nmi(vcpu);
+	/*
+	 * FIXME: pass injected and pending separately. This is only
+	 * needed for nested virtualization, whose state cannot be
+	 * migrated yet. For now we can combine them.
+	 */
 	events->exception.injected =
-		vcpu->arch.exception.pending &&
+		(vcpu->arch.exception.pending ||
+		 vcpu->arch.exception.injected) &&
 		!kvm_exception_is_soft(vcpu->arch.exception.nr);
 	events->exception.nr = vcpu->arch.exception.nr;
 	events->exception.has_error_code = vcpu->arch.exception.has_error_code;
@@ -3130,6 +3153,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	process_nmi(vcpu);
+	vcpu->arch.exception.injected = false;
 	vcpu->arch.exception.pending = events->exception.injected;
 	vcpu->arch.exception.nr = events->exception.nr;
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
@@ -4671,25 +4695,18 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 	 */
 	if (vcpu->arch.gpa_available &&
 	    emulator_can_use_gpa(ctxt) &&
-	    vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
-	    (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
-		gpa = exception->address;
-		goto mmio;
+	    (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
+		gpa = vcpu->arch.gpa_val;
+		ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
+	} else {
+		ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
+		if (ret < 0)
+			return X86EMUL_PROPAGATE_FAULT;
 	}
 
-	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
-
-	if (ret < 0)
-		return X86EMUL_PROPAGATE_FAULT;
-
-	/* For APIC access vmexit */
-	if (ret)
-		goto mmio;
-
-	if (ops->read_write_emulate(vcpu, gpa, val, bytes))
+	if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
-mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
@@ -5227,10 +5244,10 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
 	return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
-static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
-			       u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+			u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
 {
-	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
+	return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
 }
 
 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
@@ -6362,11 +6379,42 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	int r;
 
 	/* try to reinject previous events if any */
+	if (vcpu->arch.exception.injected) {
+		kvm_x86_ops->queue_exception(vcpu);
+		return 0;
+	}
+
+	/*
+	 * Exceptions must be injected immediately, or the exception
+	 * frame will have the address of the NMI or interrupt handler.
+	 */
+	if (!vcpu->arch.exception.pending) {
+		if (vcpu->arch.nmi_injected) {
+			kvm_x86_ops->set_nmi(vcpu);
+			return 0;
+		}
+
+		if (vcpu->arch.interrupt.pending) {
+			kvm_x86_ops->set_irq(vcpu);
+			return 0;
+		}
+	}
+
+	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+		if (r != 0)
+			return r;
+	}
+
+	/* try to inject new event if pending */
 	if (vcpu->arch.exception.pending) {
 		trace_kvm_inj_exception(vcpu->arch.exception.nr,
 					vcpu->arch.exception.has_error_code,
 					vcpu->arch.exception.error_code);
 
+		vcpu->arch.exception.pending = false;
+		vcpu->arch.exception.injected = true;
+
 		if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
@@ -6378,27 +6426,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 		}
 
 		kvm_x86_ops->queue_exception(vcpu);
-		return 0;
-	}
-
-	if (vcpu->arch.nmi_injected) {
-		kvm_x86_ops->set_nmi(vcpu);
-		return 0;
-	}
-
-	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu);
-		return 0;
-	}
-
-	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
-		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
-		if (r != 0)
-			return r;
-	}
-
-	/* try to inject new event if pending */
-	if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
+	} else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
 		vcpu->arch.smi_pending = false;
 		enter_smm(vcpu);
 	} else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
@@ -6615,7 +6643,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
 	vcpu->arch.hflags |= HF_SMM_MASK;
 	memset(buf, 0, 512);
-	if (guest_cpuid_has_longmode(vcpu))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		enter_smm_save_state_64(vcpu, buf);
 	else
 		enter_smm_save_state_32(vcpu, buf);
@@ -6667,7 +6695,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
 	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
 
-	if (guest_cpuid_has_longmode(vcpu))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		kvm_x86_ops->set_efer(vcpu, 0);
 
 	kvm_update_cpuid(vcpu);
@@ -6774,6 +6802,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+			vcpu->mmio_needed = 0;
 			r = 0;
 			goto out;
 		}
@@ -6862,6 +6891,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				kvm_x86_ops->enable_nmi_window(vcpu);
 			if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
 				kvm_x86_ops->enable_irq_window(vcpu);
+			WARN_ON(vcpu->arch.exception.pending);
 		}
 
 		if (kvm_lapic_enabled(vcpu)) {
@@ -7004,6 +7034,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.apic_attention)
 		kvm_lapic_sync_from_vapic(vcpu);
 
+	vcpu->arch.gpa_available = false;
 	r = kvm_x86_ops->handle_exit(vcpu);
 	return r;
 
@@ -7422,7 +7453,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
 
-	if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
+			(sregs->cr4 & X86_CR4_OSXSAVE))
+		return -EINVAL;
+
+	apic_base_msr.data = sregs->apic_base;
+	apic_base_msr.host_initiated = true;
+	if (kvm_set_apic_base(vcpu, &apic_base_msr))
 		return -EINVAL;
 
 	dt.size = sregs->idt.limit;
@@ -7441,9 +7478,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
 	kvm_x86_ops->set_efer(vcpu, sregs->efer);
-	apic_base_msr.data = sregs->apic_base;
-	apic_base_msr.host_initiated = true;
-	kvm_set_apic_base(vcpu, &apic_base_msr);
 
 	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
 	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -7734,6 +7768,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.nmi_injected = false;
 	kvm_clear_interrupt_queue(vcpu);
 	kvm_clear_exception_queue(vcpu);
+	vcpu->arch.exception.pending = false;
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	kvm_update_dr0123(vcpu);
@@ -7993,6 +8028,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	kvm_pmu_init(vcpu);
 
 	vcpu->arch.pending_external_vector = -1;
+	vcpu->arch.preempted_in_kernel = false;
 
 	kvm_hv_vcpu_init(vcpu);
 
@@ -8440,6 +8476,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.preempted_in_kernel;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
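The central change in the kvm_multiple_exception() and inject_pending_event() hunks above is that a queued exception now carries two separate flags: "injected" (already delivered once, only needs to be re-queued after an interrupted vmentry) and "pending" (not yet delivered, so it can still be rerouted, e.g. into a nested vmexit or a double fault). The C program below is a simplified, stand-alone model of that bookkeeping, not the kernel code itself; the struct and function names are invented for illustration.

/* Simplified model of the split pending/injected exception state (illustrative only). */
#include <stdbool.h>
#include <stdio.h>

struct exc_state {
	bool pending;	/* queued but not yet delivered; may still be rerouted */
	bool injected;	/* already delivered once; only needs re-injection */
	unsigned nr;	/* vector number */
};

/* Queue a new exception, or mark a previously delivered one for re-injection. */
static void queue_exception(struct exc_state *e, unsigned nr, bool reinject)
{
	if (reinject) {
		e->injected = true;	/* re-deliver as-is on the next entry */
	} else {
		e->pending = true;	/* new event: could still become a nested vmexit */
		e->injected = false;
	}
	e->nr = nr;
}

/* On vmentry: re-inject first, run nested checks, then inject new pending events. */
static void inject_on_vmentry(struct exc_state *e)
{
	if (e->injected) {
		printf("re-injecting vector %u\n", e->nr);
		return;
	}
	/* (nested-virtualization checks would run here and may consume 'pending') */
	if (e->pending) {
		e->pending = false;
		e->injected = true;	/* delivered: from now on only re-inject */
		printf("injecting vector %u\n", e->nr);
	}
}

int main(void)
{
	struct exc_state e = {0};

	queue_exception(&e, 14, false);	/* e.g. a page fault */
	inject_on_vmentry(&e);		/* delivered: pending -> injected */
	queue_exception(&e, 14, true);	/* vmexit before completion: re-inject */
	inject_on_vmentry(&e);
	return 0;
}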
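Many of the remaining hunks are a mechanical conversion from per-feature helpers (guest_cpuid_has_xsave(), guest_cpuid_has_osvw(), and so on) to a single table-driven guest_cpuid_has(vcpu, X86_FEATURE_*) lookup. The sketch below shows the general shape of that pattern applied to the kvm_set_cr4() checks; the enum, struct, and function names here are hypothetical and are not KVM's actual CPUID plumbing.

/* Illustrative sketch of a table-driven feature lookup replacing per-feature helpers. */
#include <stdbool.h>
#include <stdint.h>

enum feature { FEAT_XSAVE, FEAT_SMEP, FEAT_SMAP, FEAT_PKU, FEAT_LA57, FEAT_MAX };

/* Hypothetical per-vCPU feature words, filled from the guest's CPUID tables. */
struct vcpu_features {
	uint32_t words[(FEAT_MAX + 31) / 32];
};

bool guest_feature_has(const struct vcpu_features *f, enum feature feat)
{
	return f->words[feat / 32] & (1u << (feat % 32));
}

/* The CR4 checks collapse into one pattern per bit, as in the kvm_set_cr4() hunk. */
int check_cr4(const struct vcpu_features *f, unsigned long cr4)
{
	static const struct { enum feature feat; unsigned long bit; } deps[] = {
		{ FEAT_XSAVE, 1ul << 18 },	/* CR4.OSXSAVE */
		{ FEAT_SMEP,  1ul << 20 },	/* CR4.SMEP */
		{ FEAT_SMAP,  1ul << 21 },	/* CR4.SMAP */
		{ FEAT_PKU,   1ul << 22 },	/* CR4.PKE */
		{ FEAT_LA57,  1ul << 12 },	/* CR4.LA57 */
	};

	for (unsigned i = 0; i < sizeof(deps) / sizeof(deps[0]); i++)
		if ((cr4 & deps[i].bit) && !guest_feature_has(f, deps[i].feat))
			return 1;	/* reject: feature not exposed to the guest */
	return 0;
}

int main(void)
{
	struct vcpu_features f = { .words = { (1u << FEAT_XSAVE) | (1u << FEAT_SMEP) } };

	/* LA57 is not exposed, so setting CR4.LA57 (bit 12) must be rejected. */
	return check_cr4(&f, 1ul << 12) == 1 ? 0 : 1;
}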