diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 231 |
1 files changed, 153 insertions, 78 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 03869eb7fcd6..c53298dfbf50 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +static bool __read_mostly report_ignored_msrs = true; +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); + unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); @@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm) /* both __this_cpu_read() and rdtsc() should be on the same cpu */ get_cpu(); - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, - &hv_clock.tsc_shift, - &hv_clock.tsc_to_system_mul); - ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + if (__this_cpu_read(cpu_tsc_khz)) { + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + ret = __pvclock_read_cycles(&hv_clock, rdtsc()); + } else + ret = ktime_get_boot_ns() + ka->kvmclock_offset; put_cpu(); @@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) */ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); + if (guest_hv_clock.version & 1) + ++guest_hv_clock.version; /* first time write, random junk */ + vcpu->hv_clock.version = guest_hv_clock.version + 1; kvm_write_guest_cached(v->kvm, &vcpu->pv_time, &vcpu->hv_clock, @@ -2006,10 +2015,12 @@ static void kvmclock_sync_fn(struct work_struct *work) KVMCLOCK_SYNC_PERIOD); } -static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u64 mcg_cap = vcpu->arch.mcg_cap; unsigned bank_num = mcg_cap & 0xff; + u32 msr = msr_info->index; + u64 data = msr_info->data; switch (msr) { case MSR_IA32_MCG_STATUS: @@ -2034,6 +2045,9 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) if ((offset & 0x3) == 0 && data != 0 && (data | (1 << 10)) != ~(u64)0) return -1; + if (!msr_info->host_initiated && + (offset & 0x3) == 1 && data != 0) + return -1; vcpu->arch.mce_banks[offset] = data; break; } @@ -2283,7 +2297,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: - return set_msr_mce(vcpu, msr, data); + return set_msr_mce(vcpu, msr_info); case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: @@ -2317,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW)) @@ -2354,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, + "ignored wrmsr: 0x%x data 0x%llx\n", + msr, data); break; } } @@ -2573,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->index); return 1; } else { - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index); + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", + msr_info->index); msr_info->data = 0; } break; @@ -2917,7 +2937,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) srcu_read_unlock(&vcpu->kvm->srcu, idx); pagefault_enable(); kvm_x86_ops->vcpu_put(vcpu); - kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); } @@ -4034,10 +4053,16 @@ long kvm_arch_vm_ioctl(struct file *filp, case KVM_SET_IDENTITY_MAP_ADDR: { u64 ident_addr; + mutex_lock(&kvm->lock); + r = -EINVAL; + if (kvm->created_vcpus) + goto set_identity_unlock; r = -EFAULT; if (copy_from_user(&ident_addr, argp, sizeof ident_addr)) - goto out; + goto set_identity_unlock; r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); +set_identity_unlock: + mutex_unlock(&kvm->lock); break; } case KVM_SET_NR_MMU_PAGES: @@ -4359,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) addr, n, v)) && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v); handled += n; addr += n; len -= n; @@ -4618,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_fragments[0].gpa, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, val); vcpu->mmio_read_completed = 0; return 1; } @@ -4640,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val) { - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val); + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val); return vcpu_mmio_write(vcpu, gpa, bytes, val); } static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0); + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL); return X86EMUL_IO_NEEDED; } @@ -5226,17 +5251,6 @@ static void emulator_halt(struct x86_emulate_ctxt *ctxt) emul_to_vcpu(ctxt)->arch.halt_request = 1; } -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_disable(); - kvm_load_guest_fpu(emul_to_vcpu(ctxt)); -} - -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt) -{ - preempt_enable(); -} - static int emulator_intercept(struct x86_emulate_ctxt *ctxt, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -5275,6 +5289,11 @@ static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); } +static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, u64 smbase) +{ + return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smbase); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -5309,13 +5328,12 @@ static const struct x86_emulate_ops emulate_ops = { .halt = emulator_halt, .wbinvd = emulator_wbinvd, .fix_hypercall = emulator_fix_hypercall, - .get_fpu = emulator_get_fpu, - .put_fpu = emulator_put_fpu, .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, .set_hflags = emulator_set_hflags, + .pre_leave_smm = emulator_pre_leave_smm, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -5413,7 +5431,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; - r = EMULATE_FAIL; + r = EMULATE_USER_EXIT; } kvm_queue_exception(vcpu, UD_VECTOR); @@ -5705,6 +5723,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, emulation_type)) return EMULATE_DONE; + if (ctxt->have_exception && inject_emulated_exception(vcpu)) + return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; return handle_emulation_failure(vcpu); @@ -6426,7 +6446,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } kvm_x86_ops->queue_exception(vcpu); - } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) { vcpu->arch.smi_pending = false; enter_smm(vcpu); } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) { @@ -6473,9 +6493,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -#define put_smstate(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val - static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) { u32 flags = 0; @@ -6641,13 +6658,20 @@ static void enter_smm(struct kvm_vcpu *vcpu) u32 cr0; trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true); - vcpu->arch.hflags |= HF_SMM_MASK; memset(buf, 0, 512); if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) enter_smm_save_state_64(vcpu, buf); else enter_smm_save_state_32(vcpu, buf); + /* + * Give pre_enter_smm() a chance to make ISA-specific changes to the + * vCPU state (e.g. leave guest mode) after we've saved the state into + * the SMM state-save area. + */ + kvm_x86_ops->pre_enter_smm(vcpu, buf); + + vcpu->arch.hflags |= HF_SMM_MASK; kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); if (kvm_x86_ops->get_nmi_mask(vcpu)) @@ -6740,6 +6764,20 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + unsigned long apic_address; + + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (start <= apic_address && apic_address < end) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); +} + void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; @@ -6876,17 +6914,23 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (inject_pending_event(vcpu, req_int_win) != 0) req_immediate_exit = true; else { - /* Enable NMI/IRQ window open exits if needed. + /* Enable SMI/NMI/IRQ window open exits if needed. * - * SMIs have two cases: 1) they can be nested, and - * then there is nothing to do here because RSM will - * cause a vmexit anyway; 2) or the SMI can be pending - * because inject_pending_event has completed the - * injection of an IRQ or NMI from the previous vmexit, - * and then we request an immediate exit to inject the SMI. + * SMIs have three cases: + * 1) They can be nested, and then there is nothing to + * do here because RSM will cause a vmexit anyway. + * 2) There is an ISA-specific reason why SMI cannot be + * injected, and the moment when this changes can be + * intercepted. + * 3) Or the SMI can be pending because + * inject_pending_event has completed the injection + * of an IRQ or NMI from the previous vmexit, and + * then we request an immediate exit to inject the + * SMI. */ if (vcpu->arch.smi_pending && !is_smm(vcpu)) - req_immediate_exit = true; + if (!kvm_x86_ops->enable_smi_window(vcpu)) + req_immediate_exit = true; if (vcpu->arch.nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) @@ -6908,7 +6952,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt @@ -7221,14 +7264,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - struct fpu *fpu = ¤t->thread.fpu; int r; - sigset_t sigsaved; - fpu__initialize(fpu); + kvm_sigset_activate(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_load_guest_fpu(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { @@ -7270,9 +7310,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = vcpu_run(vcpu); out: + kvm_put_guest_fpu(vcpu); post_kvm_run_save(vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } @@ -7340,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) #endif kvm_rip_write(vcpu, regs->rip); - kvm_set_rflags(vcpu, regs->rflags); + kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED); vcpu->arch.exception.pending = false; @@ -7454,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); +int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { + /* + * When EFER.LME and CR0.PG are set, the processor is in + * 64-bit mode (though maybe in a 32-bit code segment). + * CR4.PAE and EFER.LMA must be set. + */ + if (!(sregs->cr4 & X86_CR4_PAE) + || !(sregs->efer & EFER_LMA)) + return -EINVAL; + } else { + /* + * Not in 64-bit mode: EFER.LMA is clear and the code + * segment cannot be 64-bit. + */ + if (sregs->efer & EFER_LMA || sregs->cs.l) + return -EINVAL; + } + + return 0; +} + int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { @@ -7466,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, (sregs->cr4 & X86_CR4_OSXSAVE)) return -EINVAL; + if (kvm_valid_sregs(vcpu, sregs)) + return -EINVAL; + apic_base_msr.data = sregs->apic_base; apic_base_msr.host_initiated = true; if (kvm_set_apic_base(vcpu, &apic_base_msr)) @@ -7663,32 +7729,25 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } +/* Swap (qemu) user FPU context for the guest FPU context. */ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - if (vcpu->guest_fpu_loaded) - return; - - /* - * Restore all possible states in the guest, - * and assume host would use all available bits. - * Guest xcr0 would be loaded later. - */ - vcpu->guest_fpu_loaded = 1; - __kernel_fpu_begin(); + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, ~XFEATURE_MASK_PKRU); + preempt_enable(); trace_kvm_fpu(1); } +/* When vcpu_run ends, restore user space FPU context. */ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { - if (!vcpu->guest_fpu_loaded) - return; - - vcpu->guest_fpu_loaded = 0; + preempt_disable(); copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - __kernel_fpu_end(); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); } @@ -7798,18 +7857,43 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_async_pf_hash_reset(vcpu); vcpu->arch.apf.halted = false; + if (kvm_mpx_supported()) { + void *mpx_state_buffer; + + /* + * To avoid have the INIT path from kvm_apic_has_events() that be + * called with loaded FPU and does not let userspace fix the state. + */ + if (init_event) + kvm_put_guest_fpu(vcpu); + mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, + XFEATURE_MASK_BNDREGS); + if (mpx_state_buffer) + memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); + mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave, + XFEATURE_MASK_BNDCSR); + if (mpx_state_buffer) + memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); + if (init_event) + kvm_load_guest_fpu(vcpu); + } + if (!init_event) { kvm_pmu_reset(vcpu); vcpu->arch.smbase = 0x30000; vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; vcpu->arch.msr_misc_features_enables = 0; + + vcpu->arch.xcr0 = XFEATURE_MASK_FP; } memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; + vcpu->arch.ia32_xss = 0; + kvm_x86_ops->vcpu_reset(vcpu, init_event); } @@ -7974,16 +8058,11 @@ EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct page *page; - struct kvm *kvm; int r; - BUG_ON(vcpu->kvm == NULL); - kvm = vcpu->kvm; - vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu); - vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) + if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; @@ -8001,7 +8080,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) if (r < 0) goto fail_free_pio_data; - if (irqchip_in_kernel(kvm)) { + if (irqchip_in_kernel(vcpu->kvm)) { r = kvm_create_lapic(vcpu); if (r < 0) goto fail_mmu_destroy; @@ -8023,10 +8102,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) fx_init(vcpu); - vcpu->arch.ia32_tsc_adjust_msr = 0x0; - vcpu->arch.pv_time_enabled = false; - - vcpu->arch.guest_supported_xcr0 = 0; vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); |