44 files changed, 399 insertions, 303 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2336dd26ece4..429c6c624861 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8611,14 +8611,12 @@ F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64) -M: Christoffer Dall <christoffer.dall@arm.com> M: Marc Zyngier <marc.zyngier@arm.com> R: James Morse <james.morse@arm.com> R: Julien Thierry <julien.thierry@arm.com> R: Suzuki K Pouloze <suzuki.poulose@arm.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu -W: http://systems.cs.columbia.edu/projects/kvm-arm T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git S: Maintained F: arch/arm/include/uapi/asm/kvm* diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index d2b5ec9c4b92..ba88b1eca93c 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -11,6 +11,7 @@ CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve) obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += cp15-sr.o diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2a8d3f8ca22c..4bcd9c1291d5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -592,9 +592,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); -void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt); -bool __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt); - void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu); void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu); #else diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 82d1904328ad..ea710f674cb6 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,6 +10,7 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 22b4c335e0b2..8799e0c267d4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -16,6 +16,7 @@ */ #include <linux/arm-smccc.h> +#include <linux/kvm_host.h> #include <linux/types.h> #include <linux/jump_label.h> #include <uapi/linux/psci.h> @@ -505,6 +506,44 @@ static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) #endif } +/** + * Disable host events, enable guest events + */ +static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenclr_el0); + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenset_el0); + + return (pmu->events_host || pmu->events_guest); +} + +/** + * Disable guest events, enable host events + */ +static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) +{ + struct kvm_host_data *host; + struct kvm_pmu_events *pmu; + + host = container_of(host_ctxt, struct 
kvm_host_data, host_ctxt); + pmu = &host->pmu_events; + + if (pmu->events_guest) + write_sysreg(pmu->events_guest, pmcntenclr_el0); + + if (pmu->events_host) + write_sysreg(pmu->events_host, pmcntenset_el0); +} + /* Switch to the guest for VHE systems running in EL2 */ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 3da94a5bb6b7..e71d00bb5271 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -53,44 +53,6 @@ void kvm_clr_pmu_events(u32 clr) ctx->pmu_events.events_guest &= ~clr; } -/** - * Disable host events, enable guest events - */ -bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenclr_el0); - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenset_el0); - - return (pmu->events_host || pmu->events_guest); -} - -/** - * Disable guest events, enable host events - */ -void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) -{ - struct kvm_host_data *host; - struct kvm_pmu_events *pmu; - - host = container_of(host_ctxt, struct kvm_host_data, host_ctxt); - pmu = &host->pmu_events; - - if (pmu->events_guest) - write_sysreg(pmu->events_guest, pmcntenclr_el0); - - if (pmu->events_host) - write_sysreg(pmu->events_host, pmcntenset_el0); -} - #define PMEVTYPER_READ_CASE(idx) \ case idx: \ return read_sysreg(pmevtyper##idx##_el0) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bdbc81b5bc91..2b00a3ebee08 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -36,7 +36,7 @@ */ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 4096 -#define KVM_HALT_POLL_NS_DEFAULT 80000 +#define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ #define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8d6d75db8de6..e5e8eb29e68e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -181,7 +181,7 @@ MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* maximum percentage of steal time for polling. >100 is treated like 100 */ static u8 halt_poll_max_steal = 10; module_param(halt_poll_max_steal, byte, 0644); -MODULE_PARM_DESC(hpage, "Maximum percentage of steal time to allow polling"); +MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling"); /* * For now we handle at most 16 double words as this is what the s390 base @@ -4524,21 +4524,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int rc; - - /* If the basics of the memslot do not change, we do not want - * to update the gmap. Every update causes several unnecessary - * segment translation exceptions. This is usually handled just - * fine by the normal fault handler + gmap, but it will also - * cause faults on the prefix page of running guest CPUs. 
- */ - if (old->userspace_addr == mem->userspace_addr && - old->base_gfn * PAGE_SIZE == mem->guest_phys_addr && - old->npages * PAGE_SIZE == mem->memory_size) - return; + int rc = 0; - rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, - mem->guest_phys_addr, mem->memory_size); + switch (change) { + case KVM_MR_DELETE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + break; + case KVM_MR_MOVE: + rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, + old->npages * PAGE_SIZE); + if (rc) + break; + /* FALLTHROUGH */ + case KVM_MR_CREATE: + rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr, + mem->guest_phys_addr, mem->memory_size); + break; + case KVM_MR_FLAGS_ONLY: + break; + default: + WARN(1, "Unknown KVM MR CHANGE: %d\n", change); + } if (rc) pr_warn("failed to commit memory region\n"); return; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 80a642a0143d..e18a9f9f65b5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -456,8 +456,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } - /* function 4 has additional index. */ - case 4: { + /* functions 4 and 0x8000001d have additional index. */ + case 4: + case 0x8000001d: { int i, cache_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -701,8 +702,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx = entry->edx = 0; break; case 0x8000001a: - break; - case 0x8000001d: + case 0x8000001e: break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index c19c7ede9bd6..a2f3432ce090 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -9,12 +9,22 @@ */ #include <linux/kvm_host.h> #include <linux/debugfs.h> +#include "lapic.h" bool kvm_arch_has_vcpu_debugfs(void) { return true; } +static int vcpu_get_timer_advance_ns(void *data, u64 *val) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; + *val = vcpu->arch.apic->lapic_timer.timer_advance_ns; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n"); + static int vcpu_get_tsc_offset(void *data, u64 *val) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; @@ -51,6 +61,14 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) if (!ret) return -ENOMEM; + if (lapic_in_kernel(vcpu)) { + ret = debugfs_create_file("lapic_timer_advance_ns", 0444, + vcpu->debugfs_dentry, + vcpu, &vcpu_timer_advance_ns_fops); + if (!ret) + return -ENOMEM; + } + if (kvm_has_tsc_control) { ret = debugfs_create_file("tsc-scaling-ratio", 0444, vcpu->debugfs_dentry, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index faa264822cee..007bc654f928 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -172,3 +172,10 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); } + +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE; + + return resample ? 
irqchip_kernel(kvm) : irqchip_in_kernel(kvm); +} diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index d5005cc26521..fd210cdd4983 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -114,6 +114,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return mode != KVM_IRQCHIP_NONE; } +bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index e39741997893..dd745b58ffd8 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -283,7 +283,7 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) bool fast_mode = idx & (1u << 31); struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - u64 ctr_val; + u64 mask = fast_mode ? ~0u : ~0ull; if (!pmu->version) return 1; @@ -291,15 +291,11 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) if (is_vmware_backdoor_pmc(idx)) return kvm_pmu_rdpmc_vmware(vcpu, idx, data); - pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask); if (!pmc) return 1; - ctr_val = pmc_read_counter(pmc); - if (fast_mode) - ctr_val = (u32)ctr_val; - - *data = ctr_val; + *data = pmc_read_counter(pmc) & mask; return 0; } diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index ba8898e1a854..22dff661145a 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -25,7 +25,8 @@ struct kvm_pmu_ops { unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); - struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); + struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx, + u64 *mask); int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 1495a735b38e..d3118088f1cd 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -186,7 +186,7 @@ static int amd_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } /* idx is the ECX register of RDPMC instruction */ -static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx) +static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *counters; @@ -269,10 +269,10 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; + pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->nr_arch_fixed_counters = 0; - pmu->version = 0; pmu->global_status = 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a849dcb7fbc5..735b8c01895e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -379,6 +379,9 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static bool __read_mostly dump_invalid_vmcb = 0; +module_param(dump_invalid_vmcb, bool, 0644); + static u8 rsm_ins_bytes[] = "\x0f\xaa"; static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -2024,7 +2027,11 @@ static void avic_vcpu_load(struct 
kvm_vcpu *vcpu, int cpu) if (!kvm_vcpu_apicv_active(vcpu)) return; - if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT)) + /* + * Since the host physical APIC id is 8 bits, + * we can support host APIC ID upto 255. + */ + if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; entry = READ_ONCE(*(svm->avic_physical_id_cache)); @@ -4824,6 +4831,11 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; + if (!dump_invalid_vmcb) { + pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n"); + return; + } + pr_err("VMCB Control Area:\n"); pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); @@ -4982,7 +4994,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; - pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); dump_vmcb(vcpu); return 0; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f1a69117ac0f..1032f068f0b9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2784,14 +2784,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) : "cc", "memory" ); - preempt_enable(); - if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + preempt_enable(); WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -2803,6 +2802,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) local_irq_enable(); if (hw_breakpoint_active()) set_debugreg(__this_cpu_read(cpu_dr7), 7); + preempt_enable(); /* * A non-failing VMEntry means we somehow entered guest mode with @@ -5423,39 +5423,44 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return 0; + vmx->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + + ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); - if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12)) - return -EINVAL; + if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12)) + goto error_guest_mode; if (copy_from_user(shadow_vmcs12, user_kvm_nested_state->data + VMCS12_SIZE, - sizeof(*vmcs12))) - return -EFAULT; + sizeof(*vmcs12))) { + ret = -EFAULT; + goto error_guest_mode; + } if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION || !shadow_vmcs12->hdr.shadow_vmcs) - return -EINVAL; + goto error_guest_mode; } if (nested_vmx_check_controls(vcpu, vmcs12) || nested_vmx_check_host_state(vcpu, vmcs12) || nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) - return -EINVAL; + goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; - vmx->nested.nested_run_pending = - !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); - ret = nested_vmx_enter_non_root_mode(vcpu, false); - if (ret) { - vmx->nested.nested_run_pending = 0; - return -EINVAL; - } + if (ret) + goto error_guest_mode; return 0; + +error_guest_mode: + vmx->nested.nested_run_pending = 0; + return ret; } void nested_vmx_vcpu_setup(void) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 
f8502c376b37..a99613a060dd 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -126,7 +126,7 @@ static int intel_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) } static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, - unsigned idx) + unsigned idx, u64 *mask) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); @@ -138,6 +138,7 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, if (fixed && idx >= pmu->nr_arch_fixed_counters) return NULL; counters = fixed ? pmu->fixed_counters : pmu->gp_counters; + *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP]; return &counters[idx]; } @@ -183,9 +184,13 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) *data = pmu->global_ovf_ctrl; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - *data = pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + u64 val = pmc_read_counter(pmc); + *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { *data = pmc->eventsel; @@ -235,11 +240,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || - (pmc = get_fixed_pmc(pmu, msr))) { - if (!msr_info->host_initiated) - data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if (msr_info->host_initiated) + pmc->counter = data; + else + pmc->counter = (s32)data; + return 0; + } else if ((pmc = get_fixed_pmc(pmu, msr))) { + pmc->counter = data; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1ac167614032..b93e36ddee5e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -114,6 +114,9 @@ static u64 __read_mostly host_xss; bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +static bool __read_mostly dump_invalid_vmcs = 0; +module_param(dump_invalid_vmcs, bool, 0644); + #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 @@ -5607,15 +5610,24 @@ static void vmx_dump_dtsel(char *name, uint32_t limit) void dump_vmcs(void) { - u32 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); - u32 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); - u32 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); - u32 secondary_exec_control = 0; - unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_read64(GUEST_IA32_EFER); + u32 vmentry_ctl, vmexit_ctl; + u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; + unsigned long cr4; + u64 efer; int i, n; + if (!dump_invalid_vmcs) { + pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); + return; + } + + vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS); + vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS); + cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); + cr4 = vmcs_readl(GUEST_CR4); + efer = vmcs_read64(GUEST_IA32_EFER); + secondary_exec_control = 0; if (cpu_has_secondary_exec_ctrls()) 
secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 63d37ccce3dc..61128b48c503 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -319,6 +319,7 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); #define POSTED_INTR_ON 0 #define POSTED_INTR_SN 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 536b78c4af6e..acb179f78fdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -143,7 +143,7 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); * tuning, i.e. allows priveleged userspace to set an exact advancement time. */ static int __read_mostly lapic_timer_advance_ns = -1; -module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); +module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); @@ -1298,7 +1298,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 efer = msr_info->data; if (efer & efer_reserved_bits) - return false; + return 1; if (!msr_info->host_initiated) { if (!__kvm_valid_efer(vcpu, efer)) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 2ed395b817cb..bc508dae286c 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -575,8 +575,12 @@ class TracepointProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self.fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field) or - ARCH.tracepoint_is_child(field)] + if self.is_field_wanted(fields_filter, field)] + # add parents for child fields - otherwise we won't see any output! + for field in self._fields: + parent = ARCH.tracepoint_is_child(field) + if (parent and parent not in self._fields): + self.fields.append(parent) @staticmethod def _get_online_cpus(): @@ -735,8 +739,12 @@ class DebugfsProvider(Provider): def update_fields(self, fields_filter): """Refresh fields, applying fields_filter""" self._fields = [field for field in self._get_available_fields() - if self.is_field_wanted(fields_filter, field) or - ARCH.debugfs_is_child(field)] + if self.is_field_wanted(fields_filter, field)] + # add parents for child fields - otherwise we won't see any output! 
+ for field in self._fields: + parent = ARCH.debugfs_is_child(field) + if (parent and parent not in self._fields): + self.fields.append(parent) @property def fields(self): diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 0811d860fe75..c057ba52364e 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -34,6 +34,8 @@ INTERACTIVE COMMANDS *c*:: clear filter *f*:: filter by regular expression + :: *Note*: Child events pull in their parents, and parents' stats summarize + all child events, not just the filtered ones *g*:: filter by guest name/PID diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a035295e54d7..41280dc06297 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -35,7 +35,9 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. +CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index f50a15c38f9b..fc27f890155b 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -131,6 +131,7 @@ static void *vcpu_worker(void *data) while (!READ_ONCE(host_quit)) { /* Let the guest dirty the random pages */ ret = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { pages_count += TEST_PAGES_PER_LOOP; generate_random_array(guest_array, TEST_PAGES_PER_LOOP); @@ -292,7 +293,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. 
*/ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 3; + guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); @@ -426,8 +427,11 @@ int main(int argc, char *argv[]) unsigned long interval = TEST_HOST_LOOP_INTERVAL; bool mode_selected = false; uint64_t phys_offset = 0; - unsigned int mode, host_ipa_limit; + unsigned int mode; int opt, i; +#ifdef __aarch64__ + unsigned int host_ipa_limit; +#endif #ifdef USE_CLEAR_DIRTY_LOG if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 8c6b9619797d..a5a4b28f14d8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -118,10 +118,12 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); +#ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state); int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state, bool ignore_error); +#endif const char *exit_reason_str(unsigned int exit_reason); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index e8c42506a09d..19e667911496 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -7,6 +7,8 @@ #define _GNU_SOURCE /* for program_invocation_name */ +#include <linux/compiler.h> + #include "kvm_util.h" #include "../kvm_util_internal.h" #include "processor.h" @@ -67,15 +69,13 @@ static uint64_t ptrs_per_pgd(struct kvm_vm *vm) return 1 << (vm->va_bits - shift); } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - int rc; - if (!vm->pgd_created) { vm_paddr_t paddr = vm_phy_pages_alloc(vm, page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, @@ -181,6 +181,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) unmapped_gva: TEST_ASSERT(false, "No mapping for vm virtual address, " "gva: 0x%lx", gva); + exit(1); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) @@ -226,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; struct kvm_vm *vm; - vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); vm_vcpu_add_default(vm, vcpuid, guest_code); @@ -312,6 +313,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate); get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc); - fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n", + fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e9113857f44e..633b22df46a4 100644 --- 
a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -135,7 +135,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm, unsigned long type) { struct kvm_vm *vm; - int kvm_fd; vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -556,7 +555,6 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, uint32_t flags) { int ret; - unsigned long pmem_size = 0; struct userspace_mem_region *region; size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; @@ -1250,6 +1248,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, ret, errno); } +#ifdef __x86_64__ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_nested_state *state) { @@ -1281,6 +1280,7 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, return ret; } +#endif /* * VM VCPU System Regs Get @@ -1334,7 +1334,6 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c index a2ab38be2f47..b701a01cfcb6 100644 --- a/tools/testing/selftests/kvm/lib/ucall.c +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -142,7 +142,7 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) vm_vaddr_t gva; TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, "Unexpected ucall exit mmio address access"); - gva = *(vm_vaddr_t *)run->mmio.data; + memcpy(&gva, run->mmio.data, sizeof(gva)); memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc)); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index dc7fae9fa424..21f3040d90cb 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -229,8 +229,6 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - int rc; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -549,7 +547,6 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryPointerEntry *pdpe; struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - void *hva; TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -582,6 +579,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) unmapped_gva: TEST_ASSERT(false, "No mapping for vm virtual address, " "gva: 0x%lx", gva); + exit(EXIT_FAILURE); } static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 7c2c4d4055a8..63cc9c3f5ab6 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -87,6 +87,7 @@ int main(int argc, char *argv[]) while (1) { rc = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c 
b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index 36669684eca5..b38260e29775 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -19,8 +19,6 @@ #define VCPU_ID 5 -static bool have_nested_state; - void l2_guest_code(void) { GUEST_SYNC(6); @@ -73,7 +71,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; @@ -88,8 +85,6 @@ int main(int argc, char *argv[]) .args[0] = (unsigned long)&evmcs_ver }; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); @@ -113,7 +108,7 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (stage = 1;; stage++) { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 9a21e912097c..f72b3043db0e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -52,15 +52,11 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(entry->index == 0, ".index field should be zero"); - TEST_ASSERT(entry->index == 0, - ".index field should be zero"); - TEST_ASSERT(entry->flags == 0, ".flags field should be zero"); - TEST_ASSERT(entry->padding[0] == entry->padding[1] - == entry->padding[2] == 0, - ".index field should be zero"); + TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && + !entry->padding[2], "padding should be zero"); /* * If needed for debug: @@ -90,7 +86,6 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) { int nent = 20; /* should be enough */ static struct kvm_cpuid2 *cpuid; - int ret; cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2)); diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eb3e7a838cb4..40050e44ec0a 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -81,7 +81,6 @@ static void test_msr_platform_info_disabled(struct kvm_vm *vm) int main(int argc, char *argv[]) { struct kvm_vm *vm; - struct kvm_run *state; int rv; uint64_t msr_platform_info; diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index fb8086964d83..4daf520bada1 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -87,7 +87,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs; @@ -115,7 +114,7 @@ int main(int argc, char *argv[]) vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { printf("will skip SMM test with VMX enabled\n"); diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index e0a3c0204b7c..2a4121f4de01 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -22,8 
+22,6 @@ #define VCPU_ID 5 -static bool have_nested_state; - void l2_guest_code(void) { GUEST_SYNC(6); @@ -122,7 +120,6 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages = NULL; vm_vaddr_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; @@ -132,8 +129,6 @@ int main(int argc, char *argv[]) struct ucall uc; int stage; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -142,7 +137,7 @@ int main(int argc, char *argv[]) vcpu_regs_get(vm, VCPU_ID, ®s1); if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); } else { printf("will skip nested state checks\n"); diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index c8478ce9ea77..25cacd3316f6 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -25,9 +25,15 @@ void guest_code(void) { + /* + * use a callee-save register, otherwise the compiler + * saves it around the call to GUEST_SYNC. + */ + register u32 stage asm("rbx"); for (;;) { GUEST_SYNC(0); - asm volatile ("inc %r11"); + stage++; + asm volatile ("" : : "r" (stage)); } } @@ -147,7 +153,7 @@ int main(int argc, char *argv[]) compare_vcpu_events(&events, &run->s.regs.events); /* Set and verify various register values. */ - run->s.regs.regs.r11 = 0xBAD1DEA; + run->s.regs.regs.rbx = 0xBAD1DEA; run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ @@ -158,9 +164,9 @@ int main(int argc, char *argv[]) "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, "apic_base sync regs value incorrect 0x%llx.", run->s.regs.sregs.apic_base); @@ -179,15 +185,15 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xDEADBEEF; + run->s.regs.regs.rbx = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); /* Clear kvm_valid_regs bits and kvm_dirty_bits. 
* Verify s.regs values are not overwritten with existing guest values @@ -195,21 +201,21 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = 0; run->kvm_dirty_regs = 0; - run->s.regs.regs.r11 = 0xAAAA; - regs.r11 = 0xBAC0; + run->s.regs.regs.rbx = 0xAAAA; + regs.rbx = 0xBAC0; vcpu_regs_set(vm, VCPU_ID, ®s); rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBAC0 + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); + TEST_ASSERT(regs.rbx == 0xBAC0 + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten * with existing guest values but that guest values are overwritten @@ -217,19 +223,19 @@ int main(int argc, char *argv[]) */ run->kvm_valid_regs = 0; run->kvm_dirty_regs = TEST_SYNC_FIELDS; - run->s.regs.regs.r11 = 0xBBBB; + run->s.regs.regs.rbx = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, - "r11 sync regs value incorrect 0x%llx.", - run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, + "rbx sync regs value incorrect 0x%llx.", + run->s.regs.regs.rbx); vcpu_regs_get(vm, VCPU_ID, ®s); - TEST_ASSERT(regs.r11 == 0xBBBB + 1, - "r11 guest value incorrect 0x%llx.", - regs.r11); + TEST_ASSERT(regs.rbx == 0xBBBB + 1, + "rbx guest value incorrect 0x%llx.", + regs.rbx); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 6edec6fd790b..97182b47b10c 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -39,8 +39,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); @@ -55,7 +53,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages; vm_vaddr_t vmx_pages_gva; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -68,7 +65,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (;;) { diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 61a2163cf9f1..9d62e2c7e024 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -75,7 +75,7 @@ void set_revision_id_for_vmcs12(struct kvm_nested_state *state, u32 vmcs12_revision) { /* Set revision_id in vmcs12 to vmcs12_revision. 
*/ - *(u32 *)(state->data) = vmcs12_revision; + memcpy(state->data, &vmcs12_revision, sizeof(u32)); } void set_default_state(struct kvm_nested_state *state) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 18fa64db0d7a..6d37a3173956 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -121,7 +121,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -void report(int64_t val) +static void report(int64_t val) { printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); @@ -129,7 +129,6 @@ void report(int64_t val) int main(int argc, char *argv[]) { - struct vmx_pages *vmx_pages; vm_vaddr_t vmx_pages_gva; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -142,7 +141,7 @@ int main(int argc, char *argv[]) vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (;;) { diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 5abbe9b3c652..6880236974b8 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -26,127 +26,6 @@ #include <asm/kvm_hyp.h> /* - * stolen from arch/arm/kernel/opcodes.c - * - * condition code lookup table - * index into the table is test code: EQ, NE, ... LT, GT, AL, NV - * - * bit position in short is condition code: NZCV - */ -static const unsigned short cc_map[16] = { - 0xF0F0, /* EQ == Z set */ - 0x0F0F, /* NE */ - 0xCCCC, /* CS == C set */ - 0x3333, /* CC */ - 0xFF00, /* MI == N set */ - 0x00FF, /* PL */ - 0xAAAA, /* VS == V set */ - 0x5555, /* VC */ - 0x0C0C, /* HI == C set && Z clear */ - 0xF3F3, /* LS == C clear || Z set */ - 0xAA55, /* GE == (N==V) */ - 0x55AA, /* LT == (N!=V) */ - 0x0A05, /* GT == (!Z && (N==V)) */ - 0xF5FA, /* LE == (Z || (N!=V)) */ - 0xFFFF, /* AL always */ - 0 /* NV */ -}; - -/* - * Check if a trapped instruction should have been executed or not. - */ -bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) -{ - unsigned long cpsr; - u32 cpsr_cond; - int cond; - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) - return true; - - /* Is condition field valid? */ - cond = kvm_vcpu_get_condition(vcpu); - if (cond == 0xE) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - if (cond < 0) { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - cpsr_cond = cpsr >> 28; - - if (!((cc_map[cond] >> cpsr_cond) & 1)) - return false; - - return true; -} - -/** - * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block - * @vcpu: The VCPU pointer - * - * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have - * to do this little bit of work manually. 
The fields map like this: - * - * IT[7:0] -> CPSR[26:25],CPSR[15:10] - */ -static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) -{ - unsigned long itbits, cond; - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & PSR_AA32_T_BIT); - - if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) - return; - - cond = (cpsr & 0xe000) >> 13; - itbits = (cpsr & 0x1c00) >> (10 - 2); - itbits |= (cpsr & (0x3 << 25)) >> 25; - - /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ - if ((itbits & 0x7) == 0) - itbits = cond = 0; - else - itbits = (itbits << 1) & 0x1f; - - cpsr &= ~PSR_AA32_IT_MASK; - cpsr |= cond << 13; - cpsr |= (itbits & 0x1c) << (10 - 2); - cpsr |= (itbits & 0x3) << 25; - *vcpu_cpsr(vcpu) = cpsr; -} - -/** - * kvm_skip_instr - skip a trapped instruction and proceed to the next - * @vcpu: The vcpu pointer - */ -void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - bool is_thumb; - - is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; - else - *vcpu_pc(vcpu) += 4; - kvm_adjust_itstate(vcpu); -} - -/* * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. */ static const u8 return_offsets[8][2] = { diff --git a/virt/kvm/arm/hyp/aarch32.c b/virt/kvm/arm/hyp/aarch32.c new file mode 100644 index 000000000000..d31f267961e7 --- /dev/null +++ b/virt/kvm/arm/hyp/aarch32.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. 
*/ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + *vcpu_pc(vcpu) += 2; + else + *vcpu_pc(vcpu) += 4; + kvm_adjust_itstate(vcpu); +} diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 001aeda4c154..3972a9564c76 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -44,6 +44,12 @@ static struct workqueue_struct *irqfd_cleanup_wq; +bool __attribute__((weak)) +kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) +{ + return true; +} + static void irqfd_inject(struct work_struct *work) { @@ -297,6 +303,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) if (!kvm_arch_intc_initialized(kvm)) return -EAGAIN; + if (!kvm_arch_irqfd_allowed(kvm, args)) + return -EINVAL; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT); if (!irqfd) return -ENOMEM; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0d13d9d125d..134ec0283a8a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -52,6 +52,7 @@ #include <linux/sort.h> #include <linux/bsearch.h> #include <linux/io.h> +#include <linux/lockdep.h> #include <asm/processor.h> #include <asm/ioctl.h> @@ -1760,8 +1761,10 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, if (pfn_valid(pfn)) { page = pfn_to_page(pfn); hva = kmap(page); +#ifdef CONFIG_HAS_IOMEM } else { hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); +#endif } if (!hva) @@ -4181,7 +4184,9 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - lockdep_assert_held(&kvm_count_lock); +#ifdef CONFIG_LOCKDEP + WARN_ON(lockdep_is_held(&kvm_count_lock)); +#endif hardware_enable_nolock(NULL); } }
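
A few of the changes above are worth a closer look outside the diff context. First, the PMU switch helpers moved into arch/arm64/kvm/hyp/switch.c recover the enclosing kvm_host_data from the kvm_cpu_context pointer they are handed, using container_of(). Below is a minimal userspace sketch of that pattern; the structure layouts are cut-down stand-ins for illustration, not the kernel definitions.

```c
#include <stddef.h>
#include <stdio.h>

/* Cut-down stand-ins for the kernel structures. */
struct kvm_cpu_context { unsigned long regs[31]; };
struct kvm_pmu_events { unsigned int events_host, events_guest; };
struct kvm_host_data {
	struct kvm_cpu_context host_ctxt;
	struct kvm_pmu_events pmu_events;
};

/* container_of(): step back from a member pointer to the enclosing object. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct kvm_host_data data = { .pmu_events = { .events_host = 0x3 } };
	struct kvm_cpu_context *host_ctxt = &data.host_ctxt;

	/* The same recovery step __pmu_switch_to_guest() performs. */
	struct kvm_host_data *host =
		container_of(host_ctxt, struct kvm_host_data, host_ctxt);

	printf("events_host = %#x\n", host->pmu_events.events_host);
	return 0;
}
```

Moving the helpers next to their only caller also lets them become static __hyp_text functions, which is why the exported copies in arch/arm64/kvm/pmu.c and their declarations in kvm_host.h are deleted.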
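
The s390 rework of kvm_arch_commit_memory_region() replaces the old "did anything relevant change?" heuristic with an explicit switch over the kvm_mr_change value, treating a move as an unmap of the old range followed by the same mapping step as a create. A compact sketch of that dispatch shape, with the gmap calls replaced by hypothetical stubs:

```c
#include <stdio.h>

/* Mirrors the kvm_mr_change cases the s390 hunk handles. */
enum mr_change { MR_CREATE, MR_DELETE, MR_MOVE, MR_FLAGS_ONLY };

/* Hypothetical stand-ins for gmap_unmap_segment()/gmap_map_segment(). */
static int unmap_segment(void) { puts("unmap old range"); return 0; }
static int map_segment(void)   { puts("map new range");   return 0; }

static int commit_region(enum mr_change change)
{
	int rc = 0;

	switch (change) {
	case MR_DELETE:
		rc = unmap_segment();
		break;
	case MR_MOVE:
		/* A move is an unmap of the old range... */
		rc = unmap_segment();
		if (rc)
			break;
		/* ...followed by the same mapping step as a create. */
		/* FALLTHROUGH */
	case MR_CREATE:
		rc = map_segment();
		break;
	case MR_FLAGS_ONLY:
		break;	/* nothing changes for the address-space mapping */
	default:
		/* The kernel WARNs here on an unknown change type. */
		fprintf(stderr, "unknown change: %d\n", change);
		rc = -1;
	}
	return rc;
}

int main(void) { return commit_region(MR_MOVE); }
```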
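
Both vendor modules grow an off-by-default knob (kvm_amd.dump_invalid_vmcb, kvm_intel.dump_invalid_vmcs) gating the VMCB/VMCS dumps, since the dumped state can leak guest internals into the host log. The module-parameter pattern in isolation looks like the sketch below, assuming an ordinary kbuild out-of-tree module; the module and parameter names are hypothetical.

```c
#include <linux/module.h>
#include <linux/printk.h>

/*
 * Off by default: dumping is opt-in. Permissions 0644 expose the flag as a
 * writable /sys/module/demo/parameters/dump_invalid_state, so it can be
 * flipped at runtime without reloading the module.
 */
static bool dump_invalid_state;
module_param(dump_invalid_state, bool, 0644);

static int __init demo_init(void)
{
	if (!dump_invalid_state) {
		/* Ratelimited, as in dump_vmcb()/dump_vmcs(): a guest that
		 * fails entry in a loop must not flood the log. */
		pr_warn_ratelimited("set demo.dump_invalid_state=1 to dump state\n");
		return 0;
	}
	pr_info("state dump would go here\n");
	return 0;
}
module_init(demo_init);

static void __exit demo_exit(void)
{
}
module_exit(demo_exit);

MODULE_LICENSE("GPL");
```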
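
The RDPMC rework in arch/x86/kvm/pmu.c replaces read-then-truncate logic with a mask that is seeded from the "fast mode" bit (ECX bit 31 selects the low 32 bits) and then narrowed by msr_idx_to_pmc() to the counter's architectural width. The arithmetic, extracted into plain C (function and parameter names here are illustrative):

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors kvm_pmu_rdpmc(): bit 31 of the RDPMC index selects "fast" mode,
 * which returns only the low 32 bits of the counter. */
static uint64_t rdpmc_value(uint64_t counter, uint32_t idx,
			    uint64_t counter_bitmask)
{
	bool fast_mode = idx & (1u << 31);
	uint64_t mask = fast_mode ? ~0u : ~0ull;

	/* msr_idx_to_pmc() additionally clamps to the counter width,
	 * e.g. (1ull << 48) - 1 for a 48-bit GP counter. */
	mask &= counter_bitmask;
	return counter & mask;
}

int main(void)
{
	uint64_t counter = 0x123456789abcdefull;
	uint64_t width48 = (1ull << 48) - 1;

	printf("normal: %#jx\n", (uintmax_t)rdpmc_value(counter, 0, width48));
	printf("fast:   %#jx\n",
	       (uintmax_t)rdpmc_value(counter, 1u << 31, width48));
	return 0;
}
```

Folding the width mask into msr_idx_to_pmc() is also what lets intel_pmu_get_msr() apply the correct KVM_PMC_GP vs KVM_PMC_FIXED bitmask on plain MSR reads.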
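
The new kvm_arch_irqfd_allowed() hook uses the classic weak-symbol pattern: generic code in virt/kvm/eventfd.c supplies a weak default that always allows the irqfd, and arch/x86/kvm/irq.c overrides it with a strong definition that rejects resampling irqfds unless the full in-kernel irqchip is present. The linkage trick in isolation, as a small userspace sketch (the function name and parameter are simplified stand-ins):

```c
#include <stdio.h>

/* Weak default: used only if no strong definition exists at link time.
 * This mirrors the fallback in virt/kvm/eventfd.c. */
__attribute__((weak)) int arch_irqfd_allowed(int resample)
{
	(void)resample;
	return 1;
}

/* Uncommenting this strong definition would silently replace the weak
 * one, the way the x86 kvm_arch_irqfd_allowed() does:
 *
 * int arch_irqfd_allowed(int resample)
 * {
 *	return !resample;
 * }
 */

int main(void)
{
	printf("resampling irqfd allowed: %d\n", arch_irqfd_allowed(1));
	return 0;
}
```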
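
Two selftest fixes (get_ucall() in lib/ucall.c and set_revision_id_for_vmcs12()) replace pointer casts on byte buffers with memcpy(). kvm_run->mmio.data and kvm_nested_state->data carry no alignment guarantee, so dereferencing them through a wider pointer type is undefined behaviour on strict-alignment targets and a strict-aliasing violation everywhere; memcpy() expresses the same load portably and typically compiles to the identical instruction. A minimal demonstration of the idiom:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* A raw byte buffer like kvm_run->mmio.data: no alignment or
	 * effective-type guarantee. */
	unsigned char data[8] = { 0xef, 0xbe, 0xad, 0xde, 0, 0, 0, 0 };
	uint64_t gva;

	/* *(uint64_t *)data would be UB; memcpy() is the portable form. */
	memcpy(&gva, data, sizeof(gva));

	/* Prints 0xdeadbeef on a little-endian host. */
	printf("gva = %#llx\n", (unsigned long long)gva);
	return 0;
}
```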
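
Finally, the aarch32 code duplicated into virt/kvm/arm/hyp/aarch32.c includes the ITSTATE advance, which is easy to exercise in isolation. Below is a standalone copy of the bit manipulation with a worked example; the constants assume arm64's definitions (PSR_AA32_T_BIT is bit 5, PSR_AA32_IT_MASK is 0x0600fc00), and the packing helper in main() is my own illustration.

```c
#include <stdio.h>

#define PSR_T_BIT	0x00000020u	/* CPSR.T: Thumb execution state */
#define PSR_IT_MASK	0x0600fc00u	/* ITSTATE: CPSR[26:25], CPSR[15:10] */

/* Userspace copy of kvm_adjust_itstate(): ITAdvance on a CPSR image. */
static unsigned int it_advance(unsigned int cpsr)
{
	unsigned int itbits, cond;

	/* ARM state, or Thumb state outside an IT block: nothing to do. */
	if (!(cpsr & PSR_T_BIT) || !(cpsr & PSR_IT_MASK))
		return cpsr;

	cond = (cpsr & 0xe000) >> 13;
	itbits = (cpsr & 0x1c00) >> (10 - 2);
	itbits |= (cpsr & (0x3 << 25)) >> 25;

	if ((itbits & 0x7) == 0)	/* last instruction of the block */
		itbits = cond = 0;
	else				/* shift one slot off the mask */
		itbits = (itbits << 1) & 0x1f;

	cpsr &= ~PSR_IT_MASK;
	cpsr |= cond << 13;
	cpsr |= (itbits & 0x1c) << (10 - 2);
	cpsr |= (itbits & 0x3) << 25;
	return cpsr;
}

int main(void)
{
	/* Thumb state, cond = 0x7, itbits = 0x0c: an IT block with two
	 * instructions remaining, packed the way the kernel repacks it. */
	unsigned int cpsr = PSR_T_BIT | (0x7u << 13)
			    | ((0x0cu & 0x1c) << 8) | ((0x0cu & 0x3) << 25);

	while (cpsr & PSR_IT_MASK) {
		printf("cpsr = %#010x\n", cpsr);
		cpsr = it_advance(cpsr);
	}
	printf("cpsr = %#010x (IT block done)\n", cpsr);
	return 0;
}
```

Keeping this logic in a hyp-mapped object (the new kernels/Makefile entries) is the point of the move: kvm_skip_instr32() must be callable from the EL2-only text on non-VHE systems.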