summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/x86.c
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2022-07-29 09:46:01 -0400
committerPaolo Bonzini <pbonzini@redhat.com>2022-08-01 03:21:00 -0400
commit63f4b210414b65aa3103c54369cacbd0b1bdf02f (patch)
tree2dc7b490d3a89306669c70256a41764ca52ab3b3 /arch/x86/kvm/x86.c
parent2e2e91158febfeb73b5d4f249440218304f34101 (diff)
parent7edc3a68038ab151a8791ddb6217755a5e4a5809 (diff)
downloadlinux-63f4b210414b65aa3103c54369cacbd0b1bdf02f.tar.bz2
Merge remote-tracking branch 'kvm/next' into kvm-next-5.20
KVM/s390, KVM/x86 and common infrastructure changes for 5.20 x86: * Permit guests to ignore single-bit ECC errors * Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache * Intel IPI virtualization * Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS * PEBS virtualization * Simplify PMU emulation by just using PERF_TYPE_RAW events * More accurate event reinjection on SVM (avoid retrying instructions) * Allow getting/setting the state of the speaker port data bit * Refuse starting the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent * "Notify" VM exit (detect microarchitectural hangs) for Intel * Cleanups for MCE MSR emulation s390: * add an interface to provide a hypervisor dump for secure guests * improve selftests to use TAP interface * enable interpretive execution of zPCI instructions (for PCI passthrough) * First part of deferred teardown * CPU Topology * PV attestation * Minor fixes Generic: * new selftests API using struct kvm_vcpu instead of a (vm, id) tuple x86: * Use try_cmpxchg64 instead of cmpxchg64 * Bugfixes * Ignore benign host accesses to PMU MSRs when PMU is disabled * Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior * x86/MMU: Allow NX huge pages to be disabled on a per-vm basis * Port eager page splitting to shadow MMU as well * Enable CMCI capability by default and handle injected UCNA errors * Expose pid of vcpu threads in debugfs * x2AVIC support for AMD * cleanup PIO emulation * Fixes for LLDT/LTR emulation * Don't require refcounted "struct page" to create huge SPTEs x86 cleanups: * Use separate namespaces for guest PTEs and shadow PTEs bitmasks * PIO emulation * Reorganize rmap API, mostly around rmap destruction * Do not workaround very old KVM bugs for L0 that runs with nesting enabled * new selftests API for CPUID
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r--arch/x86/kvm/x86.c704
1 files changed, 470 insertions, 234 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5fa335a4ea7..33560bfa0cac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,8 +87,11 @@
#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
-u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
-EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
+
+struct kvm_caps kvm_caps __read_mostly = {
+ .supported_mce_cap = MCG_CTL_P | MCG_SER_P,
+};
+EXPORT_SYMBOL_GPL(kvm_caps);
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
@@ -151,19 +154,6 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly kvmclock_periodic_sync = true;
module_param(kvmclock_periodic_sync, bool, S_IRUGO);
-bool __read_mostly kvm_has_tsc_control;
-EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
-u32 __read_mostly kvm_max_guest_tsc_khz;
-EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
-u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
-EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
-u64 __read_mostly kvm_max_tsc_scaling_ratio;
-EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
-u64 __read_mostly kvm_default_tsc_scaling_ratio;
-EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
-bool __read_mostly kvm_has_bus_lock_exit;
-EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
-
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
@@ -235,8 +225,6 @@ EXPORT_SYMBOL_GPL(enable_apicv);
u64 __read_mostly host_xss;
EXPORT_SYMBOL_GPL(host_xss);
-u64 __read_mostly supported_xss;
-EXPORT_SYMBOL_GPL(supported_xss);
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
@@ -298,7 +286,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
STATS_DESC_COUNTER(VCPU, preemption_reported),
STATS_DESC_COUNTER(VCPU, preemption_other),
- STATS_DESC_IBOOLEAN(VCPU, guest_mode)
+ STATS_DESC_IBOOLEAN(VCPU, guest_mode),
+ STATS_DESC_COUNTER(VCPU, notify_window_exits),
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -311,8 +300,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
};
u64 __read_mostly host_xcr0;
-u64 __read_mostly supported_xcr0;
-EXPORT_SYMBOL_GPL(supported_xcr0);
static struct kmem_cache *x86_emulator_cache;
@@ -862,7 +849,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
*/
real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
- if (real_gpa == UNMAPPED_GVA)
+ if (real_gpa == INVALID_GPA)
return 0;
/* Note the offset, PDPTRs are 32 byte aligned when using PAE paging. */
@@ -1094,7 +1081,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & cr4_reserved_bits)
return false;
@@ -1102,9 +1089,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return false;
- return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
+ return true;
+}
+EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
+
+static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+{
+ return __kvm_is_valid_cr4(vcpu, cr4) &&
+ static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
}
-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
{
@@ -1450,6 +1443,7 @@ static const u32 msrs_to_save_all[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+ MSR_IA32_PEBS_ENABLE, MSR_IA32_DS_AREA, MSR_PEBS_DATA_CFG,
MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3,
MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3,
@@ -2051,13 +2045,6 @@ int kvm_emulate_invd(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_invd);
-int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
-{
- pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
- return kvm_emulate_as_nop(vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
-
int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -2065,11 +2052,26 @@ int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
-int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
+
+static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
{
- pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
+ return kvm_handle_invalid_op(vcpu);
+
+ pr_warn_once("kvm: %s instruction emulated as NOP!\n", insn);
return kvm_emulate_as_nop(vcpu);
}
+int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_monitor_mwait(vcpu, "MWAIT");
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+
+int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
+{
+ return kvm_emulate_monitor_mwait(vcpu, "MONITOR");
+}
EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
@@ -2349,12 +2351,12 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
/* Guest TSC same frequency as host TSC? */
if (!scale) {
- kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
+ kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
return 0;
}
/* TSC scaling supported? */
- if (!kvm_has_tsc_control) {
+ if (!kvm_caps.has_tsc_control) {
if (user_tsc_khz > tsc_khz) {
vcpu->arch.tsc_catchup = 1;
vcpu->arch.tsc_always_catchup = 1;
@@ -2366,10 +2368,10 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
}
/* TSC scaling required - calculate ratio */
- ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
+ ratio = mul_u64_u32_div(1ULL << kvm_caps.tsc_scaling_ratio_frac_bits,
user_tsc_khz, tsc_khz);
- if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
+ if (ratio == 0 || ratio >= kvm_caps.max_tsc_scaling_ratio) {
pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
user_tsc_khz);
return -1;
@@ -2387,7 +2389,7 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
/* tsc_khz can be zero if TSC calibration fails */
if (user_tsc_khz == 0) {
/* set tsc_scaling_ratio to a safe value */
- kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
+ kvm_vcpu_write_tsc_multiplier(vcpu, kvm_caps.default_tsc_scaling_ratio);
return -1;
}
@@ -2464,18 +2466,18 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
* (frac) represent the fractional part, ie. ratio represents a fixed
* point number (mult + frac * 2^(-N)).
*
- * N equals to kvm_tsc_scaling_ratio_frac_bits.
+ * N equals to kvm_caps.tsc_scaling_ratio_frac_bits.
*/
static inline u64 __scale_tsc(u64 ratio, u64 tsc)
{
- return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
+ return mul_u64_u64_shr(tsc, ratio, kvm_caps.tsc_scaling_ratio_frac_bits);
}
u64 kvm_scale_tsc(u64 tsc, u64 ratio)
{
u64 _tsc = tsc;
- if (ratio != kvm_default_tsc_scaling_ratio)
+ if (ratio != kvm_caps.default_tsc_scaling_ratio)
_tsc = __scale_tsc(ratio, tsc);
return _tsc;
@@ -2502,11 +2504,11 @@ u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
{
u64 nested_offset;
- if (l2_multiplier == kvm_default_tsc_scaling_ratio)
+ if (l2_multiplier == kvm_caps.default_tsc_scaling_ratio)
nested_offset = l1_offset;
else
nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
- kvm_tsc_scaling_ratio_frac_bits);
+ kvm_caps.tsc_scaling_ratio_frac_bits);
nested_offset += l2_offset;
return nested_offset;
@@ -2515,9 +2517,9 @@ EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
{
- if (l2_multiplier != kvm_default_tsc_scaling_ratio)
+ if (l2_multiplier != kvm_caps.default_tsc_scaling_ratio)
return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
- kvm_tsc_scaling_ratio_frac_bits);
+ kvm_caps.tsc_scaling_ratio_frac_bits);
return l1_multiplier;
}
@@ -2559,7 +2561,7 @@ static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multipli
else
vcpu->arch.tsc_scaling_ratio = l1_multiplier;
- if (kvm_has_tsc_control)
+ if (kvm_caps.has_tsc_control)
static_call(kvm_x86_write_tsc_multiplier)(
vcpu, vcpu->arch.tsc_scaling_ratio);
}
@@ -2695,7 +2697,7 @@ static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
{
- if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
+ if (vcpu->arch.l1_tsc_scaling_ratio != kvm_caps.default_tsc_scaling_ratio)
WARN_ON(adjustment < 0);
adjustment = kvm_scale_tsc((u64) adjustment,
vcpu->arch.l1_tsc_scaling_ratio);
@@ -3108,7 +3110,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
- if (kvm_has_tsc_control)
+ if (kvm_caps.has_tsc_control)
tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
v->arch.l1_tsc_scaling_ratio);
@@ -3198,6 +3200,16 @@ static void kvmclock_sync_fn(struct work_struct *work)
KVMCLOCK_SYNC_PERIOD);
}
+/* These helpers are safe iff @msr is known to be an MCx bank MSR. */
+static bool is_mci_control_msr(u32 msr)
+{
+ return (msr & 3) == 0;
+}
+static bool is_mci_status_msr(u32 msr)
+{
+ return (msr & 3) == 1;
+}
+
/*
* On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
*/
@@ -3216,6 +3228,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
unsigned bank_num = mcg_cap & 0xff;
u32 msr = msr_info->index;
u64 data = msr_info->data;
+ u32 offset, last_msr;
switch (msr) {
case MSR_IA32_MCG_STATUS:
@@ -3229,32 +3242,53 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.mcg_ctl = data;
break;
- default:
- if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = array_index_nospec(
- msr - MSR_IA32_MC0_CTL,
- MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
-
- /* only 0 or all 1s can be written to IA32_MCi_CTL
- * some Linux kernels though clear bit 10 in bank 4 to
- * workaround a BIOS/GART TBL issue on AMD K8s, ignore
- * this to avoid an uncatched #GP in the guest
- */
- if ((offset & 0x3) == 0 &&
- data != 0 && (data | (1 << 10)) != ~(u64)0)
- return -1;
-
- /* MCi_STATUS */
- if (!msr_info->host_initiated &&
- (offset & 0x3) == 1 && data != 0) {
- if (!can_set_mci_status(vcpu))
- return -1;
- }
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
+ last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
+ if (msr > last_msr)
+ return 1;
- vcpu->arch.mce_banks[offset] = data;
- break;
- }
+ if (!(mcg_cap & MCG_CMCI_P) && (data || !msr_info->host_initiated))
+ return 1;
+ /* An attempt to write a 1 to a reserved bit raises #GP */
+ if (data & ~(MCI_CTL2_CMCI_EN | MCI_CTL2_CMCI_THRESHOLD_MASK))
+ return 1;
+ offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
+ last_msr + 1 - MSR_IA32_MC0_CTL2);
+ vcpu->arch.mci_ctl2_banks[offset] = data;
+ break;
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
+ last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
+ if (msr > last_msr)
+ return 1;
+
+ /*
+ * Only 0 or all 1s can be written to IA32_MCi_CTL, all other
+ * values are architecturally undefined. But, some Linux
+ * kernels clear bit 10 in bank 4 to workaround a BIOS/GART TLB
+ * issue on AMD K8s, allow bit 10 to be clear when setting all
+ * other bits in order to avoid an uncaught #GP in the guest.
+ *
+ * UNIXWARE clears bit 0 of MC1_CTL to ignore correctable,
+ * single-bit ECC data errors.
+ */
+ if (is_mci_control_msr(msr) &&
+ data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+ return 1;
+
+ /*
+ * All CPUs allow writing 0 to MCi_STATUS MSRs to clear the MSR.
+ * AMD-based CPUs allow non-zero values, but if and only if
+ * HWCR[McStatusWrEn] is set.
+ */
+ if (!msr_info->host_initiated && is_mci_status_msr(msr) &&
+ data != 0 && !can_set_mci_status(vcpu))
+ return 1;
+
+ offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
+ last_msr + 1 - MSR_IA32_MC0_CTL);
+ vcpu->arch.mce_banks[offset] = data;
+ break;
+ default:
return 1;
}
return 0;
@@ -3538,7 +3572,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
break;
- case 0x200 ... 0x2ff:
+ case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
+ case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
@@ -3560,9 +3595,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.ia32_tsc_adjust_msr = data;
}
break;
- case MSR_IA32_MISC_ENABLE:
+ case MSR_IA32_MISC_ENABLE: {
+ u64 old_val = vcpu->arch.ia32_misc_enable_msr;
+
+ if (!msr_info->host_initiated) {
+ /* RO bits */
+ if ((old_val ^ data) & MSR_IA32_MISC_ENABLE_PMU_RO_MASK)
+ return 1;
+
+ /* R bits, i.e. writes are ignored, but don't fault. */
+ data = data & ~MSR_IA32_MISC_ENABLE_EMON;
+ data |= old_val & MSR_IA32_MISC_ENABLE_EMON;
+ }
+
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
- ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
+ ((old_val ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
@@ -3571,6 +3618,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.ia32_misc_enable_msr = data;
}
break;
+ }
case MSR_IA32_SMBASE:
if (!msr_info->host_initiated)
return 1;
@@ -3597,7 +3645,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* IA32_XSS[bit 8]. Guests have to use RDMSR/WRMSR rather than
* XSAVES/XRSTORS to save/restore PT MSRs.
*/
- if (data & ~supported_xss)
+ if (data & ~kvm_caps.supported_xss)
return 1;
vcpu->arch.ia32_xss = data;
kvm_update_cpuid_runtime(vcpu);
@@ -3695,6 +3743,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr_info);
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
@@ -3785,6 +3834,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.guest_fpu.xfd_err = data;
break;
#endif
+ case MSR_IA32_PEBS_ENABLE:
+ case MSR_IA32_DS_AREA:
+ case MSR_PEBS_DATA_CFG:
+ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ if (kvm_pmu_is_valid_msr(vcpu, msr))
+ return kvm_pmu_set_msr(vcpu, msr_info);
+ /*
+ * Userspace is allowed to write '0' to MSRs that KVM reports
+ * as to-be-saved, even if an MSRs isn't fully supported.
+ */
+ return !msr_info->host_initiated || data;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
@@ -3799,6 +3859,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap;
unsigned bank_num = mcg_cap & 0xff;
+ u32 offset, last_msr;
switch (msr) {
case MSR_IA32_P5_MC_ADDR:
@@ -3816,16 +3877,27 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
case MSR_IA32_MCG_STATUS:
data = vcpu->arch.mcg_status;
break;
- default:
- if (msr >= MSR_IA32_MC0_CTL &&
- msr < MSR_IA32_MCx_CTL(bank_num)) {
- u32 offset = array_index_nospec(
- msr - MSR_IA32_MC0_CTL,
- MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
+ last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
+ if (msr > last_msr)
+ return 1;
- data = vcpu->arch.mce_banks[offset];
- break;
- }
+ if (!(mcg_cap & MCG_CMCI_P) && !host)
+ return 1;
+ offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
+ last_msr + 1 - MSR_IA32_MC0_CTL2);
+ data = vcpu->arch.mci_ctl2_banks[offset];
+ break;
+ case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
+ last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
+ if (msr > last_msr)
+ return 1;
+
+ offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
+ last_msr + 1 - MSR_IA32_MC0_CTL);
+ data = vcpu->arch.mce_banks[offset];
+ break;
+ default:
return 1;
}
*pdata = data;
@@ -3865,9 +3937,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_DRAM_ENERGY_STATUS: /* DRAM controller */
msr_info->data = 0;
break;
+ case MSR_IA32_PEBS_ENABLE:
+ case MSR_IA32_DS_AREA:
+ case MSR_PEBS_DATA_CFG:
case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info);
+ /*
+ * Userspace is allowed to read MSRs that KVM reports as
+ * to-be-saved, even if an MSR isn't fully supported.
+ */
if (!msr_info->host_initiated)
return 1;
msr_info->data = 0;
@@ -3922,7 +4001,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
}
case MSR_MTRRcap:
- case 0x200 ... 0x2ff:
+ case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
+ case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
case 0xcd: /* fsb frequency */
msr_info->data = 3;
@@ -4038,6 +4118,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
+ case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated);
case MSR_IA32_XSS:
@@ -4280,6 +4361,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_LAST_CPU:
case KVM_CAP_X86_USER_SPACE_MSR:
@@ -4296,6 +4378,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SYS_ATTRIBUTES:
case KVM_CAP_VAPIC:
case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
r = 1;
break;
case KVM_CAP_EXIT_HYPERCALL:
@@ -4357,7 +4440,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_TSC_CONTROL:
case KVM_CAP_VM_TSC_CONTROL:
- r = kvm_has_tsc_control;
+ r = kvm_caps.has_tsc_control;
break;
case KVM_CAP_X2APIC_API:
r = KVM_X2APIC_API_VALID_FLAGS;
@@ -4379,7 +4462,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = sched_info_on();
break;
case KVM_CAP_X86_BUS_LOCK_EXIT:
- if (kvm_has_bus_lock_exit)
+ if (kvm_caps.has_bus_lock_exit)
r = KVM_BUS_LOCK_DETECTION_OFF |
KVM_BUS_LOCK_DETECTION_EXIT;
else
@@ -4388,17 +4471,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_XSAVE2: {
u64 guest_perm = xstate_get_guest_group_perm();
- r = xstate_required_size(supported_xcr0 & guest_perm, false);
+ r = xstate_required_size(kvm_caps.supported_xcr0 & guest_perm, false);
if (r < sizeof(struct kvm_xsave))
r = sizeof(struct kvm_xsave);
break;
+ }
case KVM_CAP_PMU_CAPABILITY:
r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0;
break;
- }
case KVM_CAP_DISABLE_QUIRKS2:
r = KVM_X86_VALID_QUIRKS;
break;
+ case KVM_CAP_X86_NOTIFY_VMEXIT:
+ r = kvm_caps.has_notify_vmexit;
+ break;
default:
break;
}
@@ -4426,7 +4512,7 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_X86_XCOMP_GUEST_SUPP:
- if (put_user(supported_xcr0, uaddr))
+ if (put_user(kvm_caps.supported_xcr0, uaddr))
return -EFAULT;
return 0;
default:
@@ -4503,8 +4589,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
}
case KVM_X86_GET_MCE_CAP_SUPPORTED:
r = -EFAULT;
- if (copy_to_user(argp, &kvm_mce_cap_supported,
- sizeof(kvm_mce_cap_supported)))
+ if (copy_to_user(argp, &kvm_caps.supported_mce_cap,
+ sizeof(kvm_caps.supported_mce_cap)))
goto out;
r = 0;
break;
@@ -4803,22 +4889,63 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
r = -EINVAL;
if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
goto out;
- if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
+ if (mcg_cap & ~(kvm_caps.supported_mce_cap | 0xff | 0xff0000))
goto out;
r = 0;
vcpu->arch.mcg_cap = mcg_cap;
/* Init IA32_MCG_CTL to all 1s */
if (mcg_cap & MCG_CTL_P)
vcpu->arch.mcg_ctl = ~(u64)0;
- /* Init IA32_MCi_CTL to all 1s */
- for (bank = 0; bank < bank_num; bank++)
+ /* Init IA32_MCi_CTL to all 1s, IA32_MCi_CTL2 to all 0s */
+ for (bank = 0; bank < bank_num; bank++) {
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+ if (mcg_cap & MCG_CMCI_P)
+ vcpu->arch.mci_ctl2_banks[bank] = 0;
+ }
+
+ kvm_apic_after_set_mcg_cap(vcpu);
static_call(kvm_x86_setup_mce)(vcpu);
out:
return r;
}
+/*
+ * Validate this is an UCNA (uncorrectable no action) error by checking the
+ * MCG_STATUS and MCi_STATUS registers:
+ * - none of the bits for Machine Check Exceptions are set
+ * - both the VAL (valid) and UC (uncorrectable) bits are set
+ * MCI_STATUS_PCC - Processor Context Corrupted
+ * MCI_STATUS_S - Signaled as a Machine Check Exception
+ * MCI_STATUS_AR - Software recoverable Action Required
+ */
+static bool is_ucna(struct kvm_x86_mce *mce)
+{
+ return !mce->mcg_status &&
+ !(mce->status & (MCI_STATUS_PCC | MCI_STATUS_S | MCI_STATUS_AR)) &&
+ (mce->status & MCI_STATUS_VAL) &&
+ (mce->status & MCI_STATUS_UC);
+}
+
+static int kvm_vcpu_x86_set_ucna(struct kvm_vcpu *vcpu, struct kvm_x86_mce *mce, u64* banks)
+{
+ u64 mcg_cap = vcpu->arch.mcg_cap;
+
+ banks[1] = mce->status;
+ banks[2] = mce->addr;
+ banks[3] = mce->misc;
+ vcpu->arch.mcg_status = mce->mcg_status;
+
+ if (!(mcg_cap & MCG_CMCI_P) ||
+ !(vcpu->arch.mci_ctl2_banks[mce->bank] & MCI_CTL2_CMCI_EN))
+ return 0;
+
+ if (lapic_in_kernel(vcpu))
+ kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTCMCI);
+
+ return 0;
+}
+
static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
struct kvm_x86_mce *mce)
{
@@ -4828,6 +4955,12 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
return -EINVAL;
+
+ banks += array_index_nospec(4 * mce->bank, 4 * bank_num);
+
+ if (is_ucna(mce))
+ return kvm_vcpu_x86_set_ucna(vcpu, mce, banks);
+
/*
* if IA32_MCG_CTL is not all 1s, the uncorrected error
* reporting is disabled
@@ -4835,7 +4968,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
vcpu->arch.mcg_ctl != ~(u64)0)
return 0;
- banks += 4 * mce->bank;
/*
* if IA32_MCi_CTL is not all 1s, the uncorrected error
* reporting is disabled for the bank
@@ -4941,6 +5073,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SMM);
if (vcpu->kvm->arch.exception_payload_enabled)
events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
+ if (vcpu->kvm->arch.triple_fault_event) {
+ events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ }
memset(&events->reserved, 0, sizeof(events->reserved));
}
@@ -4954,7 +5090,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
| KVM_VCPUEVENT_VALID_SIPI_VECTOR
| KVM_VCPUEVENT_VALID_SHADOW
| KVM_VCPUEVENT_VALID_SMM
- | KVM_VCPUEVENT_VALID_PAYLOAD))
+ | KVM_VCPUEVENT_VALID_PAYLOAD
+ | KVM_VCPUEVENT_VALID_TRIPLE_FAULT))
return -EINVAL;
if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
@@ -5027,6 +5164,15 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
}
}
+ if (events->flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) {
+ if (!vcpu->kvm->arch.triple_fault_event)
+ return -EINVAL;
+ if (events->triple_fault.pending)
+ kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ else
+ kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ }
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
@@ -5095,7 +5241,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
guest_xsave->region,
- supported_xcr0, &vcpu->arch.pkru);
+ kvm_caps.supported_xcr0,
+ &vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
@@ -5600,8 +5747,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
user_tsc_khz = (u32)arg;
- if (kvm_has_tsc_control &&
- user_tsc_khz >= kvm_max_guest_tsc_khz)
+ if (kvm_caps.has_tsc_control &&
+ user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
goto out;
if (user_tsc_khz == 0)
@@ -6028,6 +6175,10 @@ split_irqchip_unlock:
kvm->arch.exception_payload_enabled = cap->args[0];
r = 0;
break;
+ case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
+ kvm->arch.triple_fault_event = cap->args[0];
+ r = 0;
+ break;
case KVM_CAP_X86_USER_SPACE_MSR:
r = -EINVAL;
if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
@@ -6046,7 +6197,7 @@ split_irqchip_unlock:
(cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
break;
- if (kvm_has_bus_lock_exit &&
+ if (kvm_caps.has_bus_lock_exit &&
cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
kvm->arch.bus_lock_detection_enabled = true;
r = 0;
@@ -6109,6 +6260,65 @@ split_irqchip_unlock:
}
mutex_unlock(&kvm->lock);
break;
+ case KVM_CAP_MAX_VCPU_ID:
+ r = -EINVAL;
+ if (cap->args[0] > KVM_MAX_VCPU_IDS)
+ break;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.max_vcpu_ids == cap->args[0]) {
+ r = 0;
+ } else if (!kvm->arch.max_vcpu_ids) {
+ kvm->arch.max_vcpu_ids = cap->args[0];
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ break;
+ case KVM_CAP_X86_NOTIFY_VMEXIT:
+ r = -EINVAL;
+ if ((u32)cap->args[0] & ~KVM_X86_NOTIFY_VMEXIT_VALID_BITS)
+ break;
+ if (!kvm_caps.has_notify_vmexit)
+ break;
+ if (!((u32)cap->args[0] & KVM_X86_NOTIFY_VMEXIT_ENABLED))
+ break;
+ mutex_lock(&kvm->lock);
+ if (!kvm->created_vcpus) {
+ kvm->arch.notify_window = cap->args[0] >> 32;
+ kvm->arch.notify_vmexit_flags = (u32)cap->args[0];
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ break;
+ case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
+ r = -EINVAL;
+
+ /*
+ * Since the risk of disabling NX hugepages is a guest crashing
+ * the system, ensure the userspace process has permission to
+ * reboot the system.
+ *
+ * Note that unlike the reboot() syscall, the process must have
+ * this capability in the root namespace because exposing
+ * /dev/kvm into a container does not limit the scope of the
+ * iTLB multihit bug to that container. In other words,
+ * this must use capable(), not ns_capable().
+ */
+ if (!capable(CAP_SYS_BOOT)) {
+ r = -EPERM;
+ break;
+ }
+
+ if (cap->args[0])
+ break;
+
+ mutex_lock(&kvm->lock);
+ if (!kvm->created_vcpus) {
+ kvm->arch.disable_nx_huge_pages = true;
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ break;
default:
r = -EINVAL;
break;
@@ -6584,8 +6794,8 @@ set_pit2_out:
r = -EINVAL;
user_tsc_khz = (u32)arg;
- if (kvm_has_tsc_control &&
- user_tsc_khz >= kvm_max_guest_tsc_khz)
+ if (kvm_caps.has_tsc_control &&
+ user_tsc_khz >= kvm_caps.max_guest_tsc_khz)
goto out;
if (user_tsc_khz == 0)
@@ -6660,15 +6870,12 @@ out:
static void kvm_init_msr_list(void)
{
- struct x86_pmu_capability x86_pmu;
u32 dummy[2];
unsigned i;
BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3,
"Please update the fixed PMCs in msrs_to_saved_all[]");
- perf_get_x86_pmu_capability(&x86_pmu);
-
num_msrs_to_save = 0;
num_emulated_msrs = 0;
num_msr_based_features = 0;
@@ -6720,12 +6927,12 @@ static void kvm_init_msr_list(void)
break;
case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
- min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+ min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
- min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
+ min(INTEL_PMC_MAX_GENERIC, kvm_pmu_cap.num_counters_gp))
continue;
break;
case MSR_IA32_XFD:
@@ -6882,7 +7089,7 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
- if (gpa == UNMAPPED_GVA)
+ if (gpa == INVALID_GPA)
return X86EMUL_PROPAGATE_FAULT;
ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
offset, toread);
@@ -6913,7 +7120,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
/* Inline kvm_read_guest_virt_helper for speed. */
gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access|PFERR_FETCH_MASK,
exception);
- if (unlikely(gpa == UNMAPPED_GVA))
+ if (unlikely(gpa == INVALID_GPA))
return X86EMUL_PROPAGATE_FAULT;
offset = addr & (PAGE_SIZE-1);
@@ -6983,7 +7190,7 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
- if (gpa == UNMAPPED_GVA)
+ if (gpa == INVALID_GPA)
return X86EMUL_PROPAGATE_FAULT;
ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
if (ret < 0) {
@@ -7094,7 +7301,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
*gpa = mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
- if (*gpa == UNMAPPED_GVA)
+ if (*gpa == INVALID_GPA)
return -1;
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
@@ -7331,7 +7538,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
- if (gpa == UNMAPPED_GVA ||
+ if (gpa == INVALID_GPA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto emul_write;
@@ -7385,36 +7592,47 @@ emul_write:
return emulator_write_emulated(ctxt, addr, new, bytes, exception);
}
-static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
+static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *data,
+ unsigned int count, bool in)
{
- int r = 0, i;
+ unsigned i;
+ int r;
- for (i = 0; i < vcpu->arch.pio.count; i++) {
- if (vcpu->arch.pio.in)
- r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
- vcpu->arch.pio.size, pd);
+ WARN_ON_ONCE(vcpu->arch.pio.count);
+ for (i = 0; i < count; i++) {
+ if (in)
+ r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, port, size, data);
else
- r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
- vcpu->arch.pio.port, vcpu->arch.pio.size,
- pd);
- if (r)
+ r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, port, size, data);
+
+ if (r) {
+ if (i == 0)
+ goto userspace_io;
+
+ /*
+ * Userspace must have unregistered the device while PIO
+ * was running. Drop writes / read as 0.
+ */
+ if (in)
+ memset(data, 0, size * (count - i));
break;
- pd += vcpu->arch.pio.size;
+ }
+
+ data += size;
}
- return r;
-}
+ return 1;
-static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
- unsigned short port,
- unsigned int count, bool in)
-{
+userspace_io:
vcpu->arch.pio.port = port;
vcpu->arch.pio.in = in;
- vcpu->arch.pio.count = count;
+ vcpu->arch.pio.count = count;
vcpu->arch.pio.size = size;
- if (!kernel_pio(vcpu, vcpu->arch.pio_data))
- return 1;
+ if (in)
+ memset(vcpu->arch.pio_data, 0, size * count);
+ else
+ memcpy(vcpu->arch.pio_data, data, size * count);
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -7422,30 +7640,33 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
vcpu->run->io.count = count;
vcpu->run->io.port = port;
-
return 0;
}
-static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
- unsigned short port, unsigned int count)
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
{
- WARN_ON(vcpu->arch.pio.count);
- memset(vcpu->arch.pio_data, 0, size * count);
- return emulator_pio_in_out(vcpu, size, port, count, true);
+ int r = emulator_pio_in_out(vcpu, size, port, val, count, true);
+ if (r)
+ trace_kvm_pio(KVM_PIO_IN, port, size, count, val);
+
+ return r;
}
static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
{
int size = vcpu->arch.pio.size;
- unsigned count = vcpu->arch.pio.count;
+ unsigned int count = vcpu->arch.pio.count;
memcpy(val, vcpu->arch.pio_data, size * count);
trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
vcpu->arch.pio.count = 0;
}
-static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val, unsigned int count)
+static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
+ int size, unsigned short port, void *val,
+ unsigned int count)
{
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
if (vcpu->arch.pio.count) {
/*
* Complete a previous iteration that required userspace I/O.
@@ -7454,39 +7675,19 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
* shenanigans as KVM doesn't support modifying the rep count,
* and the emulator ensures @count doesn't overflow the buffer.
*/
- } else {
- int r = __emulator_pio_in(vcpu, size, port, count);
- if (!r)
- return r;
-
- /* Results already available, fall through. */
+ complete_emulator_pio_in(vcpu, val);
+ return 1;
}
- complete_emulator_pio_in(vcpu, val);
- return 1;
-}
-
-static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
- int size, unsigned short port, void *val,
- unsigned int count)
-{
- return emulator_pio_in(emul_to_vcpu(ctxt), size, port, val, count);
-
+ return emulator_pio_in(vcpu, size, port, val, count);
}
static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port, const void *val,
unsigned int count)
{
- int ret;
-
- memcpy(vcpu->arch.pio_data, val, size * count);
- trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
- ret = emulator_pio_in_out(vcpu, size, port, count, false);
- if (ret)
- vcpu->arch.pio.count = 0;
-
- return ret;
+ trace_kvm_pio(KVM_PIO_OUT, port, size, count, val);
+ return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
}
static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
@@ -7868,7 +8069,16 @@ static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
}
+static void emulator_vm_bugged(struct x86_emulate_ctxt *ctxt)
+{
+ struct kvm *kvm = emul_to_vcpu(ctxt)->kvm;
+
+ if (!kvm->vm_bugged)
+ kvm_vm_bugged(kvm);
+}
+
static const struct x86_emulate_ops emulate_ops = {
+ .vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
.read_std = emulator_read_std,
@@ -8145,7 +8355,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
* If the mapping is invalid in guest, let cpu retry
* it to generate fault.
*/
- if (gpa == UNMAPPED_GVA)
+ if (gpa == INVALID_GPA)
return true;
}
@@ -8672,11 +8882,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
/* For size less than 4 we merge, else we zero extend */
val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
- /*
- * Since vcpu->arch.pio.count == 1 let emulator_pio_in perform
- * the copy and tracing
- */
- emulator_pio_in(vcpu, vcpu->arch.pio.size, vcpu->arch.pio.port, &val, 1);
+ complete_emulator_pio_in(vcpu, &val);
kvm_rax_write(vcpu, val);
return kvm_skip_emulated_instruction(vcpu);
@@ -8751,7 +8957,7 @@ static void kvm_hyperv_tsc_notifier(void)
/* TSC frequency always matches when on Hyper-V */
for_each_present_cpu(cpu)
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
- kvm_max_guest_tsc_khz = tsc_khz;
+ kvm_caps.max_guest_tsc_khz = tsc_khz;
list_for_each_entry(kvm, &vm_list, vm_list) {
__kvm_start_pvclock_update(kvm);
@@ -8952,25 +9158,23 @@ static struct notifier_block pvclock_gtod_notifier = {
int kvm_arch_init(void *opaque)
{
struct kvm_x86_init_ops *ops = opaque;
+ u64 host_pat;
int r;
if (kvm_x86_ops.hardware_enable) {
pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name);
- r = -EEXIST;
- goto out;
+ return -EEXIST;
}
if (!ops->cpu_has_kvm_support()) {
pr_err_ratelimited("kvm: no hardware support for '%s'\n",
ops->runtime_ops->name);
- r = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
if (ops->disabled_by_bios()) {
pr_err_ratelimited("kvm: support for '%s' disabled by bios\n",
ops->runtime_ops->name);
- r = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
/*
@@ -8980,27 +9184,37 @@ int kvm_arch_init(void *opaque)
*/
if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
printk(KERN_ERR "kvm: inadequate fpu\n");
- r = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
if (IS_ENABLED(CONFIG_PREEMPT_RT) && !boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
pr_err("RT requires X86_FEATURE_CONSTANT_TSC\n");
- r = -EOPNOTSUPP;
- goto out;
+ return -EOPNOTSUPP;
}
- r = -ENOMEM;
+ /*
+ * KVM assumes that PAT entry '0' encodes WB memtype and simply zeroes
+ * the PAT bits in SPTEs. Bail if PAT[0] is programmed to something
+ * other than WB. Note, EPT doesn't utilize the PAT, but don't bother
+ * with an exception. PAT[0] is set to WB on RESET and also by the
+ * kernel, i.e. failure indicates a kernel bug or broken firmware.
+ */
+ if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) ||
+ (host_pat & GENMASK(2, 0)) != 6) {
+ pr_err("kvm: host PAT[0] is not WB\n");
+ return -EIO;
+ }
x86_emulator_cache = kvm_alloc_emulator_cache();
if (!x86_emulator_cache) {
pr_err("kvm: failed to allocate cache for x86 emulator\n");
- goto out;
+ return -ENOMEM;
}
user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
if (!user_return_msrs) {
printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
+ r = -ENOMEM;
goto out_free_x86_emulator_cache;
}
kvm_nr_uret_msrs = 0;
@@ -9013,7 +9227,7 @@ int kvm_arch_init(void *opaque)
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
+ kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
}
if (pi_inject_timer == -1)
@@ -9031,7 +9245,6 @@ out_free_percpu:
free_percpu(user_return_msrs);
out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
-out:
return r;
}
@@ -9406,7 +9619,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
- if (vcpu->arch.apicv_active)
+ if (vcpu->arch.apic->apicv_active)
return;
if (!vcpu->arch.apic->vapic_addr)
@@ -9435,6 +9648,11 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
static void kvm_inject_exception(struct kvm_vcpu *vcpu)
{
+ trace_kvm_inj_exception(vcpu->arch.exception.nr,
+ vcpu->arch.exception.has_error_code,
+ vcpu->arch.exception.error_code,
+ vcpu->arch.exception.injected);
+
if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
vcpu->arch.exception.error_code = false;
static_call(kvm_x86_queue_exception)(vcpu);
@@ -9470,7 +9688,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
static_call(kvm_x86_inject_nmi)(vcpu);
can_inject = false;
} else if (vcpu->arch.interrupt.injected) {
- static_call(kvm_x86_inject_irq)(vcpu);
+ static_call(kvm_x86_inject_irq)(vcpu, true);
can_inject = false;
}
}
@@ -9492,13 +9710,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
/* try to inject new event if pending */
if (vcpu->arch.exception.pending) {
- trace_kvm_inj_exception(vcpu->arch.exception.nr,
- vcpu->arch.exception.has_error_code,
- vcpu->arch.exception.error_code);
-
- vcpu->arch.exception.pending = false;
- vcpu->arch.exception.injected = true;
-
if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
X86_EFLAGS_RF);
@@ -9512,6 +9723,10 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
}
kvm_inject_exception(vcpu);
+
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.injected = true;
+
can_inject = false;
}
@@ -9564,7 +9779,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
goto out;
if (r) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
- static_call(kvm_x86_inject_irq)(vcpu);
+ static_call(kvm_x86_inject_irq)(vcpu, false);
WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
}
if (kvm_cpu_has_injectable_intr(vcpu))
@@ -9857,6 +10072,7 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
+ struct kvm_lapic *apic = vcpu->arch.apic;
bool activate;
if (!lapic_in_kernel(vcpu))
@@ -9865,12 +10081,14 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
down_read(&vcpu->kvm->arch.apicv_update_lock);
preempt_disable();
- activate = kvm_vcpu_apicv_activated(vcpu);
+ /* Do not activate APICV when APIC is disabled */
+ activate = kvm_vcpu_apicv_activated(vcpu) &&
+ (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);
- if (vcpu->arch.apicv_active == activate)
+ if (apic->apicv_active == activate)
goto out;
- vcpu->arch.apicv_active = activate;
+ apic->apicv_active = activate;
kvm_apic_update_apicv(vcpu);
static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
@@ -9880,7 +10098,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
* still active when the interrupt got accepted. Make sure
* inject_pending_event() is called to check for that.
*/
- if (!vcpu->arch.apicv_active)
+ if (!apic->apicv_active)
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
@@ -10275,7 +10493,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* per-VM state, and responsing vCPUs must wait for the update
* to complete before servicing KVM_REQ_APICV_UPDATE.
*/
- WARN_ON_ONCE(kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu));
+ WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
+ (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
@@ -10654,8 +10873,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = cui(vcpu);
if (r <= 0)
goto out;
- } else
- WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
+ } else {
+ WARN_ON_ONCE(vcpu->arch.pio.count);
+ WARN_ON_ONCE(vcpu->mmio_needed);
+ }
if (kvm_run->immediate_exit) {
r = -EINTR;
@@ -11183,7 +11404,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
tr->physical_address = gpa;
- tr->valid = gpa != UNMAPPED_GVA;
+ tr->valid = gpa != INVALID_GPA;
tr->writeable = 1;
tr->usermode = 0;
@@ -11276,11 +11497,17 @@ static int sync_regs(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
- if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+ if (kvm_check_tsc_unstable() && kvm->created_vcpus)
pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n");
- return 0;
+ if (!kvm->arch.max_vcpu_ids)
+ kvm->arch.max_vcpu_ids = KVM_MAX_VCPU_IDS;
+
+ if (id >= kvm->arch.max_vcpu_ids)
+ return -EINVAL;
+
+ return static_call(kvm_x86_vcpu_precreate)(kvm);
}
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
@@ -11317,7 +11544,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
* will ensure the vCPU gets the correct state before VM-Entry.
*/
if (enable_apicv) {
- vcpu->arch.apicv_active = true;
+ vcpu->arch.apic->apicv_active = true;
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
} else
@@ -11330,9 +11557,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
goto fail_free_lapic;
vcpu->arch.pio_data = page_address(page);
- vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+ vcpu->arch.mce_banks = kcalloc(KVM_MAX_MCE_BANKS * 4, sizeof(u64),
GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.mce_banks)
+ vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
+ GFP_KERNEL_ACCOUNT);
+ if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
goto fail_free_pio_data;
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
@@ -11386,6 +11615,7 @@ free_wbinvd_dirty_mask:
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
+ kfree(vcpu->arch.mci_ctl2_banks);
fail_free_pio_data:
free_page((unsigned long)vcpu->arch.pio_data);
fail_free_lapic:
@@ -11431,6 +11661,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
+ kfree(vcpu->arch.mci_ctl2_banks);
kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_mmu_destroy(vcpu);
@@ -11510,6 +11741,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.smbase = 0x30000;
vcpu->arch.msr_misc_features_enables = 0;
+ vcpu->arch.ia32_misc_enable_msr = MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
+ MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
__kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP);
__kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true);
@@ -11526,7 +11759,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
* i.e. it's impossible for kvm_find_cpuid_entry() to find a valid entry
* on RESET. But, go through the motions in case that's ever remedied.
*/
- cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0);
+ cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1);
kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600);
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
@@ -11717,6 +11950,8 @@ int kvm_arch_hardware_setup(void *opaque)
if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
+ kvm_init_pmu_capability();
+
r = ops->hardware_setup();
if (r != 0)
return r;
@@ -11726,13 +11961,13 @@ int kvm_arch_hardware_setup(void *opaque)
kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
- supported_xss = 0;
+ kvm_caps.supported_xss = 0;
#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
#undef __kvm_cpu_cap_has
- if (kvm_has_tsc_control) {
+ if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
* fit into a signed integer.
@@ -11740,10 +11975,10 @@ int kvm_arch_hardware_setup(void *opaque)
* be 1 on all machines.
*/
u64 max = min(0x7fffffffULL,
- __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
- kvm_max_guest_tsc_khz = max;
+ __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
+ kvm_caps.max_guest_tsc_khz = max;
}
- kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
+ kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
kvm_init_msr_list();
return 0;
}
@@ -12331,7 +12566,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
+ if (kvm_vcpu_apicv_active(vcpu) &&
+ static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
return true;
return false;
@@ -12773,7 +13009,7 @@ void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_c
(PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
if (!(error_code & PFERR_PRESENT_MASK) ||
- mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != UNMAPPED_GVA) {
+ mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != INVALID_GPA) {
/*
* If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
* tables probably do not match the TLB. Just proceed
@@ -12998,6 +13234,12 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
+static void advance_sev_es_emulated_pio(struct kvm_vcpu *vcpu, unsigned count, int size)
+{
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * size;
+}
+
static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
unsigned int port);
@@ -13021,8 +13263,7 @@ static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
/* memcpy done already by emulator_pio_out. */
- vcpu->arch.sev_pio_count -= count;
- vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+ advance_sev_es_emulated_pio(vcpu, count, size);
if (!ret)
break;
@@ -13038,20 +13279,14 @@ static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
unsigned int port);
-static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
-{
- unsigned count = vcpu->arch.pio.count;
- complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
- vcpu->arch.sev_pio_count -= count;
- vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
-}
-
static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
{
+ unsigned count = vcpu->arch.pio.count;
int size = vcpu->arch.pio.size;
int port = vcpu->arch.pio.port;
- advance_sev_es_emulated_ins(vcpu);
+ complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
+ advance_sev_es_emulated_pio(vcpu, count, size);
if (vcpu->arch.sev_pio_count)
return kvm_sev_es_ins(vcpu, size, port);
return 1;
@@ -13063,11 +13298,11 @@ static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
for (;;) {
unsigned int count =
min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
- if (!__emulator_pio_in(vcpu, size, port, count))
+ if (!emulator_pio_in(vcpu, size, port, vcpu->arch.sev_pio_data, count))
break;
/* Emulation done by the kernel. */
- advance_sev_es_emulated_ins(vcpu);
+ advance_sev_es_emulated_pio(vcpu, count, size);
if (!vcpu->arch.sev_pio_count)
return 1;
}
@@ -13110,6 +13345,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_kick_vcpu_slowpath);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_doorbell);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);