From c3b37c2d77a2c735857c55492ee81e88e855497d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 19 Jan 2023 17:09:02 +0000 Subject: KVM: arm64: Pass the actual page address to mte_clear_page_tags() Commit d77e59a8fccd ("arm64: mte: Lock a page for MTE tag initialisation") added a call to mte_clear_page_tags() in case a prior mte_copy_tags_from_user() failed in order to avoid stale tags in the guest page (it should have really been a separate commit). Unfortunately, the argument passed to this function was the address of the struct page rather than the actual page address. Fix this function call. Fixes: d77e59a8fccd ("arm64: mte: Lock a page for MTE tag initialisation") Signed-off-by: Catalin Marinas Cc: Marc Zyngier Reviewed-by: Oliver Upton Reviewed-by: Anshuman Khandual Reviewed-by: Cornelia Huck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230119170902.1574756-1-catalin.marinas@arm.com --- arch/arm64/kvm/guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 5626ddb540ce..cf4c495a4321 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1079,7 +1079,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, /* uaccess failed, don't leave stale tags */ if (num_tags != MTE_GRANULES_PER_PAGE) - mte_clear_page_tags(page); + mte_clear_page_tags(maddr); set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); -- cgit v1.2.3 From ef3691683d7bfd0a2acf48812e4ffe894f10bfa8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 19 Jan 2023 11:07:59 +0000 Subject: KVM: arm64: GICv4.1: Fix race with doorbell on VPE activation/deactivation To save the vgic LPI pending state with GICv4.1, the VPEs must all be unmapped from the ITSs so that the sGIC caches can be flushed. The opposite is done once the state is saved. This is all done by using the activate/deactivate irqdomain callbacks directly from the vgic code. Crutially, this is done without holding the irqdesc lock for the interrupts that represent the VPE. And these callbacks are changing the state of the irqdesc. What could possibly go wrong? If a doorbell fires while we are messing with the irqdesc state, it will acquire the lock and change the interrupt state concurrently. Since we don't hole the lock, curruption occurs in on the interrupt state. Oh well. While acquiring the lock would fix this (and this was Shanker's initial approach), this is still a layering violation we could do without. A better approach is actually to free the VPE interrupt, do what we have to do, and re-request it. It is more work, but this usually happens only once in the lifetime of the VM and we don't really care about this sort of overhead. Fixes: f66b7b151e00 ("KVM: arm64: GICv4.1: Try to save VLPI state in save_pending_tables") Reported-by: Shanker Donthineni Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230118022348.4137094-1-sdonthineni@nvidia.com --- arch/arm64/kvm/vgic/vgic-v3.c | 25 +++++++++++-------------- arch/arm64/kvm/vgic/vgic-v4.c | 8 ++++++-- arch/arm64/kvm/vgic/vgic.h | 1 + 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 2074521d4a8c..2624963cb95b 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -350,26 +350,23 @@ retry: * The deactivation of the doorbell interrupt will trigger the * unmapping of the associated vPE. */ -static void unmap_all_vpes(struct vgic_dist *dist) +static void unmap_all_vpes(struct kvm *kvm) { - struct irq_desc *desc; + struct vgic_dist *dist = &kvm->arch.vgic; int i; - for (i = 0; i < dist->its_vm.nr_vpes; i++) { - desc = irq_to_desc(dist->its_vm.vpes[i]->irq); - irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); - } + for (i = 0; i < dist->its_vm.nr_vpes; i++) + free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i)); } -static void map_all_vpes(struct vgic_dist *dist) +static void map_all_vpes(struct kvm *kvm) { - struct irq_desc *desc; + struct vgic_dist *dist = &kvm->arch.vgic; int i; - for (i = 0; i < dist->its_vm.nr_vpes; i++) { - desc = irq_to_desc(dist->its_vm.vpes[i]->irq); - irq_domain_activate_irq(irq_desc_get_irq_data(desc), false); - } + for (i = 0; i < dist->its_vm.nr_vpes; i++) + WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i), + dist->its_vm.vpes[i]->irq)); } /** @@ -394,7 +391,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) * and enabling of the doorbells have already been done. */ if (kvm_vgic_global_state.has_gicv4_1) { - unmap_all_vpes(dist); + unmap_all_vpes(kvm); vlpi_avail = true; } @@ -444,7 +441,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) out: if (vlpi_avail) - map_all_vpes(dist); + map_all_vpes(kvm); return ret; } diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index ad06ba6c9b00..a413718be92b 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -222,6 +222,11 @@ void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val) *val = !!(*ptr & mask); } +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq) +{ + return request_irq(irq, vgic_v4_doorbell_handler, 0, "vcpu", vcpu); +} + /** * vgic_v4_init - Initialize the GICv4 data structures * @kvm: Pointer to the VM being initialized @@ -283,8 +288,7 @@ int vgic_v4_init(struct kvm *kvm) irq_flags &= ~IRQ_NOAUTOEN; irq_set_status_flags(irq, irq_flags); - ret = request_irq(irq, vgic_v4_doorbell_handler, - 0, "vcpu", vcpu); + ret = vgic_v4_request_vpe_irq(vcpu, irq); if (ret) { kvm_err("failed to allocate vcpu IRQ%d\n", irq); /* diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 0c8da72953f0..23e280fa0a16 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -331,5 +331,6 @@ int vgic_v4_init(struct kvm *kvm); void vgic_v4_teardown(struct kvm *kvm); void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); +int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); #endif -- cgit v1.2.3 From 50aa870ba2f7735f556e52d15f61cd0f359c4c0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 22 Jan 2023 04:04:50 -0500 Subject: selftests: kvm: move declaration at the beginning of main() Placing a declaration of evt_reset is pedantically invalid according to the C standard. While GCC does not really care and only warns with -Wpedantic, clang ignores the declaration altogether with an error: x86_64/xen_shinfo_test.c:965:2: error: expected expression struct kvm_xen_hvm_attr evt_reset = { ^ x86_64/xen_shinfo_test.c:969:38: error: use of undeclared identifier evt_reset vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); ^ Reported-by: Yu Zhang Reported-by: Sean Christopherson Fixes: a79b53aaaab5 ("KVM: x86: fix deadlock for KVM_XEN_EVTCHN_RESET", 2022-12-28) Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index dae510c263b4..13c75dc18c10 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -434,6 +434,7 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; bool verbose; @@ -962,10 +963,8 @@ int main(int argc, char *argv[]) } done: - struct kvm_xen_hvm_attr evt_reset = { - .type = KVM_XEN_ATTR_TYPE_EVTCHN, - .u.evtchn.flags = KVM_XEN_EVTCHN_RESET, - }; + evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN; + evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); -- cgit v1.2.3 From a44b331614e6f7e63902ed7dff7adc8c85edd8bc Mon Sep 17 00:00:00 2001 From: Hendrik Borghorst Date: Mon, 14 Nov 2022 16:48:23 +0000 Subject: KVM: x86/vmx: Do not skip segment attributes if unusable bit is set When serializing and deserializing kvm_sregs, attributes of the segment descriptors are stored by user space. For unusable segments, vmx_segment_access_rights skips all attributes and sets them to 0. This means we zero out the DPL (Descriptor Privilege Level) for unusable entries. Unusable segments are - contrary to their name - usable in 64bit mode and are used by guests to for example create a linear map through the NULL selector. VMENTER checks if SS.DPL is correct depending on the CS segment type. For types 9 (Execute Only) and 11 (Execute Read), CS.DPL must be equal to SS.DPL [1]. We have seen real world guests setting CS to a usable segment with DPL=3 and SS to an unusable segment with DPL=3. Once we go through an sregs get/set cycle, SS.DPL turns to 0. This causes the virtual machine to crash reproducibly. This commit changes the attribute logic to always preserve attributes for unusable segments. According to [2] SS.DPL is always saved on VM exits, regardless of the unusable bit so user space applications should have saved the information on serialization correctly. [3] specifies that besides SS.DPL the rest of the attributes of the descriptors are undefined after VM entry if unusable bit is set. So, there should be no harm in setting them all to the previous state. [1] Intel SDM Vol 3C 26.3.1.2 Checks on Guest Segment Registers [2] Intel SDM Vol 3C 27.3.2 Saving Segment Registers and Descriptor-Table Registers [3] Intel SDM Vol 3C 26.3.2.2 Loading Guest Segment Registers and Descriptor-Table Registers Cc: Alexander Graf Cc: stable@vger.kernel.org Signed-off-by: Hendrik Borghorst Reviewed-by: Jim Mattson Reviewed-by: Alexander Graf Message-Id: <20221114164823.69555-1-hborghor@amazon.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fc9008dbed33..7eec0226d56a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3440,18 +3440,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; - if (var->unusable || !var->present) - ar = 1 << 16; - else { - ar = var->type & 15; - ar |= (var->s & 1) << 4; - ar |= (var->dpl & 3) << 5; - ar |= (var->present & 1) << 7; - ar |= (var->avl & 1) << 12; - ar |= (var->l & 1) << 13; - ar |= (var->db & 1) << 14; - ar |= (var->g & 1) << 15; - } + ar = var->type & 15; + ar |= (var->s & 1) << 4; + ar |= (var->dpl & 3) << 5; + ar |= (var->present & 1) << 7; + ar |= (var->avl & 1) << 12; + ar |= (var->l & 1) << 13; + ar |= (var->db & 1) << 14; + ar |= (var->g & 1) << 15; + ar |= (var->unusable || !var->present) << 16; return ar; } -- cgit v1.2.3 From c2c46b10d52624376322b01654095a84611c7e09 Mon Sep 17 00:00:00 2001 From: Vipin Sharma Date: Wed, 11 Jan 2023 10:34:08 -0800 Subject: KVM: selftests: Make reclaim_period_ms input always be positive reclaim_period_ms used to be positive only but the commit 0001725d0f9b ("KVM: selftests: Add atoi_positive() and atoi_non_negative() for input validation") incorrectly changed it to non-negative validation. Change validation to allow only positive input. Fixes: 0001725d0f9b ("KVM: selftests: Add atoi_positive() and atoi_non_negative() for input validation") Signed-off-by: Vipin Sharma Reported-by: Ben Gardon Reviewed-by: Ben Gardon Reviewed-by: Sean Christopherson Message-Id: <20230111183408.104491-1-vipinsh@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index ea0978f22db8..251794f83719 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -241,7 +241,7 @@ int main(int argc, char **argv) while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { switch (opt) { case 'p': - reclaim_period_ms = atoi_non_negative("Reclaim period", optarg); + reclaim_period_ms = atoi_positive("Reclaim period", optarg); break; case 't': token = atoi_paranoid(optarg); -- cgit v1.2.3