Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/Kconfig        6
-rw-r--r--  virt/kvm/dirty_ring.c  46
-rw-r--r--  virt/kvm/irqchip.c      3
-rw-r--r--  virt/kvm/kvm_main.c   159
-rw-r--r--  virt/kvm/kvm_mm.h       4
-rw-r--r--  virt/kvm/pfncache.c   121
6 files changed, 236 insertions(+), 103 deletions(-)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 800f9470e36b..9fb1ff6f19e5 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL
bool
select HAVE_KVM_DIRTY_RING
+# Allow enabling both the dirty bitmap and dirty ring. Only architectures
+# that need to dirty memory outside of a vCPU context should select this.
+config NEED_KVM_DIRTY_RING_WITH_BITMAP
+ bool
+ depends on HAVE_KVM_DIRTY_RING
+
config HAVE_KVM_EVENTFD
bool
select EVENTFD
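An architecture that selects NEED_KVM_DIRTY_RING_WITH_BITMAP is also expected to override the weak kvm_arch_allow_write_without_running_vcpu() stub introduced in dirty_ring.c below. A minimal sketch of such an override, assuming a hypothetical per-VM arch field that is only set while in-kernel device state is being saved to guest memory:

	/* Hypothetical arch override; 'saving_iodev_state' is an assumed arch field. */
	bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
	{
		/* Writes without a running vCPU are only tolerated while the
		 * architecture is saving in-kernel device state to guest RAM. */
		return READ_ONCE(kvm->arch.saving_iodev_state);
	}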
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index d6fabf238032..c1cd7dfe4a90 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -21,12 +21,26 @@ u32 kvm_dirty_ring_get_rsvd_entries(void)
return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
}
+bool kvm_use_dirty_bitmap(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->slots_lock);
+
+ return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap;
+}
+
+#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
+{
+ return false;
+}
+#endif
+
static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
{
return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
}
-bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+static bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
{
return kvm_dirty_ring_used(ring) >= ring->soft_limit;
}
@@ -142,13 +156,19 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+ /*
+	 * The KVM_REQ_DIRTY_RING_SOFT_FULL request will be cleared by
+	 * the VCPU thread the next time it enters the guest.
+ */
+
trace_kvm_dirty_ring_reset(ring);
return count;
}
-void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
{
+ struct kvm_dirty_ring *ring = &vcpu->dirty_ring;
struct kvm_dirty_gfn *entry;
/* It should never get full */
@@ -166,6 +186,28 @@ void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
kvm_dirty_gfn_set_dirtied(entry);
ring->dirty_index++;
trace_kvm_dirty_ring_push(ring, slot, offset);
+
+ if (kvm_dirty_ring_soft_full(ring))
+ kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
+}
+
+bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu)
+{
+ /*
+ * The VCPU isn't runnable when the dirty ring becomes soft full.
+ * The KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent
+ * the VCPU from running until the dirty pages are harvested and
+ * the dirty ring is reset by userspace.
+ */
+ if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) &&
+ kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) {
+ kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+ trace_kvm_dirty_ring_exit(vcpu);
+ return true;
+ }
+
+ return false;
}
struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
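With the soft-full condition now raised as a vCPU request, an architecture's run loop only has to test it before entering the guest. A minimal sketch of the expected call site; the surrounding function name is illustrative:

	static int example_enter_guest(struct kvm_vcpu *vcpu)
	{
		if (kvm_request_pending(vcpu)) {
			/* Sets exit_reason to KVM_EXIT_DIRTY_RING_FULL and re-arms
			 * the request until userspace actually resets the ring. */
			if (kvm_dirty_ring_check_request(vcpu))
				return 0;	/* exit to userspace */
		}

		/* ... enter the guest ... */
		return 1;
	}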
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 58e4f88b2b9f..1e567d1f6d3d 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -17,7 +17,6 @@
#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
-#include "irq.h"
int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries, int gsi)
@@ -50,7 +49,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
{
struct kvm_kernel_irq_routing_entry route;
- if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
+ if (!kvm_arch_irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
return -EINVAL;
route.msi.address_lo = msi->address_lo;
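With "irq.h" no longer included, generic code reaches the in-kernel irqchip test through kvm_arch_irqchip_in_kernel() instead of the arch-private irqchip_in_kernel() macro. For most architectures the override can presumably be a plain wrapper, sketched here as an assumption:

	bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
	{
		return irqchip_in_kernel(kvm);
	}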
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25d7872b29c1..13e88297f999 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
goto out_err_no_arch_destroy_vm;
}
- kvm->max_halt_poll_ns = halt_poll_ns;
-
r = kvm_arch_init_vm(kvm, type);
if (r)
goto out_err_no_arch_destroy_vm;
@@ -1617,7 +1615,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
new->dirty_bitmap = NULL;
else if (old && old->dirty_bitmap)
new->dirty_bitmap = old->dirty_bitmap;
- else if (!kvm->dirty_ring_size) {
+ else if (kvm_use_dirty_bitmap(kvm)) {
r = kvm_alloc_dirty_bitmap(new);
if (r)
return r;
@@ -1641,6 +1639,8 @@ static void kvm_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ int old_flags = old ? old->flags : 0;
+ int new_flags = new ? new->flags : 0;
/*
* Update the total number of memslot pages before calling the arch
* hook so that architectures can consume the result directly.
@@ -1650,6 +1650,12 @@ static void kvm_commit_memory_region(struct kvm *kvm,
else if (change == KVM_MR_CREATE)
kvm->nr_memslot_pages += new->npages;
+ if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES) {
+ int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1;
+ atomic_set(&kvm->nr_memslots_dirty_logging,
+ atomic_read(&kvm->nr_memslots_dirty_logging) + change);
+ }
+
kvm_arch_commit_memory_region(kvm, old, new, change);
switch (change) {
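The new nr_memslots_dirty_logging counter is only written under slots_lock, which is why the read-modify-write via atomic_set() is safe here, while readers may sample it locklessly. A sketch of a possible consumer; the helper name is illustrative:

	/* Cheap lockless test: does any memslot currently have dirty logging on? */
	static inline bool example_dirty_logging_active(struct kvm *kvm)
	{
		return atomic_read(&kvm->nr_memslots_dirty_logging) != 0;
	}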
@@ -2060,8 +2066,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
unsigned long n;
unsigned long any = 0;
- /* Dirty ring tracking is exclusive to dirty log tracking */
- if (kvm->dirty_ring_size)
+ /* Dirty ring tracking may be exclusive to dirty log tracking */
+ if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;
*memslot = NULL;
@@ -2125,8 +2131,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
unsigned long *dirty_bitmap_buffer;
bool flush;
- /* Dirty ring tracking is exclusive to dirty log tracking */
- if (kvm->dirty_ring_size)
+ /* Dirty ring tracking may be exclusive to dirty log tracking */
+ if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;
as_id = log->slot >> 16;
@@ -2237,8 +2243,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
unsigned long *dirty_bitmap_buffer;
bool flush;
- /* Dirty ring tracking is exclusive to dirty log tracking */
- if (kvm->dirty_ring_size)
+ /* Dirty ring tracking may be exclusive to dirty log tracking */
+ if (!kvm_use_dirty_bitmap(kvm))
return -ENXIO;
as_id = log->slot >> 16;
@@ -2514,7 +2520,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
* 1 indicates success, -errno is returned if an error is detected.
*/
static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
- bool *writable, kvm_pfn_t *pfn)
+ bool interruptible, bool *writable, kvm_pfn_t *pfn)
{
unsigned int flags = FOLL_HWPOISON;
struct page *page;
@@ -2529,6 +2535,8 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
flags |= FOLL_WRITE;
if (async)
flags |= FOLL_NOWAIT;
+ if (interruptible)
+ flags |= FOLL_INTERRUPTIBLE;
npages = get_user_pages_unlocked(addr, 1, &page, flags);
if (npages != 1)
@@ -2638,6 +2646,7 @@ out:
* Pin guest page in memory and return its pfn.
* @addr: host virtual address which maps memory to the guest
* @atomic: whether the lookup must be atomic, i.e. this function must not sleep
+ * @interruptible: whether the process can be interrupted by non-fatal signals
* @async: whether this function needs to wait for I/O to complete if the
* host page is not in memory
* @write_fault: whether we should get a writable host page
@@ -2648,8 +2657,8 @@ out:
* 2): @write_fault = false && @writable, @writable will tell the caller
* whether the mapping is writable.
*/
-kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable)
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
+ bool *async, bool write_fault, bool *writable)
{
struct vm_area_struct *vma;
kvm_pfn_t pfn;
@@ -2664,9 +2673,12 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
if (atomic)
return KVM_PFN_ERR_FAULT;
- npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+ npages = hva_to_pfn_slow(addr, async, write_fault, interruptible,
+ writable, &pfn);
if (npages == 1)
return pfn;
+ if (npages == -EINTR)
+ return KVM_PFN_ERR_SIGPENDING;
mmap_read_lock(current->mm);
if (npages == -EHWPOISON ||
@@ -2697,8 +2709,8 @@ exit:
}
kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
- bool atomic, bool *async, bool write_fault,
- bool *writable, hva_t *hva)
+ bool atomic, bool interruptible, bool *async,
+ bool write_fault, bool *writable, hva_t *hva)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
@@ -2723,7 +2735,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
writable = NULL;
}
- return hva_to_pfn(addr, atomic, async, write_fault,
+ return hva_to_pfn(addr, atomic, interruptible, async, write_fault,
writable);
}
EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
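Callers that pass interruptible=true can now observe the new KVM_PFN_ERR_SIGPENDING error PFN when a non-fatal signal interrupts the slow GUP path. A minimal sketch of how a fault handler might translate it; the function name and return-code policy are illustrative:

	static int example_handle_error_pfn(kvm_pfn_t pfn)
	{
		if (pfn == KVM_PFN_ERR_SIGPENDING)
			return -EINTR;	/* back out so the pending signal can be handled */
		if (is_error_noslot_pfn(pfn))
			return -EFAULT;
		return 0;		/* usable pfn */
	}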
@@ -2731,20 +2743,22 @@ EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
{
- return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
- write_fault, writable, NULL);
+ return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false,
+ NULL, write_fault, writable, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true,
+ NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
@@ -3305,18 +3319,19 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
#ifdef CONFIG_HAVE_KVM_DIRTY_RING
- if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
+ if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
return;
+
+ WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
#endif
if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
u32 slot = (memslot->as_id << 16) | memslot->id;
- if (kvm->dirty_ring_size)
- kvm_dirty_ring_push(&vcpu->dirty_ring,
- slot, rel_gfn);
- else
+ if (kvm->dirty_ring_size && vcpu)
+ kvm_dirty_ring_push(vcpu, slot, rel_gfn);
+ else if (memslot->dirty_bitmap)
set_bit_le(rel_gfn, memslot->dirty_bitmap);
}
}
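With the relaxed checks above, a write that reaches mark_page_dirty_in_slot() without a running vCPU (legal only when the architecture opts in) is recorded in the slot's bitmap rather than in a ring. A sketch of such a caller, standing in for e.g. an in-kernel device save path; the helper is hypothetical:

	static int example_save_to_guest(struct kvm *kvm, gpa_t gpa,
					 const void *data, unsigned long len)
	{
		/* kvm_write_guest() ends up in mark_page_dirty_in_slot(); with no
		 * running vCPU the dirty GFN lands in the slot's dirty bitmap. */
		return kvm_write_guest(kvm, gpa, data, len);
	}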
@@ -3377,9 +3392,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
if (val < grow_start)
val = grow_start;
- if (val > vcpu->kvm->max_halt_poll_ns)
- val = vcpu->kvm->max_halt_poll_ns;
-
vcpu->halt_poll_ns = val;
out:
trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
@@ -3483,6 +3495,24 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
}
}
+static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (kvm->override_halt_poll_ns) {
+ /*
+ * Ensure kvm->max_halt_poll_ns is not read before
+ * kvm->override_halt_poll_ns.
+ *
+ * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL.
+ */
+ smp_rmb();
+ return READ_ONCE(kvm->max_halt_poll_ns);
+ }
+
+ return READ_ONCE(halt_poll_ns);
+}
+
/*
* Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt
* polling is enabled, busy wait for a short time before blocking to avoid the
@@ -3491,21 +3521,23 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
*/
void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
+ unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
- bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
ktime_t start, cur, poll_end;
bool waited = false;
+ bool do_halt_poll;
u64 halt_ns;
+ if (vcpu->halt_poll_ns > max_halt_poll_ns)
+ vcpu->halt_poll_ns = max_halt_poll_ns;
+
+ do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
+
start = cur = poll_end = ktime_get();
if (do_halt_poll) {
ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
do {
- /*
- * This sets KVM_REQ_UNHALT if an interrupt
- * arrives.
- */
if (kvm_vcpu_check_block(vcpu) < 0)
goto out;
cpu_relax();
@@ -3535,18 +3567,21 @@ out:
update_halt_poll_stats(vcpu, start, poll_end, !waited);
if (halt_poll_allowed) {
+ /* Recompute the max halt poll time in case it changed. */
+ max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu);
+
if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- } else if (vcpu->kvm->max_halt_poll_ns) {
+ } else if (max_halt_poll_ns) {
if (halt_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
else if (vcpu->halt_poll_ns &&
- halt_ns > vcpu->kvm->max_halt_poll_ns)
+ halt_ns > max_halt_poll_ns)
shrink_halt_poll_ns(vcpu);
/* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
- halt_ns < vcpu->kvm->max_halt_poll_ns)
+ else if (vcpu->halt_poll_ns < max_halt_poll_ns &&
+ halt_ns < max_halt_poll_ns)
grow_halt_poll_ns(vcpu);
} else {
vcpu->halt_poll_ns = 0;
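The per-VM override consumed by kvm_vcpu_max_halt_poll_ns() is armed from userspace with KVM_ENABLE_CAP on the VM file descriptor. A minimal userspace sketch, assuming <linux/kvm.h> and an open vm_fd; the value is arbitrary and 0 disables halt polling for the VM:

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HALT_POLL,
		.args = { 500000 },	/* per-VM max halt-poll time in ns (example) */
	};

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		perror("KVM_ENABLE_CAP(KVM_CAP_HALT_POLL)");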
@@ -4484,6 +4519,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#else
return 0;
#endif
+#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+ case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP:
+#endif
case KVM_CAP_BINARY_STATS_FD:
case KVM_CAP_SYSTEM_EVENT_DATA:
return 1;
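Userspace can probe for the new capability with KVM_CHECK_EXTENSION before attempting to enable it. A one-line sketch, again assuming an open vm_fd:

	int has_ring_with_bitmap = ioctl(vm_fd, KVM_CHECK_EXTENSION,
					 KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP);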
@@ -4559,6 +4597,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return -EINVAL;
}
+static bool kvm_are_all_memslots_empty(struct kvm *kvm)
+{
+ int i;
+
+ lockdep_assert_held(&kvm->slots_lock);
+
+ for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+ if (!kvm_memslots_empty(__kvm_memslots(kvm, i)))
+ return false;
+ }
+
+ return true;
+}
+
static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
@@ -4581,6 +4633,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return -EINVAL;
kvm->max_halt_poll_ns = cap->args[0];
+
+ /*
+ * Ensure kvm->override_halt_poll_ns does not become visible
+ * before kvm->max_halt_poll_ns.
+ *
+ * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns().
+ */
+ smp_wmb();
+ kvm->override_halt_poll_ns = true;
+
return 0;
}
case KVM_CAP_DIRTY_LOG_RING:
@@ -4589,6 +4651,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
return -EINVAL;
return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
+ case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: {
+ int r = -EINVAL;
+
+ if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) ||
+ !kvm->dirty_ring_size || cap->flags)
+ return r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ /*
+ * For simplicity, allow enabling ring+bitmap if and only if
+ * there are no memslots, e.g. to ensure all memslots allocate
+ * a bitmap after the capability is enabled.
+ */
+ if (kvm_are_all_memslots_empty(kvm)) {
+ kvm->dirty_ring_with_bitmap = true;
+ r = 0;
+ }
+
+ mutex_unlock(&kvm->slots_lock);
+
+ return r;
+ }
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
}
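Because the bitmap capability is accepted only while a dirty ring is configured and no memslots exist yet, userspace has to follow a strict ordering. A sketch of the expected sequence; the ring size and the choice of the ACQ_REL flavor are illustrative:

	struct kvm_enable_cap ring = {
		.cap = KVM_CAP_DIRTY_LOG_RING_ACQ_REL,	/* or KVM_CAP_DIRTY_LOG_RING */
		.args = { 65536 },			/* ring size in bytes, power of two */
	};
	struct kvm_enable_cap bitmap = {
		.cap = KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP,
	};

	/* Enable the ring before creating vCPUs, the bitmap before creating memslots. */
	ioctl(vm_fd, KVM_ENABLE_CAP, &ring);
	ioctl(vm_fd, KVM_ENABLE_CAP, &bitmap);
	/* ... KVM_SET_USER_MEMORY_REGION calls follow ... */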
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 41da467d99c9..a1ab15006af3 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -24,8 +24,8 @@
#define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
#endif /* KVM_HAVE_MMU_RWLOCK */
-kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
- bool write_fault, bool *writable);
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
+ bool *async, bool write_fault, bool *writable);
#ifdef CONFIG_HAVE_KVM_PFNCACHE
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 346e47f15572..2d6aba677830 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -76,19 +76,17 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
}
}
-bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- gpa_t gpa, unsigned long len)
+bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
if (!gpc->active)
return false;
- if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+ if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
return false;
- if (gpc->gpa != gpa || gpc->generation != slots->generation ||
- kvm_is_error_hva(gpc->uhva))
+ if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
return false;
if (!gpc->valid)
@@ -96,9 +94,9 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
return true;
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
+EXPORT_SYMBOL_GPL(kvm_gpc_check);
-static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
+static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
{
/* Unmap the old pfn/page if it was mapped before. */
if (!is_error_noslot_pfn(pfn) && khva) {
@@ -139,7 +137,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
return kvm->mmu_invalidate_seq != mmu_seq;
}
-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
void *old_khva = gpc->khva - offset_in_page(gpc->khva);
@@ -159,7 +157,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
gpc->valid = false;
do {
- mmu_seq = kvm->mmu_invalidate_seq;
+ mmu_seq = gpc->kvm->mmu_invalidate_seq;
smp_rmb();
write_unlock_irq(&gpc->lock);
@@ -177,7 +175,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
* the existing mapping and didn't create a new one.
*/
if (new_khva != old_khva)
- gpc_unmap_khva(kvm, new_pfn, new_khva);
+ gpc_unmap_khva(new_pfn, new_khva);
kvm_release_pfn_clean(new_pfn);
@@ -185,7 +183,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
}
/* We always request a writeable mapping */
- new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL);
+ new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL);
if (is_error_noslot_pfn(new_pfn))
goto out_error;
@@ -217,7 +215,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
- } while (mmu_notifier_retry_cache(kvm, mmu_seq));
+ } while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
gpc->valid = true;
gpc->pfn = new_pfn;
@@ -238,10 +236,10 @@ out_error:
return -EFAULT;
}
-int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- gpa_t gpa, unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+ unsigned long len)
{
- struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
unsigned long page_offset = gpa & ~PAGE_MASK;
bool unmap_old = false;
unsigned long old_uhva;
@@ -295,12 +293,16 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* drop the lock and do the HVA to PFN lookup again.
*/
if (!gpc->valid || old_uhva != gpc->uhva) {
- ret = hva_to_pfn_retry(kvm, gpc);
+ ret = hva_to_pfn_retry(gpc);
} else {
- /* If the HVA→PFN mapping was already valid, don't unmap it. */
- old_pfn = KVM_PFN_ERR_FAULT;
- old_khva = NULL;
+ /*
+ * If the HVA→PFN mapping was already valid, don't unmap it.
+ * But do update gpc->khva because the offset within the page
+ * may have changed.
+ */
+ gpc->khva = old_khva + page_offset;
ret = 0;
+ goto out_unlock;
}
out:
@@ -324,59 +326,41 @@ out_unlock:
mutex_unlock(&gpc->refresh_lock);
if (unmap_old)
- gpc_unmap_khva(kvm, old_pfn, old_khva);
+ gpc_unmap_khva(old_pfn, old_khva);
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);
-void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
- void *old_khva;
- kvm_pfn_t old_pfn;
-
- mutex_lock(&gpc->refresh_lock);
- write_lock_irq(&gpc->lock);
-
- gpc->valid = false;
-
- old_khva = gpc->khva - offset_in_page(gpc->khva);
- old_pfn = gpc->pfn;
-
- /*
- * We can leave the GPA → uHVA map cache intact but the PFN
- * lookup will need to be redone even for the same page.
- */
- gpc->khva = NULL;
- gpc->pfn = KVM_PFN_ERR_FAULT;
-
- write_unlock_irq(&gpc->lock);
- mutex_unlock(&gpc->refresh_lock);
-
- gpc_unmap_khva(kvm, old_pfn, old_khva);
+ return __kvm_gpc_refresh(gpc, gpc->gpa, len);
}
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
+EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+ struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
{
+ WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
+ WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
+
rwlock_init(&gpc->lock);
mutex_init(&gpc->refresh_lock);
+
+ gpc->kvm = kvm;
+ gpc->vcpu = vcpu;
+ gpc->usage = usage;
+ gpc->pfn = KVM_PFN_ERR_FAULT;
+ gpc->uhva = KVM_HVA_ERR_BAD;
}
EXPORT_SYMBOL_GPL(kvm_gpc_init);
-int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
- gpa_t gpa, unsigned long len)
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
{
- WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
+ struct kvm *kvm = gpc->kvm;
if (!gpc->active) {
- gpc->khva = NULL;
- gpc->pfn = KVM_PFN_ERR_FAULT;
- gpc->uhva = KVM_HVA_ERR_BAD;
- gpc->vcpu = vcpu;
- gpc->usage = usage;
- gpc->valid = false;
+ if (KVM_BUG_ON(gpc->valid, kvm))
+ return -EIO;
spin_lock(&kvm->gpc_lock);
list_add(&gpc->list, &kvm->gpc_list);
@@ -391,12 +375,16 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpc->active = true;
write_unlock_irq(&gpc->lock);
}
- return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
+ return __kvm_gpc_refresh(gpc, gpa, len);
}
EXPORT_SYMBOL_GPL(kvm_gpc_activate);
-void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
{
+ struct kvm *kvm = gpc->kvm;
+ kvm_pfn_t old_pfn;
+ void *old_khva;
+
if (gpc->active) {
/*
* Deactivate the cache before removing it from the list, KVM
@@ -405,13 +393,26 @@ void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
*/
write_lock_irq(&gpc->lock);
gpc->active = false;
+ gpc->valid = false;
+
+ /*
+ * Leave the GPA => uHVA cache intact, it's protected by the
+ * memslot generation. The PFN lookup needs to be redone every
+ * time as mmu_notifier protection is lost when the cache is
+ * removed from the VM's gpc_list.
+ */
+ old_khva = gpc->khva - offset_in_page(gpc->khva);
+ gpc->khva = NULL;
+
+ old_pfn = gpc->pfn;
+ gpc->pfn = KVM_PFN_ERR_FAULT;
write_unlock_irq(&gpc->lock);
spin_lock(&kvm->gpc_lock);
list_del(&gpc->list);
spin_unlock(&kvm->gpc_lock);
- kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
+ gpc_unmap_khva(old_pfn, old_khva);
}
}
EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
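After this refactor a cache carries its kvm/vcpu/usage association from initialization onward, so call sites only pass the gpa and length that actually vary. A minimal sketch of the new lifecycle for a host-only cache; the helper, the gpa and the error handling are illustrative:

	/* Illustrative: write one u64 into guest memory through a pfn cache. */
	static int example_gpc_write_u64(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
					 gpa_t gpa, u64 val)
	{
		kvm_gpc_init(gpc, kvm, NULL, KVM_HOST_USES_PFN);
		if (kvm_gpc_activate(gpc, gpa, sizeof(val)))
			return -EFAULT;

		read_lock_irq(&gpc->lock);
		while (!kvm_gpc_check(gpc, sizeof(val))) {
			read_unlock_irq(&gpc->lock);
			if (kvm_gpc_refresh(gpc, sizeof(val))) {
				kvm_gpc_deactivate(gpc);
				return -EFAULT;
			}
			read_lock_irq(&gpc->lock);
		}
		*(u64 *)gpc->khva = val;	/* access only while holding gpc->lock */
		read_unlock_irq(&gpc->lock);

		kvm_gpc_deactivate(gpc);
		return 0;
	}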