summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/mmu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r--arch/x86/kvm/mmu/mmu.c53
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c113
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h4
3 files changed, 87 insertions, 83 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c478904af518..6d16481aa29d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3493,26 +3493,25 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
{
struct kvm_shadow_walk_iterator iterator;
- int leaf = vcpu->arch.mmu->root_level;
+ int leaf = -1;
u64 spte;
-
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr);
+ for (shadow_walk_init(&iterator, vcpu, addr),
+ *root_level = iterator.level;
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
- sptes[leaf - 1] = spte;
+ sptes[leaf] = spte;
if (!is_shadow_present_pte(spte))
break;
-
}
walk_shadow_page_lockless_end(vcpu);
@@ -3520,14 +3519,12 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
return leaf;
}
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
- u64 sptes[PT64_ROOT_MAX_LEVEL];
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
struct rsvd_bits_validate *rsvd_check;
- int root = vcpu->arch.mmu->shadow_root_level;
- int leaf;
- int level;
+ int root, leaf, level;
bool reserved = false;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3536,35 +3533,45 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
}
if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
- leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
else
- leaf = get_walk(vcpu, addr, sptes);
+ leaf = get_walk(vcpu, addr, sptes, &root);
+
+ if (unlikely(leaf < 0)) {
+ *sptep = 0ull;
+ return reserved;
+ }
+
+ *sptep = sptes[leaf];
+
+ /*
+ * Skip reserved bits checks on the terminal leaf if it's not a valid
+ * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by
+ * design, always have reserved bits set. The purpose of the checks is
+ * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
+ */
+ if (!is_shadow_present_pte(sptes[leaf]))
+ leaf++;
rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
- for (level = root; level >= leaf; level--) {
- if (!is_shadow_present_pte(sptes[level - 1]))
- break;
+ for (level = root; level >= leaf; level--)
/*
* Use a bitwise-OR instead of a logical-OR to aggregate the
* reserved bit and EPT's invalid memtype/XWR checks to avoid
* adding a Jcc in the loop.
*/
- reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
- __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
- level);
- }
+ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+ __is_rsvd_bits_set(rsvd_check, sptes[level], level);
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
for (level = root; level >= leaf; level--)
pr_err("------ spte 0x%llx level %d.\n",
- sptes[level - 1], level);
+ sptes[level], level);
}
- *sptep = sptes[leaf - 1];
-
return reserved;
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4bd2f1dc0172..2ef8615f9dba 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}
-#define for_each_tdp_mmu_root(_kvm, _root) \
+static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+ if (kvm_mmu_put_root(kvm, root))
+ kvm_tdp_mmu_free_root(kvm, root);
+}
+
+static inline bool tdp_mmu_next_root_valid(struct kvm *kvm,
+ struct kvm_mmu_page *root)
+{
+ lockdep_assert_held(&kvm->mmu_lock);
+
+ if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link))
+ return false;
+
+ kvm_mmu_get_root(kvm, root);
+ return true;
+
+}
+
+static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ struct kvm_mmu_page *root)
+{
+ struct kvm_mmu_page *next_root;
+
+ next_root = list_next_entry(root, link);
+ tdp_mmu_put_root(kvm, root);
+ return next_root;
+}
+
+/*
+ * Note: this iterator gets and puts references to the roots it iterates over.
+ * This makes it safe to release the MMU lock and yield within the loop, but
+ * if exiting the loop early, the caller must drop the reference to the most
+ * recent root. (Unless keeping a live reference is desirable.)
+ */
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \
+ for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \
+ typeof(*_root), link); \
+ tdp_mmu_next_root_valid(_kvm, _root); \
+ _root = tdp_mmu_next_root(_kvm, _root))
+
+#define for_each_tdp_mmu_root(_kvm, _root) \
list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
@@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
struct kvm_mmu_page *root;
bool flush = false;
- for_each_tdp_mmu_root(kvm, root) {
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
flush |= zap_gfn_range(kvm, root, start, end, true);
- kvm_mmu_put_root(kvm, root);
- }
-
return flush;
}
@@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
int ret = 0;
int as_id;
- for_each_tdp_mmu_root(kvm, root) {
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
as_id = kvm_mmu_page_as_id(root);
slots = __kvm_memslots(kvm, as_id);
kvm_for_each_memslot(memslot, slots) {
@@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
ret |= handler(kvm, memslot, root, gfn_start,
gfn_end, data);
}
-
- kvm_mmu_put_root(kvm, root);
}
return ret;
@@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages, min_level);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
@@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
@@ -1029,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
}
@@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_mmu_page *root;
int root_as_id;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
zap_collapsible_spte_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
}
@@ -1160,16 +1152,19 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
{
struct tdp_iter iter;
struct kvm_mmu *mmu = vcpu->arch.mmu;
- int leaf = vcpu->arch.mmu->shadow_root_level;
gfn_t gfn = addr >> PAGE_SHIFT;
+ int leaf = -1;
+
+ *root_level = vcpu->arch.mmu->shadow_root_level;
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
leaf = iter.level;
- sptes[leaf - 1] = iter.old_spte;
+ sptes[leaf] = iter.old_spte;
}
return leaf;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level);
+
#endif /* __KVM_X86_MMU_TDP_MMU_H */