summaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2021-06-22 15:09:34 +0100
committerMarc Zyngier <maz@kernel.org>2021-06-22 15:09:34 +0100
commit9f03db6673598f618f10ba01b3f8824bd5f31a41 (patch)
tree219eab43901af8c7fb1e1498f597ecb03988d51e /arch/arm64
parent2fea6cf7d32141b9e95e30500f1d50a9f92a7371 (diff)
parent04c02c201d7e8149ae336ead69fb64e4e6f94bc9 (diff)
downloadlinux-9f03db6673598f618f10ba01b3f8824bd5f31a41.tar.bz2
Merge branch kvm-arm64/mmu/mte into kvmarm-master/next
KVM/arm64 support for MTE, courtesy of Steven Price. It allows the guest to use memory tagging, and offers a new userspace API to save/restore the tags. * kvm-arm64/mmu/mte: KVM: arm64: Document MTE capability and ioctl KVM: arm64: Add ioctl to fetch/store tags in a guest KVM: arm64: Expose KVM_ARM_CAP_MTE KVM: arm64: Save/restore MTE registers KVM: arm64: Introduce MTE VM feature arm64: mte: Sync tags for pages where PTE is untagged Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h3
-rw-r--r--arch/arm64/include/asm/kvm_host.h13
-rw-r--r--arch/arm64/include/asm/kvm_mte.h66
-rw-r--r--arch/arm64/include/asm/mte-def.h1
-rw-r--r--arch/arm64/include/asm/mte.h4
-rw-r--r--arch/arm64/include/asm/pgtable.h22
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h11
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/mte.c18
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/guest.c82
-rw-r--r--arch/arm64/kvm/hyp/entry.S7
-rw-r--r--arch/arm64/kvm/hyp/exception.c3
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h21
-rw-r--r--arch/arm64/kvm/mmu.c66
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c32
19 files changed, 360 insertions, 17 deletions
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 692c9049befa..d436831dd706 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,7 +12,8 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA (UL(1) << 56)
+#define HCR_ATA_SHIFT 56
+#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_FWB (UL(1) << 46)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 01b9857757f2..fd418955e31e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 |= HCR_TID2;
+
+ if (kvm_has_mte(vcpu->kvm))
+ vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 6336b4309114..61d9749284d1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -133,6 +133,9 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
+
+ /* Memory Tagging Extension enabled for the guest */
+ bool mte_enabled;
};
struct kvm_vcpu_fault_info {
@@ -207,6 +210,12 @@ enum vcpu_sysreg {
CNTP_CVAL_EL0,
CNTP_CTL_EL0,
+ /* Memory Tagging Extension registers */
+ RGSR_EL1, /* Random Allocation Tag Seed Register */
+ GCR_EL1, /* Tag Control Register */
+ TFSR_EL1, /* Tag Fault Status Register (EL1) */
+ TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -722,6 +731,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags);
+
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
@@ -770,6 +782,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+ b .L__skip_switch\@
+alternative_else_nop_endif
+ mrs \reg1, hcr_el2
+ tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+ mrs_s \reg1, SYS_RGSR_EL1
+ str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
+ mrs_s \reg1, SYS_GCR_EL1
+ str \reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+ ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
+ msr_s SYS_RGSR_EL1, \reg1
+ ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
+ msr_s SYS_GCR_EL1, \reg1
+
+ isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a42..626d359b396e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
#define MTE_GRANULE_SIZE UL(16)
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
#define MTE_TAG_SHIFT 56
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..347ef38a35f7 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b10204e72fc..db5402168841 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
- if (system_supports_mte() &&
- pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
- mte_sync_tags(ptep, pte);
+ /*
+ * If the PTE would provide user space access to the tags associated
+ * with it then ensure that the MTE tags are synchronised. Although
+ * pte_access_permitted() returns false for exec only mappings, they
+ * don't expose tags (instruction fetches don't check tags).
+ */
+ if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ !pte_special(pte)) {
+ pte_t old_pte = READ_ONCE(*ptep);
+ /*
+ * We only need to synchronise if the new PTE has tags enabled
+ * or if swapping in (in which case another mapping may have
+ * set tags in the past even if this PTE isn't tagged).
+ * (!pte_none() && !pte_present()) is an open coded version of
+ * is_swap_pte()
+ */
+ if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+ mte_sync_tags(old_pte, pte);
+ }
__check_racy_pte_update(mm, ptep, pte);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a168..347ccac2341e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
- SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+ SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
+ SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150..b3edde68bc3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
+struct kvm_arm_copy_mte_tags {
+ __u64 guest_ipa;
+ __u64 length;
+ void __user *addr;
+ __u64 flags;
+ __u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST 0
+#define KVM_ARM_TAGS_FROM_GUEST 1
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..6f0044cb233e 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,6 +111,8 @@ int main(void)
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
+ DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+ DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 125a10e413e9..69b3fde8759e 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
#endif
-static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+ bool check_swap, bool pte_is_tagged)
{
- pte_t old_pte = READ_ONCE(*ptep);
-
if (check_swap && is_swap_pte(old_pte)) {
swp_entry_t entry = pte_to_swp_entry(old_pte);
@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
return;
}
+ if (!pte_is_tagged)
+ return;
+
page_kasan_tag_reset(page);
/*
* We need smp_wmb() in between setting the flags and clearing the
@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
mte_clear_page_tags(page_address(page));
}
-void mte_sync_tags(pte_t *ptep, pte_t pte)
+void mte_sync_tags(pte_t old_pte, pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
bool check_swap = nr_pages == 1;
+ bool pte_is_tagged = pte_tagged(pte);
+
+ /* Early out if there's nothing to do */
+ if (!check_swap && !pte_is_tagged)
+ return;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
- mte_sync_page_tags(page, ptep, check_swap);
+ mte_sync_page_tags(page, old_pte, check_swap,
+ pte_is_tagged);
}
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b0294f2e95ba..e0b81870ff2a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
break;
+ case KVM_CAP_ARM_MTE:
+ if (!system_supports_mte() || kvm->created_vcpus)
+ return -EINVAL;
+ r = 0;
+ kvm->arch.mte_enabled = true;
+ break;
default:
r = -EINVAL;
break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
+ case KVM_CAP_ARM_MTE:
+ r = system_supports_mte();
+ break;
case KVM_CAP_STEAL_TIME:
r = kvm_arm_pvtime_supported();
break;
@@ -1354,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
+ case KVM_ARM_MTE_COPY_TAGS: {
+ struct kvm_arm_copy_mte_tags copy_tags;
+
+ if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+ return -EFAULT;
+ return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+ }
default:
return -EINVAL;
}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603..4ddb20017b2f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+ struct kvm_arm_copy_mte_tags *copy_tags)
+{
+ gpa_t guest_ipa = copy_tags->guest_ipa;
+ size_t length = copy_tags->length;
+ void __user *tags = copy_tags->addr;
+ gpa_t gfn;
+ bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+ int ret = 0;
+
+ if (!kvm_has_mte(kvm))
+ return -EINVAL;
+
+ if (copy_tags->reserved[0] || copy_tags->reserved[1])
+ return -EINVAL;
+
+ if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+ return -EINVAL;
+
+ if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+ return -EINVAL;
+
+ gfn = gpa_to_gfn(guest_ipa);
+
+ mutex_lock(&kvm->slots_lock);
+
+ while (length > 0) {
+ kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+ void *maddr;
+ unsigned long num_tags;
+ struct page *page;
+
+ if (is_error_noslot_pfn(pfn)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ page = pfn_to_online_page(pfn);
+ if (!page) {
+ /* Reject ZONE_DEVICE memory */
+ ret = -EFAULT;
+ goto out;
+ }
+ maddr = page_address(page);
+
+ if (!write) {
+ if (test_bit(PG_mte_tagged, &page->flags))
+ num_tags = mte_copy_tags_to_user(tags, maddr,
+ MTE_GRANULES_PER_PAGE);
+ else
+ /* No tags in memory, so write zeros */
+ num_tags = MTE_GRANULES_PER_PAGE -
+ clear_user(tags, MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_clean(pfn);
+ } else {
+ num_tags = mte_copy_tags_from_user(maddr, tags,
+ MTE_GRANULES_PER_PAGE);
+ kvm_release_pfn_dirty(pfn);
+ }
+
+ if (num_tags != MTE_GRANULES_PER_PAGE) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* Set the flag after checking the write completed fully */
+ if (write)
+ set_bit(PG_mte_tagged, &page->flags);
+
+ gfn++;
+ tags += num_tags;
+ length -= PAGE_SIZE;
+ }
+
+out:
+ mutex_unlock(&kvm->slots_lock);
+ /* If some data has been copied report the number of bytes copied */
+ if (length != copy_tags->length)
+ return copy_tags->length - length;
+ return ret;
+}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
#include <asm/kvm_ptrauth.h>
.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
add x29, x0, #VCPU_CONTEXT
+ // mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+ mte_switch_to_guest x29, x1, x2
+
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
+ // mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+ mte_switch_to_hyp x1, x2, x3
+
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 11541b94b328..0418399e0a20 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
- // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+ if (kvm_has_mte(vcpu->kvm))
+ new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+ if (!vcpu)
+ vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+ return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+ ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+ }
+
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
+ if (ctxt_has_mte(ctxt)) {
+ write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+ write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+ }
+
if (!has_vhe() &&
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index bf389dfc885d..57292dc5ce35 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -853,6 +853,45 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
return PAGE_SHIFT;
}
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ * racing to santise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ * an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
+{
+ unsigned long i, nr_pages = size >> PAGE_SHIFT;
+ struct page *page;
+
+ if (!kvm_has_mte(kvm))
+ return 0;
+
+ /*
+ * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+ * that may not support tags.
+ */
+ page = pfn_to_online_page(pfn);
+
+ if (!page)
+ return -EFAULT;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (!test_bit(PG_mte_tagged, &page->flags)) {
+ mte_clear_page_tags(page_address(page));
+ set_bit(PG_mte_tagged, &page->flags);
+ }
+ }
+
+ return 0;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
@@ -861,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
+ bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -907,6 +947,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_shift = get_vma_page_shift(vma, hva);
}
+ shared = (vma->vm_flags & VM_PFNMAP);
+
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
@@ -1011,6 +1053,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
+
+ if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
+ /* Check the VMM hasn't introduced a new VM_SHARED VMA */
+ if (!shared)
+ ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ else
+ ret = -EFAULT;
+ if (ret)
+ goto out_unlock;
+ }
+
if (writable)
prot |= KVM_PGTABLE_PROT_W;
@@ -1206,12 +1259,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
+ int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
+ ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+ if (ret)
+ return false;
+
/*
* We've moved a page around, probably through CoW, so let's treat
* it just like a translation fault and the map handler will clean
@@ -1414,6 +1472,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (!vma)
break;
+ /*
+ * VM_SHARED mappings are not allowed with MTE to avoid races
+ * when updating the PG_mte_tagged page flag, see
+ * sanitise_mte_tags for more details.
+ */
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
+ return -EINVAL;
+
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cf..cba7872d69a8 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return false;
+ /* MTE is incompatible with AArch32 */
+ if (kvm_has_mte(vcpu->kvm) && is32bit)
+ return false;
+
/* Check that the vcpus are either all 32bit or all 64bit */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a7968ad078c..f6f126eb6ac1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~FEATURE(ID_AA64PFR1_MTE);
+ if (kvm_has_mte(vcpu->kvm)) {
+ u64 pfr, mte;
+
+ pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+ }
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_mte(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+#define MTE_REG(name) { \
+ SYS_DESC(SYS_##name), \
+ .access = undef_access, \
+ .reset = reset_unknown, \
+ .reg = name, \
+ .visibility = mte_visibility, \
+}
+
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
- { SYS_DESC(SYS_RGSR_EL1), undef_access },
- { SYS_DESC(SYS_GCR_EL1), undef_access },
+ MTE_REG(RGSR_EL1),
+ MTE_REG(GCR_EL1),
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
- { SYS_DESC(SYS_TFSR_EL1), undef_access },
- { SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+ MTE_REG(TFSR_EL1),
+ MTE_REG(TFSRE0_EL1),
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },