summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRadim Krčmář <rkrcmar@redhat.com>2018-03-28 16:09:09 +0200
committerRadim Krčmář <rkrcmar@redhat.com>2018-03-28 16:09:09 +0200
commitabe7a4586f0cf0ef35ed5f713ccfd4eb0770a5ac (patch)
treec5f5bae60fdd3973b0c5d14a6405a7b66dbb5121
parentd32ef547fdbbeb9c4351f9d3bc84dec998a3be8c (diff)
parentdc6ed61d2f824a595033744fc1f3bf4cb98768b5 (diff)
downloadlinux-abe7a4586f0cf0ef35ed5f713ccfd4eb0770a5ac.tar.bz2
Merge tag 'kvm-arm-for-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
KVM/ARM updates for v4.17 - VHE optimizations - EL2 address space randomization - Variant 3a mitigation for Cortex-A57 and A72 - The usual vgic fixes - Various minor tidying-up
-rw-r--r--Documentation/arm64/memory.txt9
-rw-r--r--arch/arm/include/asm/kvm_asm.h5
-rw-r--r--arch/arm/include/asm/kvm_emulate.h21
-rw-r--r--arch/arm/include/asm/kvm_host.h6
-rw-r--r--arch/arm/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm/include/asm/kvm_mmu.h16
-rw-r--r--arch/arm/include/uapi/asm/kvm.h9
-rw-r--r--arch/arm/kvm/coproc.c61
-rw-r--r--arch/arm/kvm/emulate.c4
-rw-r--r--arch/arm/kvm/hyp/Makefile1
-rw-r--r--arch/arm/kvm/hyp/switch.c16
-rw-r--r--arch/arm64/Kconfig16
-rw-r--r--arch/arm64/include/asm/alternative.h41
-rw-r--r--arch/arm64/include/asm/cpucaps.h2
-rw-r--r--arch/arm64/include/asm/insn.h16
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_asm.h19
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h78
-rw-r--r--arch/arm64/include/asm/kvm_host.h53
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h29
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h165
-rw-r--r--arch/arm64/include/asm/mmu.h8
-rw-r--r--arch/arm64/include/asm/sysreg.h6
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c43
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/bpi.S67
-rw-r--r--arch/arm64/kernel/cpu_errata.c25
-rw-r--r--arch/arm64/kernel/cpufeature.c19
-rw-r--r--arch/arm64/kernel/head.S7
-rw-r--r--arch/arm64/kernel/insn.c190
-rw-r--r--arch/arm64/kvm/Kconfig3
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/debug.c29
-rw-r--r--arch/arm64/kvm/guest.c3
-rw-r--r--arch/arm64/kvm/hyp-init.S1
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/debug-sr.c88
-rw-r--r--arch/arm64/kvm/hyp/entry.S6
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S86
-rw-r--r--arch/arm64/kvm/hyp/switch.c382
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c172
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c78
-rw-r--r--arch/arm64/kvm/inject_fault.c24
-rw-r--r--arch/arm64/kvm/regmap.c67
-rw-r--r--arch/arm64/kvm/sys_regs.c199
-rw-r--r--arch/arm64/kvm/sys_regs.h4
-rw-r--r--arch/arm64/kvm/sys_regs_generic_v8.c4
-rw-r--r--arch/arm64/kvm/va_layout.c227
-rw-r--r--include/kvm/arm_vgic.h15
-rw-r--r--include/linux/irqchip/arm-gic-v3.h1
-rw-r--r--include/linux/irqchip/arm-gic.h1
-rw-r--r--virt/kvm/arm/aarch32.c2
-rw-r--r--virt/kvm/arm/arch_timer.c16
-rw-r--r--virt/kvm/arm/arm.c57
-rw-r--r--virt/kvm/arm/hyp/timer-sr.c44
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c159
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c246
-rw-r--r--virt/kvm/arm/mmu.c180
-rw-r--r--virt/kvm/arm/pmu.c36
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c17
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c15
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c163
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c75
-rw-r--r--virt/kvm/arm/vgic/vgic.c120
-rw-r--r--virt/kvm/arm/vgic/vgic.h6
67 files changed, 2435 insertions, 1045 deletions
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 671bc0639262..c5dab30d3389 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -86,9 +86,12 @@ Translation table lookup with 64KB pages:
+-------------------------------------------------> [63] TTBR0/1
-When using KVM without the Virtualization Host Extensions, the hypervisor
-maps kernel pages in EL2 at a fixed offset from the kernel VA. See the
-kern_hyp_va macro for more details.
+When using KVM without the Virtualization Host Extensions, the
+hypervisor maps kernel pages in EL2 at a fixed (and potentially
+random) offset from the linear mapping. See the kern_hyp_va macro and
+kvm_update_va_mask function for more details. MMIO devices such as
+GICv2 gets mapped next to the HYP idmap page, as do vectors when
+ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
When using KVM with the Virtualization Host Extensions, no additional
mappings are created, since the host kernel runs directly in EL2.
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 36dd2962a42d..5a953ecb0d78 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+/* no VHE on 32-bit :( */
+static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; }
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern void __init_stage2_translation(void);
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9003bd19cb70..6493bd479ddc 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num)
return vcpu_reg(vcpu, reg_num);
}
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
+
+static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu)
+{
+ return *__vcpu_spsr(vcpu);
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ *__vcpu_spsr(vcpu) = v;
+}
static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
u8 reg_num)
@@ -92,14 +102,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr = HCR_GUEST_MASK;
}
-static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hcr;
-}
-
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr = hcr;
+ return (unsigned long *)&vcpu->arch.hcr;
}
static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 248b930563e5..c6a749568dd6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -155,9 +155,6 @@ struct kvm_vcpu_arch {
/* HYP trapping configuration */
u32 hcr;
- /* Interrupt related fields */
- u32 irq_lines; /* IRQ and FIQ levels */
-
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -315,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return false;
}
+static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 1ab8329e9ff7..e93a0cac9add 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,6 +110,10 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index de1b919404e4..707a1f06dc5d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,13 @@
*/
#define kern_hyp_va(kva) (kva)
+/* Contrary to arm64, there is no need to generate a PC-relative address */
+#define hyp_symbol_addr(s) \
+ ({ \
+ typeof(s) *addr = &(s); \
+ addr; \
+ })
+
/*
* KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
*/
@@ -42,8 +49,15 @@
#include <asm/pgalloc.h>
#include <asm/stage2_pgtable.h>
+/* Ensure compatibility with arm64 */
+#define VA_BITS 32
+
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6edd177bb1c7..2ba95d6fe852 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_CRM_SHIFT 7
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT 28
#define ARM_CP15_REG_SHIFT_MASK(x,n) \
(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 6d1d2e26dfe5..3a02e76699a6 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -270,6 +270,60 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_cntp_tval(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u64 now = kvm_phys_timer_read();
+ u64 val;
+
+ if (p->is_write) {
+ val = *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+ *vcpu_reg(vcpu, p->Rt1) = val - now;
+ }
+
+ return true;
+}
+
+static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u32 val;
+
+ if (p->is_write) {
+ val = *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+ *vcpu_reg(vcpu, p->Rt1) = val;
+ }
+
+ return true;
+}
+
+static bool access_cntp_cval(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ u64 val;
+
+ if (p->is_write) {
+ val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
+ val |= *vcpu_reg(vcpu, p->Rt1);
+ kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+ } else {
+ val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+ *vcpu_reg(vcpu, p->Rt1) = val;
+ *vcpu_reg(vcpu, p->Rt2) = val >> 32;
+ }
+
+ return true;
+}
+
/*
* We could trap ID_DFR0 and tell the guest we don't support performance
* monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -423,10 +477,17 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
NULL, reset_unknown, c13_TID_PRIV },
+ /* CNTP */
+ { CRm64(14), Op1( 2), is64, access_cntp_cval},
+
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+ /* CNTP */
+ { CRn(14), CRm( 2), Op1( 0), Op2( 0), is32, access_cntp_tval },
+ { CRn(14), CRm( 2), Op1( 0), Op2( 1), is32, access_cntp_ctl },
+
/* The Configuration Base Address Register. */
{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index cdff963f133a..9046b53d87c1 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
{
unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
switch (mode) {
@@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
*/
void kvm_inject_vabt(struct kvm_vcpu *vcpu)
{
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA);
+ *vcpu_hcr(vcpu) |= HCR_VA;
}
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 63d6b404d88e..7fc0638f263a 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -9,7 +9,6 @@ KVM=../../../../virt/kvm
CFLAGS_ARMV7VE :=$(call cc-option, -march=armv7ve)
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ae45ae96aac2..acf1c37fa49c 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
isb();
}
- write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
+ write_sysreg(vcpu->arch.hcr, HCR);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(HSTR_T(15), HSTR);
write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
@@ -90,18 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
- else
- __vgic_v2_save_state(vcpu);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
}
static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
- else
- __vgic_v2_restore_state(vcpu);
+ }
}
static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
@@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
return true;
}
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7381eeb7ef8e..48ad7ca23f39 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -904,6 +904,22 @@ config HARDEN_BRANCH_PREDICTOR
If unsure, say Y.
+config HARDEN_EL2_VECTORS
+ bool "Harden EL2 vector mapping against system register leak" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to leak privileged information such as the vector base
+ register, resulting in a potential defeat of the EL2 layout
+ randomization.
+
+ This config option will map the vectors to a fixed location,
+ independent of the EL2 code mapping, so that revealing VBAR_EL2
+ to an attacker does not give away any extra information. This
+ only gets enabled on affected CPUs.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 669028172fd6..a91933b1e2e6 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -5,6 +5,8 @@
#include <asm/cpucaps.h>
#include <asm/insn.h>
+#define ARM64_CB_PATCH ARM64_NCAPS
+
#ifndef __ASSEMBLY__
#include <linux/init.h>
@@ -22,12 +24,19 @@ struct alt_instr {
u8 alt_len; /* size of new instruction(s), <= orig_len */
};
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(feature,cb) \
" .word 661b - .\n" /* label */ \
+ " .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
+ " .else\n" \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length);
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(feature,cb) \
".popsection\n" \
+ " .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length);
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
+ ".else\n\t" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
#else
#include <asm/assembler.h>
@@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length);
661:
.endm
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+ .popsection
+661:
+.endm
+
/*
* Provide the other half of the alternative code sequence.
*/
@@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length);
.endm
/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
* previous case.
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index bb263820de13..d4cc54ed0656 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -32,7 +32,7 @@
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_HYP_OFFSET_LOW 14
+#define ARM64_HARDEN_EL2_VECTORS 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
#define ARM64_HAS_NO_FPSIMD 16
#define ARM64_WORKAROUND_REPEAT_TLBI 17
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4214c38d016b..f62c56b1793f 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -70,6 +70,7 @@ enum aarch64_insn_imm_type {
AARCH64_INSN_IMM_6,
AARCH64_INSN_IMM_S,
AARCH64_INSN_IMM_R,
+ AARCH64_INSN_IMM_N,
AARCH64_INSN_IMM_MAX
};
@@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor, 0x7F200000, 0x4A000000)
__AARCH64_INSN_FUNCS(eon, 0x7F200000, 0x4A200000)
__AARCH64_INSN_FUNCS(ands, 0x7F200000, 0x6A000000)
__AARCH64_INSN_FUNCS(bics, 0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(and_imm, 0x7F800000, 0x12000000)
+__AARCH64_INSN_FUNCS(orr_imm, 0x7F800000, 0x32000000)
+__AARCH64_INSN_FUNCS(eor_imm, 0x7F800000, 0x52000000)
+__AARCH64_INSN_FUNCS(ands_imm, 0x7F800000, 0x72000000)
+__AARCH64_INSN_FUNCS(extr, 0x7FA00000, 0x13800000)
__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
__AARCH64_INSN_FUNCS(cbz, 0x7F000000, 0x34000000)
@@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
int shift,
enum aarch64_insn_variant variant,
enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm);
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb);
u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b0c84171e6a3..6dd285e979c9 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -25,6 +25,7 @@
/* Hyp Configuration Register (HCR) bits */
#define HCR_TEA (UL(1) << 37)
#define HCR_TERR (UL(1) << 36)
+#define HCR_TLOR (UL(1) << 35)
#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
@@ -64,6 +65,7 @@
/*
* The bits we set in HCR:
+ * TLOR: Trap LORegion register accesses
* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
@@ -81,9 +83,9 @@
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
- HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+ HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
/* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..d53d40704416 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
@@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void);
extern u64 __vgic_v3_read_vmcr(void);
@@ -70,6 +73,20 @@ extern u32 __init_stage2_translation(void);
extern void __qcom_hyp_sanitize_btac_predictors(void);
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+ adr_l \reg, kvm_host_cpu_state
+ mrs \tmp, tpidr_el2
+ add \reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+ get_host_ctxt \ctxt, \vcpu
+ ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+ kern_hyp_va \vcpu
+.endm
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 413dc82b1e89..23b33e8ea03a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,13 +26,15 @@
#include <asm/esr.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmio.h>
#include <asm/ptrace.h>
#include <asm/cputype.h>
#include <asm/virt.h>
unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
+static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.hcr_el2 & HCR_RW);
+}
+
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
@@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
-}
-static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.hcr_el2;
+ /*
+ * TID3: trap feature register accesses that we virtualise.
+ * For now this is conditional, since no AArch32 feature regs
+ * are currently virtualised.
+ */
+ if (!vcpu_el1_is_32bit(vcpu))
+ vcpu->arch.hcr_el2 |= HCR_TID3;
}
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
{
- vcpu->arch.hcr_el2 = hcr;
+ return (unsigned long *)&vcpu->arch.hcr_el2;
}
static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
@@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
}
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
}
+static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(elr);
+ else
+ return *__vcpu_elr_el1(vcpu);
+}
+
+static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, elr);
+ else
+ *__vcpu_elr_el1(vcpu) = v;
+}
+
static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
@@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
}
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
- return vcpu_spsr32(vcpu);
+ return vcpu_read_spsr32(vcpu);
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ return read_sysreg_el1(spsr);
+ else
+ return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ if (vcpu_mode_is_32bit(vcpu)) {
+ vcpu_write_spsr32(vcpu, v);
+ return;
+ }
+
+ if (vcpu->arch.sysregs_loaded_on_cpu)
+ write_sysreg_el1(v, spsr);
+ else
+ vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
@@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
{
- return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+ return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
- if (vcpu_mode_is_32bit(vcpu))
+ if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
- else
- vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+ } else {
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ sctlr |= (1 << 25);
+ vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr);
+ }
}
static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
@@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
- return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..ab46bc70add6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -272,9 +272,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
- /* Interrupt related fields */
- u64 irq_lines; /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -287,10 +284,25 @@ struct kvm_vcpu_arch {
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
+
+ /* True when deferrable sysregs are loaded on the physical CPU,
+ * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+ bool sysregs_loaded_on_cpu;
};
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a running VCPU. For
+ * example, for userspace access or for system registers that are never context
+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
+
/*
* CP14 and CP15 live in the same array, as they are backed by the
* same system registers.
@@ -298,14 +310,6 @@ struct kvm_vcpu_arch {
#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
-#else
-#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
-#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
-#endif
-
struct kvm_vm_stat {
ulong remote_tlb_flush;
};
@@ -358,10 +362,15 @@ int kvm_perf_teardown(void);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
+ u64 tpidr_el2;
+
/*
* Call initialization code, and switch to the full blown HYP code.
* If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +379,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
*/
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+ /*
+ * Calculate the raw per-cpu offset without a translation from the
+ * kernel's mapping to the linear mapping, and store it in tpidr_el2
+ * so that we can use adr_l to access per-cpu variables in EL2.
+ */
+ tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+ - (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+ kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
static inline void kvm_arch_hardware_unsetup(void) {}
@@ -416,6 +435,13 @@ static inline void kvm_arm_vhe_guest_enter(void)
static inline void kvm_arm_vhe_guest_exit(void)
{
local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+ /*
+ * When we exit from the guest we change a number of CPU configuration
+ * parameters, such as traps. Make sure these changes take effect
+ * before running the host or additional guests.
+ */
+ isb();
}
static inline bool kvm_arm_harden_branch_predictor(void)
@@ -423,4 +449,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index f26f9cd70c72..384c34397619 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void) \
return val; \
}
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __timer_enable_traps(struct kvm_vcpu *vcpu);
void __timer_disable_traps(struct kvm_vcpu *vcpu);
-void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
void __sysreg32_save_state(struct kvm_vcpu *vcpu);
void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
-void __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
bool __fpsimd_enabled(void);
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
+void deactivate_traps_vhe_put(void);
+
u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
void __noreturn __hyp_do_panic(unsigned long, ...);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7faed6e48b46..082110993647 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -69,9 +69,6 @@
* mappings, and none of this applies in that case.
*/
-#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1)
-#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1)
-
#ifdef __ASSEMBLY__
#include <asm/alternative.h>
@@ -81,28 +78,19 @@
* Convert a kernel VA into a HYP VA.
* reg: VA to be converted.
*
- * This generates the following sequences:
- * - High mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * nop
- * - Low mask:
- * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK
- * - VHE:
- * nop
- * nop
- *
- * The "low mask" version works because the mask is a strict subset of
- * the "high mask", hence performing the first mask for nothing.
- * Should be completely invisible on any viable CPU.
+ * The actual code generation takes place in kvm_update_va_mask, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask uses the
+ * specific registers encoded in the instructions).
*/
.macro kern_hyp_va reg
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else_nop_endif
-alternative_if ARM64_HYP_OFFSET_LOW
- and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_else_nop_endif
+alternative_cb kvm_update_va_mask
+ and \reg, \reg, #1 /* mask with va_mask */
+ ror \reg, \reg, #1 /* rotate to the first tag bit */
+ add \reg, \reg, #0 /* insert the low 12 bits of the tag */
+ add \reg, \reg, #0, lsl 12 /* insert the top 12 bits of the tag */
+ ror \reg, \reg, #63 /* rotate back */
+alternative_cb_end
.endm
#else
@@ -113,24 +101,44 @@ alternative_else_nop_endif
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
+void kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
static inline unsigned long __kern_hyp_va(unsigned long v)
{
- asm volatile(ALTERNATIVE("and %0, %0, %1",
- "nop",
- ARM64_HAS_VIRT_HOST_EXTN)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_HIGH_MASK));
- asm volatile(ALTERNATIVE("nop",
- "and %0, %0, %1",
- ARM64_HYP_OFFSET_LOW)
- : "+r" (v)
- : "i" (HYP_PAGE_OFFSET_LOW_MASK));
+ asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
+ "ror %0, %0, #1\n"
+ "add %0, %0, #0\n"
+ "add %0, %0, #0, lsl 12\n"
+ "ror %0, %0, #63\n",
+ kvm_update_va_mask)
+ : "+r" (v));
return v;
}
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
/*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to a loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s) \
+ ({ \
+ typeof(s) *addr; \
+ asm("adrp %0, %1\n" \
+ "add %0, %0, :lo12:%1\n" \
+ : "=r" (addr) : "S" (&s)); \
+ addr; \
+ })
+
+/*
* We currently only support a 40bit IPA.
*/
#define KVM_PHYS_SHIFT (40)
@@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
#include <asm/stage2_pgtable.h>
int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr);
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
@@ -249,7 +261,7 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
- return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+ return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
}
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
@@ -348,36 +360,95 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
+ * hardening sequence is placed in one of the vector slots, which is
+ * executed before jumping to the real vectors.
+ *
+ * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
+ * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
+ * hardening sequence is mapped next to the idmap page, and executed
+ * before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
+ * empty slot is selected, mapped next to the idmap page, and
+ * executed before jumping to the real vectors.
+ *
+ * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
#include <asm/mmu.h>
+extern void *__kvm_bp_vect_base;
+extern int __kvm_harden_el2_vector_slot;
+
static inline void *kvm_get_hyp_vector(void)
{
struct bp_hardening_data *data = arm64_get_bp_hardening_data();
- void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+ void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+ int slot = -1;
- if (data->fn) {
- vect = __bp_harden_hyp_vecs_start +
- data->hyp_vectors_slot * SZ_2K;
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
+ vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start));
+ slot = data->hyp_vectors_slot;
+ }
- if (!has_vhe())
- vect = lm_alias(vect);
+ if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
+ vect = __kvm_bp_vect_base;
+ if (slot == -1)
+ slot = __kvm_harden_el2_vector_slot;
}
+ if (slot != -1)
+ vect += slot * SZ_2K;
+
return vect;
}
+/* This is only called on a !VHE system */
static inline int kvm_map_vectors(void)
{
- return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
- kvm_ksym_ref(__bp_harden_hyp_vecs_end),
- PAGE_HYP_EXEC);
-}
+ /*
+ * HBP = ARM64_HARDEN_BRANCH_PREDICTOR
+ * HEL2 = ARM64_HARDEN_EL2_VECTORS
+ *
+ * !HBP + !HEL2 -> use direct vectors
+ * HBP + !HEL2 -> use hardened vectors in place
+ * !HBP + HEL2 -> allocate one vector slot and use exec mapping
+ * HBP + HEL2 -> use hardened vertors and use exec mapping
+ */
+ if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
+ __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start);
+ __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
+ }
+
+ if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start);
+ unsigned long size = (__bp_harden_hyp_vecs_end -
+ __bp_harden_hyp_vecs_start);
+
+ /*
+ * Always allocate a spare vector slot, as we don't
+ * know yet which CPUs have a BP hardening slot that
+ * we can reuse.
+ */
+ __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
+ return create_hyp_exec_mappings(vect_pa, size,
+ &__kvm_bp_vect_base);
+ }
+ return 0;
+}
#else
static inline void *kvm_get_hyp_vector(void)
{
- return kvm_ksym_ref(__kvm_hyp_vector);
+ return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
}
static inline int kvm_map_vectors(void)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a050d4f3615d..dd320df0d026 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -21,6 +21,8 @@
#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
#define TTBR_ASID_MASK (UL(0xffff) << 48)
+#define BP_HARDEN_EL2_SLOTS 4
+
#ifndef __ASSEMBLY__
typedef struct {
@@ -49,9 +51,13 @@ struct bp_hardening_data {
bp_hardening_cb_t fn;
};
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \
+ defined(CONFIG_HARDEN_EL2_VECTORS))
extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+extern atomic_t arm64_el2_vector_last_slot;
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0e1960c59197..69a99856461c 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -288,6 +288,12 @@
#define SYS_MAIR_EL1 sys_reg(3, 0, 10, 2, 0)
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
+#define SYS_LORSA_EL1 sys_reg(3, 0, 10, 4, 0)
+#define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1)
+#define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2)
+#define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3)
+#define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7)
+
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index b87541360f43..93bce17109a6 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -54,9 +54,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
-ifeq ($(CONFIG_KVM),y)
-arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
-endif
+arm64-obj-$(CONFIG_KVM_INDIRECT_VECTORS)+= bpi.o
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 414288a558c8..5c4bce4ac381 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
+static void patch_alternative(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ __le32 *replptr;
+ int i;
+
+ replptr = ALT_REPL_PTR(alt);
+ for (i = 0; i < nr_inst; i++) {
+ u32 insn;
+
+ insn = get_alt_insn(alt, origptr + i, replptr + i);
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- __le32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *updptr;
+ alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i, nr_inst;
+ int nr_inst;
- if (!cpus_have_cap(alt->cpufeature))
+ /* Use ARM64_CB_PATCH as an unconditional patch */
+ if (alt->cpufeature < ARM64_CB_PATCH &&
+ !cpus_have_cap(alt->cpufeature))
continue;
- BUG_ON(alt->alt_len != alt->orig_len);
+ if (alt->cpufeature == ARM64_CB_PATCH)
+ BUG_ON(alt->alt_len != 0);
+ else
+ BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = ALT_ORIG_PTR(alt);
- replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? lm_alias(origptr) : origptr;
- nr_inst = alt->alt_len / sizeof(insn);
+ nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- for (i = 0; i < nr_inst; i++) {
- insn = get_alt_insn(alt, origptr + i, replptr + i);
- updptr[i] = cpu_to_le32(insn);
- }
+ if (alt->cpufeature < ARM64_CB_PATCH)
+ alt_cb = patch_alternative;
+ else
+ alt_cb = ALT_REPL_PTR(alt);
+
+ alt_cb(alt, origptr, updptr, nr_inst);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + nr_inst));
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04110cd..78e1b0a70aaf 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -138,6 +138,7 @@ int main(void)
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
+ DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
#endif
#ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
index e5de33513b5d..bb0b67722e86 100644
--- a/arch/arm64/kernel/bpi.S
+++ b/arch/arm64/kernel/bpi.S
@@ -19,42 +19,61 @@
#include <linux/linkage.h>
#include <linux/arm-smccc.h>
-.macro ventry target
- .rept 31
+#include <asm/alternative.h>
+#include <asm/mmu.h>
+
+.macro hyp_ventry
+ .align 7
+1: .rept 27
nop
.endr
- b \target
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp x0, x1, [sp, #-16]!
+ * movz x0, #(addr & 0xffff)
+ * movk x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk x0, #((addr >> 32) & 0xffff), lsl #32
+ * br x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb kvm_patch_vector_branch
+ b __kvm_hyp_vector + (1b - 0b)
+ nop
+ nop
+ nop
+ nop
+alternative_cb_end
.endm
-.macro vectors target
- ventry \target + 0x000
- ventry \target + 0x080
- ventry \target + 0x100
- ventry \target + 0x180
-
- ventry \target + 0x200
- ventry \target + 0x280
- ventry \target + 0x300
- ventry \target + 0x380
+.macro generate_vectors
+0:
+ .rept 16
+ hyp_ventry
+ .endr
+ .org 0b + SZ_2K // Safety measure
+.endm
- ventry \target + 0x400
- ventry \target + 0x480
- ventry \target + 0x500
- ventry \target + 0x580
- ventry \target + 0x600
- ventry \target + 0x680
- ventry \target + 0x700
- ventry \target + 0x780
-.endm
+ .text
+ .pushsection .hyp.text, "ax"
.align 11
ENTRY(__bp_harden_hyp_vecs_start)
- .rept 4
- vectors __kvm_hyp_vector
+ .rept BP_HARDEN_EL2_SLOTS
+ generate_vectors
.endr
ENTRY(__bp_harden_hyp_vecs_end)
+ .popsection
+
ENTRY(__qcom_hyp_sanitize_link_stack_start)
stp x29, x30, [sp, #-16]!
.rept 16
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 52f15cd896e1..7369ad552700 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -60,6 +60,8 @@ static int cpu_enable_trap_ctr_access(void *__unused)
return 0;
}
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -90,7 +92,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
const char *hyp_vecs_end)
{
- static int last_slot = -1;
static DEFINE_SPINLOCK(bp_lock);
int cpu, slot = -1;
@@ -103,10 +104,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
}
if (slot == -1) {
- last_slot++;
- BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
- / SZ_2K) <= last_slot);
- slot = last_slot;
+ slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+ BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
}
@@ -242,6 +241,10 @@ static int qcom_enable_link_stack_sanitization(void *data)
.midr_range_min = 0, \
.midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK)
+#ifndef ERRATA_MIDR_ALL_VERSIONS
+#define ERRATA_MIDR_ALL_VERSIONS(x) MIDR_ALL_VERSIONS(x)
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -426,6 +429,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.enable = enable_smccc_arch_workaround_1,
},
#endif
+#ifdef CONFIG_HARDEN_EL2_VECTORS
+ {
+ .desc = "Cortex-A57 EL2 vector hardening",
+ .capability = ARM64_HARDEN_EL2_VECTORS,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ },
+ {
+ .desc = "Cortex-A72 EL2 vector hardening",
+ .capability = ARM64_HARDEN_EL2_VECTORS,
+ ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 2985a067fc13..5b25d56bccfd 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -831,19 +831,6 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused
return is_kernel_in_hyp_mode();
}
-static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
- int __unused)
-{
- phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
-
- /*
- * Activate the lower HYP offset only if:
- * - the idmap doesn't clash with it,
- * - the kernel is not running at EL2.
- */
- return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
-}
-
static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
{
u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
@@ -1029,12 +1016,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
- {
- .desc = "Reduced HYP mapping offset",
- .capability = ARM64_HYP_OFFSET_LOW,
- .def_scope = SCOPE_SYSTEM,
- .matches = hyp_offset_low,
- },
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.desc = "Kernel page table isolation (KPTI)",
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2b6b8b24e5ab..b0853069702f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -577,6 +577,13 @@ set_hcr:
7:
msr mdcr_el2, x3 // Configure debug traps
+ /* LORegions */
+ mrs x1, id_aa64mmfr1_el1
+ ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+ cbz x0, 1f
+ msr_s SYS_LORC_EL1, xzr
+1:
+
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2718a77da165..816d03c4c913 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -35,6 +35,7 @@
#define AARCH64_INSN_SF_BIT BIT(31)
#define AARCH64_INSN_N_BIT BIT(22)
+#define AARCH64_INSN_LSL_12 BIT(22)
static int aarch64_insn_encoding_class[] = {
AARCH64_INSN_CLS_UNKNOWN,
@@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
mask = BIT(6) - 1;
shift = 16;
break;
+ case AARCH64_INSN_IMM_N:
+ mask = 1;
+ shift = 22;
+ break;
default:
return -EINVAL;
}
@@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
return AARCH64_BREAK_FAULT;
}
+ /* We can't encode more than a 24bit value (12bit + 12bit shift) */
+ if (imm & ~(BIT(24) - 1))
+ goto out;
+
+ /* If we have something in the top 12 bits... */
if (imm & ~(SZ_4K - 1)) {
- pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
- return AARCH64_BREAK_FAULT;
+ /* ... and in the low 12 bits -> error */
+ if (imm & (SZ_4K - 1))
+ goto out;
+
+ imm >>= 12;
+ insn |= AARCH64_INSN_LSL_12;
}
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+
+out:
+ pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+ return AARCH64_BREAK_FAULT;
}
u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
@@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
__check_hi, __check_ls, __check_ge, __check_lt,
__check_gt, __check_le, __check_al, __check_al
};
+
+static bool range_of_ones(u64 val)
+{
+ /* Doesn't handle full ones or full zeroes */
+ u64 sval = val >> __ffs64(val);
+
+ /* One of Sean Eron Anderson's bithack tricks */
+ return ((sval + 1) & (sval)) == 0;
+}
+
+static u32 aarch64_encode_immediate(u64 imm,
+ enum aarch64_insn_variant variant,
+ u32 insn)
+{
+ unsigned int immr, imms, n, ones, ror, esz, tmp;
+ u64 mask = ~0UL;
+
+ /* Can't encode full zeroes or full ones */
+ if (!imm || !~imm)
+ return AARCH64_BREAK_FAULT;
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (upper_32_bits(imm))
+ return AARCH64_BREAK_FAULT;
+ esz = 32;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ insn |= AARCH64_INSN_SF_BIT;
+ esz = 64;
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ /*
+ * Inverse of Replicate(). Try to spot a repeating pattern
+ * with a pow2 stride.
+ */
+ for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
+ u64 emask = BIT(tmp) - 1;
+
+ if ((imm & emask) != ((imm >> tmp) & emask))
+ break;
+
+ esz = tmp;
+ mask = emask;
+ }
+
+ /* N is only set if we're encoding a 64bit value */
+ n = esz == 64;
+
+ /* Trim imm to the element size */
+ imm &= mask;
+
+ /* That's how many ones we need to encode */
+ ones = hweight64(imm);
+
+ /*
+ * imms is set to (ones - 1), prefixed with a string of ones
+ * and a zero if they fit. Cap it to 6 bits.
+ */
+ imms = ones - 1;
+ imms |= 0xf << ffs(esz);
+ imms &= BIT(6) - 1;
+
+ /* Compute the rotation */
+ if (range_of_ones(imm)) {
+ /*
+ * Pattern: 0..01..10..0
+ *
+ * Compute how many rotate we need to align it right
+ */
+ ror = __ffs64(imm);
+ } else {
+ /*
+ * Pattern: 0..01..10..01..1
+ *
+ * Fill the unused top bits with ones, and check if
+ * the result is a valid immediate (all ones with a
+ * contiguous ranges of zeroes).
+ */
+ imm |= ~mask;
+ if (!range_of_ones(~imm))
+ return AARCH64_BREAK_FAULT;
+
+ /*
+ * Compute the rotation to get a continuous set of
+ * ones, with the first bit set at position 0
+ */
+ ror = fls(~imm);
+ }
+
+ /*
+ * immr is the number of bits we need to rotate back to the
+ * original set of ones. Note that this is relative to the
+ * element size...
+ */
+ immr = (esz - ror) % esz;
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+ enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u64 imm)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LOGIC_AND:
+ insn = aarch64_insn_get_and_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_ORR:
+ insn = aarch64_insn_get_orr_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_EOR:
+ insn = aarch64_insn_get_eor_imm_value();
+ break;
+ case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+ insn = aarch64_insn_get_ands_imm_value();
+ break;
+ default:
+ pr_err("%s: unknown logical encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_encode_immediate(imm, variant, insn);
+}
+
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+ enum aarch64_insn_register Rm,
+ enum aarch64_insn_register Rn,
+ enum aarch64_insn_register Rd,
+ u8 lsb)
+{
+ u32 insn;
+
+ insn = aarch64_insn_get_extr_value();
+
+ switch (variant) {
+ case AARCH64_INSN_VARIANT_32BIT:
+ if (lsb > 31)
+ return AARCH64_BREAK_FAULT;
+ break;
+ case AARCH64_INSN_VARIANT_64BIT:
+ if (lsb > 63)
+ return AARCH64_BREAK_FAULT;
+ insn |= AARCH64_INSN_SF_BIT;
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
+ break;
+ default:
+ pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
+}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 2257dfcc44cc..a2e3a5af1113 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -57,6 +57,9 @@ config KVM_ARM_PMU
Adds support for a virtual Performance Monitoring Unit (PMU) in
virtual machines.
+config KVM_INDIRECT_VECTORS
+ def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+
source drivers/vhost/Kconfig
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 87c4f7ae24de..93afff91cb7c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fa63b28c65e0..a1f4ebdfe6d3 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2);
*/
static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+ u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+
+ vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
vcpu->arch.guest_debug_preserved.mdscr_el1);
@@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
{
- vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+ u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+ vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
- vcpu_sys_reg(vcpu, MDSCR_EL1));
+ vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
/**
@@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
{
bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+ unsigned long mdscr;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
} else {
- vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_SS;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
@@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
*/
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
/* Enable breakpoints/watchpoints */
- vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr |= DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
@@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
if (trap_debug)
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+ /* If KDE or MDE are set, perform a full save/restore cycle. */
+ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
- trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+ trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
}
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d7e3299a7734..959e50d2588c 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -363,8 +363,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
int ret = 0;
- vcpu_load(vcpu);
-
trace_kvm_set_guest_debug(vcpu, dbg->control);
if (dbg->control & ~KVM_GUESTDBG_VALID_MASK) {
@@ -386,7 +384,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
}
out:
- vcpu_put(vcpu);
return ret;
}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 5aa9ccf6db99..6fd91b31a131 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -117,7 +117,6 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
/* Set the stack and new vectors */
kern_hyp_va x1
mov sp, x1
- kern_hyp_va x2
msr vbar_el2, x2
/* copy tpidr_el1 into tpidr_el2 for use by HYP */
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f04400d494b7..4313f7475333 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
obj-$(CONFIG_KVM_ARM_HOST) += entry.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index dabb5cc7b087..3e717f66f011 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -66,11 +66,6 @@
default: write_debug(ptr[0], reg, 0); \
}
-static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
-{
- /* The vcpu can run. but it can't hide. */
-}
-
static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;
@@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
dsb(nsh);
}
-static hyp_alternate_select(__debug_save_spe,
- __debug_save_spe_nvhe, __debug_save_spe_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
{
if (!pmscr_el1)
return;
@@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
}
-void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
wrps = (aa64dfr0 >> 20) & 0xf;
@@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
}
-void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug_arch *dbg,
- struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug_arch *dbg,
+ struct kvm_cpu_context *ctxt)
{
u64 aa64dfr0;
int brps, wrps;
- if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
- return;
-
aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
brps = (aa64dfr0 >> 12) & 0xf;
@@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
}
-void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
{
- /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
- * a full save/restore cycle. */
- if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
- (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
- vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
-
- __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
- __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ /*
+ * Non-VHE: Disable and flush SPE data generation
+ * VHE: The vcpu can run, but it can't hide.
+ */
+ if (!has_vhe())
+ __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, host_dbg, host_ctxt);
+ __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
}
-void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
- __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
- kern_hyp_va(vcpu->arch.host_cpu_context));
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ struct kvm_guest_debug_arch *host_dbg;
+ struct kvm_guest_debug_arch *guest_dbg;
+
+ if (!has_vhe())
+ __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+
+ if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+ return;
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ guest_ctxt = &vcpu->arch.ctxt;
+ host_dbg = &vcpu->arch.host_debug_state.regs;
+ guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+ __debug_save_state(vcpu, guest_dbg, guest_ctxt);
+ __debug_restore_state(vcpu, host_dbg, host_ctxt);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
- vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+ vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
}
u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068ee3a5..1f458f7c3b44 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
- // Store host_ctxt and vcpu for use at exit time
- stp x1, x0, [sp, #-16]!
-
add x18, x0, #VCPU_CONTEXT
// Restore guest regs x0-x17
@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
// Store the guest regs x19-x29, lr
save_callee_saved_regs x1
- // Restore the host_ctxt from the stack
- ldr x2, [sp], #16
+ get_host_ctxt x2, x3
// Now restore the host regs
restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f36464bd57c5..87dfecce82b1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -55,15 +55,9 @@ ENTRY(__vhe_hyp_call)
ENDPROC(__vhe_hyp_call)
el1_sync: // Guest trapped into EL2
- stp x0, x1, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x1, esr_el2
-alternative_else
- mrs x1, esr_el1
-alternative_endif
- lsr x0, x1, #ESR_ELx_EC_SHIFT
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
cmp x0, #ESR_ELx_EC_HVC64
ccmp x0, #ESR_ELx_EC_HVC32, #4, ne
b.ne el1_trap
@@ -117,10 +111,14 @@ el1_hvc_guest:
eret
el1_trap:
+ get_vcpu_ptr x1, x0
+
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
/*
* x0: ESR_EC
+ * x1: vcpu pointer
*/
- ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -137,18 +135,18 @@ alternative_else_nop_endif
b __guest_exit
el1_irq:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
- stp x0, x1, [sp, #-16]!
- ldr x1, [sp, #16 + 8]
+ get_vcpu_ptr x1, x0
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
el2_error:
+ ldp x0, x1, [sp], #16
+
/*
* Only two possibilities:
* 1) Either we come from the exit path, having just unmasked
@@ -180,14 +178,7 @@ ENTRY(__hyp_do_panic)
ENDPROC(__hyp_do_panic)
ENTRY(__hyp_panic)
- /*
- * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
- * not be accessible by this address from EL2, hyp_panic() converts
- * it with kern_hyp_va() before use.
- */
- ldr x0, =kvm_host_cpu_state
- mrs x1, tpidr_el2
- add x0, x0, x1
+ get_host_ctxt x0, x1
b hyp_panic
ENDPROC(__hyp_panic)
@@ -206,32 +197,43 @@ ENDPROC(\label)
invalid_vector el2h_sync_invalid
invalid_vector el2h_irq_invalid
invalid_vector el2h_fiq_invalid
- invalid_vector el1_sync_invalid
- invalid_vector el1_irq_invalid
invalid_vector el1_fiq_invalid
.ltorg
.align 11
+.macro valid_vect target
+ .align 7
+ stp x0, x1, [sp, #-16]!
+ b \target
+.endm
+
+.macro invalid_vect target
+ .align 7
+ b \target
+ ldp x0, x1, [sp], #16
+ b \target
+.endm
+
ENTRY(__kvm_hyp_vector)
- ventry el2t_sync_invalid // Synchronous EL2t
- ventry el2t_irq_invalid // IRQ EL2t
- ventry el2t_fiq_invalid // FIQ EL2t
- ventry el2t_error_invalid // Error EL2t
-
- ventry el2h_sync_invalid // Synchronous EL2h
- ventry el2h_irq_invalid // IRQ EL2h
- ventry el2h_fiq_invalid // FIQ EL2h
- ventry el2_error // Error EL2h
-
- ventry el1_sync // Synchronous 64-bit EL1
- ventry el1_irq // IRQ 64-bit EL1
- ventry el1_fiq_invalid // FIQ 64-bit EL1
- ventry el1_error // Error 64-bit EL1
-
- ventry el1_sync // Synchronous 32-bit EL1
- ventry el1_irq // IRQ 32-bit EL1
- ventry el1_fiq_invalid // FIQ 32-bit EL1
- ventry el1_error // Error 32-bit EL1
+ invalid_vect el2t_sync_invalid // Synchronous EL2t
+ invalid_vect el2t_irq_invalid // IRQ EL2t
+ invalid_vect el2t_fiq_invalid // FIQ EL2t
+ invalid_vect el2t_error_invalid // Error EL2t
+
+ invalid_vect el2h_sync_invalid // Synchronous EL2h
+ invalid_vect el2h_irq_invalid // IRQ EL2h
+ invalid_vect el2h_fiq_invalid // FIQ EL2h
+ valid_vect el2_error // Error EL2h
+
+ valid_vect el1_sync // Synchronous 64-bit EL1
+ valid_vect el1_irq // IRQ 64-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 64-bit EL1
+ valid_vect el1_error // Error 64-bit EL1
+
+ valid_vect el1_sync // Synchronous 32-bit EL1
+ valid_vect el1_irq // IRQ 32-bit EL1
+ invalid_vect el1_fiq_invalid // FIQ 32-bit EL1
+ valid_vect el1_error // Error 32-bit EL1
ENDPROC(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 870f4b1587f9..07b572173265 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void)
return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}
-static bool __hyp_text __fpsimd_enabled_vhe(void)
+static bool fpsimd_enabled_vhe(void)
{
return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
}
-static hyp_alternate_select(__fpsimd_is_enabled,
- __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-bool __hyp_text __fpsimd_enabled(void)
-{
- return __fpsimd_is_enabled()();
-}
-
-static void __hyp_text __activate_traps_vhe(void)
-{
- u64 val;
-
- val = read_sysreg(cpacr_el1);
- val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
- write_sysreg(val, cpacr_el1);
-
- write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-
-static void __hyp_text __activate_traps_nvhe(void)
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
- u64 val;
+ if (!vcpu_el1_is_32bit(vcpu))
+ return;
- val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
- write_sysreg(val, cptr_el2);
+ vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
}
-static hyp_alternate_select(__activate_traps_arch,
- __activate_traps_nvhe, __activate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
{
- u64 val;
-
/*
* We are about to set CPTR_EL2.TFP to trap all floating point
* register accesses to EL2, however, the ARM ARM clearly states that
@@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
* it will cause an exception.
*/
- val = vcpu->arch.hcr_el2;
-
- if (!(val & HCR_RW) && system_supports_fpsimd()) {
+ if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
+}
- if (val & HCR_RW) /* for AArch64 only: */
- val |= HCR_TID3; /* TID3: trap feature register accesses */
-
- write_sysreg(val, hcr_el2);
-
- if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
- write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+ /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
+
/*
* Make sure we trap PMU access from EL0 to EL2. Also sanitize
* PMSELR_EL0 to make sure it never contains the cycle
@@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- __activate_traps_arch()();
}
-static void __hyp_text __deactivate_traps_vhe(void)
+static void __hyp_text __deactivate_traps_common(void)
{
- extern char vectors[]; /* kernel exception vectors */
- u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ write_sysreg(0, hstr_el2);
+ write_sysreg(0, pmuserenr_el0);
+}
- mdcr_el2 &= MDCR_EL2_HPMN_MASK |
- MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
- MDCR_EL2_TPMS;
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
- write_sysreg(mdcr_el2, mdcr_el2);
+ val = read_sysreg(cpacr_el1);
+ val |= CPACR_EL1_TTA;
+ val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ write_sysreg(val, cpacr_el1);
+
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ __activate_traps_common(vcpu);
+
+ val = CPTR_EL2_DEFAULT;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+ write_sysreg(val, cptr_el2);
+}
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ write_sysreg(hcr, hcr_el2);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+ __activate_traps_fpsimd32(vcpu);
+ if (has_vhe())
+ activate_traps_vhe(vcpu);
+ else
+ __activate_traps_nvhe(vcpu);
+}
+
+static void deactivate_traps_vhe(void)
+{
+ extern char vectors[]; /* kernel exception vectors */
write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
write_sysreg(vectors, vbar_el1);
@@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ __deactivate_traps_common();
+
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
@@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void)
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
}
-static hyp_alternate_select(__deactivate_traps_arch,
- __deactivate_traps_nvhe, __deactivate_traps_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
{
/*
@@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
- __deactivate_traps_arch()();
- write_sysreg(0, hstr_el2);
- write_sysreg(0, pmuserenr_el0);
+ if (has_vhe())
+ deactivate_traps_vhe();
+ else
+ __deactivate_traps_nvhe();
+}
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+ __activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+ u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+ mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+ MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+ MDCR_EL2_TPMS;
+
+ write_sysreg(mdcr_el2, mdcr_el2);
+
+ __deactivate_traps_common();
}
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_vm(struct kvm *kvm)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
write_sysreg(kvm->arch.vttbr, vttbr_el2);
}
@@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
write_sysreg(0, vttbr_el2);
}
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
{
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
__vgic_v3_save_state(vcpu);
- else
- __vgic_v2_save_state(vcpu);
-
- write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+ __vgic_v3_deactivate_traps(vcpu);
+ }
}
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+/* Restore VGICv3 state on non_VEH systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
{
- u64 val;
-
- val = read_sysreg(hcr_el2);
- val |= HCR_INT_OVERRIDE;
- val |= vcpu->arch.irq_lines;
- write_sysreg(val, hcr_el2);
-
- if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+ __vgic_v3_activate_traps(vcpu);
__vgic_v3_restore_state(vcpu);
- else
- __vgic_v2_restore_state(vcpu);
+ }
}
static bool __hyp_text __true_value(void)
@@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
}
}
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm_cpu_context *host_ctxt;
- struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
- u64 exit_code;
-
- vcpu = kern_hyp_va(vcpu);
-
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
- host_ctxt->__hyp_running_vcpu = vcpu;
- guest_ctxt = &vcpu->arch.ctxt;
-
- __sysreg_save_host_state(host_ctxt);
- __debug_cond_save_host_state(vcpu);
-
- __activate_traps(vcpu);
- __activate_vm(vcpu);
-
- __vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
- /*
- * We must restore the 32-bit state before the sysregs, thanks
- * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
- */
- __sysreg32_restore_state(vcpu);
- __sysreg_restore_guest_state(guest_ctxt);
- __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
-
- /* Jump in the fire! */
-again:
- exit_code = __guest_enter(vcpu, host_ctxt);
- /* And we're baaack! */
-
- if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+ if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
/*
* We're using the raw exception code in order to only process
* the trap if no SError is pending. We will come back to the
* same PC once the SError has been injected, and replay the
* trapping instruction.
*/
- if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
- goto again;
+ if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP) {
+ *exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -366,9 +352,9 @@ again:
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
if (ret == -1) {
@@ -380,29 +366,112 @@ again:
*/
if (!__skip_instr(vcpu))
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
- exit_code = ARM_EXCEPTION_EL1_SERROR;
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
}
-
- /* 0 falls through to be handler out of EL2 */
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- exit_code == ARM_EXCEPTION_TRAP &&
+ *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
if (ret == 1) {
if (__skip_instr(vcpu))
- goto again;
+ return true;
else
- exit_code = ARM_EXCEPTION_TRAP;
+ *exit_code = ARM_EXCEPTION_TRAP;
}
+ }
- /* 0 falls through to be handled out of EL2 */
+ /* Return to the host kernel and handle the exit */
+ return false;
+}
+
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ host_ctxt = vcpu->arch.host_cpu_context;
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ sysreg_save_host_state_vhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(vcpu->kvm);
+
+ sysreg_restore_guest_state_vhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
+ fp_enabled = fpsimd_enabled_vhe();
+
+ sysreg_save_guest_state_vhe(guest_ctxt);
+
+ __deactivate_traps(vcpu);
+
+ sysreg_restore_host_state_vhe(host_ctxt);
+
+ if (fp_enabled) {
+ __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+ __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
+ __debug_switch_to_host(vcpu);
+
+ return exit_code;
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_cpu_context *guest_ctxt;
+ bool fp_enabled;
+ u64 exit_code;
+
+ vcpu = kern_hyp_va(vcpu);
+
+ host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
+ guest_ctxt = &vcpu->arch.ctxt;
+
+ __sysreg_save_state_nvhe(host_ctxt);
+
+ __activate_traps(vcpu);
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
+ /*
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_state_nvhe(guest_ctxt);
+ __debug_switch_to_guest(vcpu);
+
+ do {
+ /* Jump in the fire! */
+ exit_code = __guest_enter(vcpu, host_ctxt);
+
+ /* And we're baaack! */
+ } while (fixup_guest_exit(vcpu, &exit_code));
+
if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
u32 midr = read_cpuid_id();
@@ -413,29 +482,29 @@ again:
}
}
- fp_enabled = __fpsimd_enabled();
+ fp_enabled = __fpsimd_enabled_nvhe();
- __sysreg_save_guest_state(guest_ctxt);
+ __sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
- __vgic_save_state(vcpu);
+ __hyp_vgic_save_state(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
+ __sysreg_restore_state_nvhe(host_ctxt);
if (fp_enabled) {
__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ __fpsimd_save_fpexc32(vcpu);
}
- __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
/*
* This must come after restoring the host sysregs, since a non-VHE
* system may enable SPE here and make use of the TTBRs.
*/
- __debug_cond_restore_host_state(vcpu);
+ __debug_switch_to_host(vcpu);
return exit_code;
}
@@ -443,10 +512,20 @@ again:
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+ struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu;
unsigned long str_va;
+ vcpu = __host_ctxt->__hyp_running_vcpu;
+
+ if (read_sysreg(vttbr_el2)) {
+ __timer_disable_traps(vcpu);
+ __deactivate_traps(vcpu);
+ __deactivate_vm(vcpu);
+ __sysreg_restore_state_nvhe(__host_ctxt);
+ }
+
/*
* Force the panic string to be loaded from the literal pool,
* making sure it is a kernel address and not a PC-relative
@@ -460,40 +539,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
- struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
{
+ struct kvm_vcpu *vcpu;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ __deactivate_traps(vcpu);
+ sysreg_restore_host_state_vhe(host_ctxt);
+
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg(hpfar_el2), par, vcpu);
}
-static hyp_alternate_select(__hyp_call_panic,
- __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
- ARM64_HAS_VIRT_HOST_EXTN);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
- struct kvm_vcpu *vcpu = NULL;
-
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
- if (read_sysreg(vttbr_el2)) {
- struct kvm_cpu_context *host_ctxt;
-
- host_ctxt = kern_hyp_va(__host_ctxt);
- vcpu = host_ctxt->__hyp_running_vcpu;
- __timer_disable_traps(vcpu);
- __deactivate_traps(vcpu);
- __deactivate_vm(vcpu);
- __sysreg_restore_host_state(host_ctxt);
- }
-
- /* Call panic for real */
- __hyp_call_panic()(spsr, elr, par, vcpu);
+ if (!has_vhe())
+ __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+ else
+ __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 2c17afd2be96..b3894df6bf1a 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,32 +19,43 @@
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
-/* Yes, this does nothing, on purpose */
-static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
- * and guest must save everything.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU. EL1 registers only need to be
+ * switched when potentially going to run a different VCPU. The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
- ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
- ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
- ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
}
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+ ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
+ ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
+ ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
@@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
@@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_save_host_state,
- __sysreg_save_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_save_el1_state(ctxt);
+ __sysreg_save_common_state(ctxt);
+ __sysreg_save_user_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
}
-void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_save_state(ctxt);
__sysreg_save_common_state(ctxt);
+ __sysreg_save_el2_return_state(ctxt);
}
static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
- write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
- write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
- write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
+
+ /*
+ * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+ * therefore be saved/restored on every entry/exit to/from the guest.
+ */
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
}
-static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+ write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
+ write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
+ write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
@@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static void __hyp_text
+__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
@@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
}
-static hyp_alternate_select(__sysreg_call_restore_host_state,
- __sysreg_restore_state, __sysreg_do_nothing,
- ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+ __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_common_state(ctxt);
+ __sysreg_restore_user_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
+}
-void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_call_restore_host_state()(ctxt);
__sysreg_restore_common_state(ctxt);
}
-void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_state(ctxt);
__sysreg_restore_common_state(ctxt);
+ __sysreg_restore_el2_return_state(ctxt);
}
void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
- if (__fpsimd_enabled())
- sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
}
@@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
{
u64 *spsr, *sysreg;
- if (read_sysreg(hcr_el2) & HCR_RW)
+ if (!vcpu_el1_is_32bit(vcpu))
return;
spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
- if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+ if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
}
+
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ __sysreg_save_user_state(host_ctxt);
+
+ /*
+ * Load guest EL1 and user state
+ *
+ * We must restore the 32-bit state before the sysregs, thanks
+ * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ */
+ __sysreg32_restore_state(vcpu);
+ __sysreg_restore_user_state(guest_ctxt);
+ __sysreg_restore_el1_state(guest_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = true;
+
+ activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2. This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+ struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+ if (!has_vhe())
+ return;
+
+ deactivate_traps_vhe_put();
+
+ __sysreg_save_el1_state(guest_ctxt);
+ __sysreg_save_user_state(guest_ctxt);
+ __sysreg32_save_state(vcpu);
+
+ /* Restore host user state */
+ __sysreg_restore_user_state(host_ctxt);
+
+ vcpu->arch.sysregs_loaded_on_cpu = false;
+}
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+ asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
new file mode 100644
index 000000000000..86801b6055d6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
+ * guest.
+ *
+ * @vcpu: the offending vcpu
+ *
+ * Returns:
+ * 1: GICV access successfully performed
+ * 0: Not a GICV access
+ * -1: Illegal GICV access
+ */
+int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ phys_addr_t fault_ipa;
+ void __iomem *addr;
+ int rd;
+
+ /* Build the full address */
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+ /* If not for GICV, move on */
+ if (fault_ipa < vgic->vgic_cpu_base ||
+ fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
+ return 0;
+
+ /* Reject anything but a 32bit access */
+ if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
+ return -1;
+
+ /* Not aligned? Don't bother */
+ if (fault_ipa & 3)
+ return -1;
+
+ rd = kvm_vcpu_dabt_get_rd(vcpu);
+ addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr += fault_ipa - vgic->vgic_cpu_base;
+
+ if (kvm_vcpu_dabt_iswrite(vcpu)) {
+ u32 data = vcpu_data_guest_to_host(vcpu,
+ vcpu_get_reg(vcpu, rd),
+ sizeof(u32));
+ writel_relaxed(data, addr);
+ } else {
+ u32 data = readl_relaxed(addr);
+ vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
+ sizeof(u32)));
+ }
+
+ return 1;
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 60666a056944..d8e71659ba7e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
exc_offset = LOWER_EL_AArch32_VECTOR;
}
- return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+ return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
}
static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u32 esr = 0;
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
- vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+ vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
/*
* Build an {i,d}abort, depending on the level and the
@@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+ vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
}
static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
unsigned long cpsr = *vcpu_cpsr(vcpu);
u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
- *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+ vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
- *vcpu_spsr(vcpu) = cpsr;
+ vcpu_write_spsr(vcpu, cpsr);
/*
* Build an unknown exception, depending on the instruction
@@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
if (kvm_vcpu_trap_il_is32bit(vcpu))
esr |= ESR_ELx_IL;
- vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+ vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
}
/**
@@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
*/
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_dabt32(vcpu, addr);
else
inject_abt64(vcpu, false, addr);
@@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_pabt32(vcpu, addr);
else
inject_abt64(vcpu, true, addr);
@@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
*/
void kvm_inject_undefined(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.hcr_el2 & HCR_RW))
+ if (vcpu_el1_is_32bit(vcpu))
kvm_inject_undef32(vcpu);
else
inject_undef64(vcpu);
@@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
{
vcpu_set_vsesr(vcpu, esr);
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+ *vcpu_hcr(vcpu) |= HCR_VSE;
}
/**
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index bbc6ae32e4af..eefe403a2e63 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
/*
* Return the SPSR for the current mode of the virtual CPU.
*/
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
{
unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
switch (mode) {
- case COMPAT_PSR_MODE_SVC:
- mode = KVM_SPSR_SVC;
- break;
- case COMPAT_PSR_MODE_ABT:
- mode = KVM_SPSR_ABT;
- break;
- case COMPAT_PSR_MODE_UND:
- mode = KVM_SPSR_UND;
- break;
- case COMPAT_PSR_MODE_IRQ:
- mode = KVM_SPSR_IRQ;
- break;
- case COMPAT_PSR_MODE_FIQ:
- mode = KVM_SPSR_FIQ;
- break;
+ case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC;
+ case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT;
+ case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND;
+ case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ;
+ case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ;
+ default: BUG();
+ }
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ return read_sysreg_el1(spsr);
+ case KVM_SPSR_ABT:
+ return read_sysreg(spsr_abt);
+ case KVM_SPSR_UND:
+ return read_sysreg(spsr_und);
+ case KVM_SPSR_IRQ:
+ return read_sysreg(spsr_irq);
+ case KVM_SPSR_FIQ:
+ return read_sysreg(spsr_fiq);
default:
BUG();
}
+}
+
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+ int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+ if (!vcpu->arch.sysregs_loaded_on_cpu) {
+ vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+ return;
+ }
- return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+ switch (spsr_idx) {
+ case KVM_SPSR_SVC:
+ write_sysreg_el1(v, spsr);
+ case KVM_SPSR_ABT:
+ write_sysreg(v, spsr_abt);
+ case KVM_SPSR_UND:
+ write_sysreg(v, spsr_und);
+ case KVM_SPSR_IRQ:
+ write_sysreg(v, spsr_irq);
+ case KVM_SPSR_FIQ:
+ write_sysreg(v, spsr_fiq);
+ }
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 50a43c7b97ca..806b0b126a64 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/perf_event.h>
#include <asm/sysreg.h>
@@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
return false;
}
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_read;
+
+ /*
+ * System registers listed in the switch are not saved on every
+ * exit from the guest but are only saved on vcpu_put.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the guest cannot modify its
+ * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+ * thread when emulating cross-VCPU communication.
+ */
+ switch (reg) {
+ case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
+ case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+ case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
+ case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
+ case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
+ case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
+ case TCR_EL1: return read_sysreg_s(tcr_EL12);
+ case ESR_EL1: return read_sysreg_s(esr_EL12);
+ case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
+ case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
+ case FAR_EL1: return read_sysreg_s(far_EL12);
+ case MAIR_EL1: return read_sysreg_s(mair_EL12);
+ case VBAR_EL1: return read_sysreg_s(vbar_EL12);
+ case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
+ case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
+ case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
+ case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
+ case AMAIR_EL1: return read_sysreg_s(amair_EL12);
+ case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
+ case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
+ case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
+ case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
+ case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2);
+ }
+
+immediate_read:
+ return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+ if (!vcpu->arch.sysregs_loaded_on_cpu)
+ goto immediate_write;
+
+ /*
+ * System registers listed in the switch are not restored on every
+ * entry to the guest but are only restored on vcpu_load.
+ *
+ * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+ * should never be listed below, because the the MPIDR should only be
+ * set once, before running the VCPU, and never changed later.
+ */
+ switch (reg) {
+ case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
+ case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
+ case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
+ case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
+ case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
+ case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
+ case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
+ case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
+ case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
+ case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
+ case FAR_EL1: write_sysreg_s(val, far_EL12); return;
+ case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
+ case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
+ case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
+ case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
+ case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
+ case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
+ case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
+ case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
+ case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
+ case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
+ case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
+ case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return;
+ }
+
+immediate_write:
+ __vcpu_sys_reg(vcpu, reg) = val;
+}
+
/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
static u32 cache_levels;
@@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
bool was_enabled = vcpu_has_cache_enabled(vcpu);
+ u64 val;
+ int reg = r->reg;
BUG_ON(!p->is_write);
- if (!p->is_aarch32) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ /* See the 32bit mapping in kvm_host.h */
+ if (p->is_aarch32)
+ reg = r->reg / 2;
+
+ if (!p->is_aarch32 || !p->is_32bit) {
+ val = p->regval;
} else {
- if (!p->is_32bit)
- vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
- vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
+ val = vcpu_read_sys_reg(vcpu, reg);
+ if (r->reg % 2)
+ val = (p->regval << 32) | (u64)lower_32_bits(val);
+ else
+ val = ((u64)upper_32_bits(val) << 32) |
+ lower_32_bits(p->regval);
}
+ vcpu_write_sys_reg(vcpu, val, reg);
kvm_toggle_cache(vcpu, was_enabled);
return true;
@@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
+static bool trap_undef(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- vcpu_sys_reg(vcpu, r->reg) = p->regval;
+ vcpu_write_sys_reg(vcpu, p->regval, r->reg);
vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
} else {
- p->regval = vcpu_sys_reg(vcpu, r->reg);
+ p->regval = vcpu_read_sys_reg(vcpu, r->reg);
}
trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu,
static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
+ u64 amair = read_sysreg(amair_el1);
+ vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
}
static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
- vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
+ vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
}
static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
}
static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
{
- u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+ u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
if (!enabled)
@@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
/* Only update writeable bits of PMCR */
- val = vcpu_sys_reg(vcpu, PMCR_EL0);
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
- vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
} else {
/* PMCR.P & PMCR.C are RAZ */
- val = vcpu_sys_reg(vcpu, PMCR_EL0)
+ val = __vcpu_sys_reg(vcpu, PMCR_EL0)
& ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
p->regval = val;
}
@@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
if (p->is_write)
- vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+ __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
else
/* return PMSELR.SEL field */
- p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
return true;
@@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
{
u64 pmcr, val;
- pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+ pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
kvm_inject_undefined(vcpu);
@@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
& ARMV8_PMU_COUNTER_MASK;
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
@@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
- idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
- vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+ __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+ p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
}
return true;
@@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = p->regval & mask;
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
} else {
/* accessing PMCNTENCLR_EL0 */
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
kvm_pmu_disable_counter(vcpu, val);
}
} else {
- p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
}
return true;
@@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1)
/* accessing PMINTENSET_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
else
/* accessing PMINTENCLR_EL1 */
- vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+ __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
}
return true;
@@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
else
/* accessing PMOVSCLR_EL0 */
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
} else {
- p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+ p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
}
return true;
@@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
- vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
- & ARMV8_PMU_USERENR_MASK;
+ __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+ p->regval & ARMV8_PMU_USERENR_MASK;
} else {
- p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+ p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
}
@@ -893,6 +1000,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
task_pid_nr(current));
val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+ } else if (id == SYS_ID_AA64MMFR1_EL1) {
+ if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
+ pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n",
+ task_pid_nr(current));
+
+ val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
}
return val;
@@ -1178,6 +1291,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+ { SYS_DESC(SYS_LORSA_EL1), trap_undef },
+ { SYS_DESC(SYS_LOREA_EL1), trap_undef },
+ { SYS_DESC(SYS_LORN_EL1), trap_undef },
+ { SYS_DESC(SYS_LORC_EL1), trap_undef },
+ { SYS_DESC(SYS_LORID_EL1), trap_undef },
+
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
@@ -1545,6 +1664,11 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+ /* CNTP_TVAL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
+ /* CNTP_CTL */
+ { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+
/* PMEVCNTRn */
PMU_PMEVCNTR(0),
PMU_PMEVCNTR(1),
@@ -1618,6 +1742,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
};
/* Target specific emulation tables */
@@ -2194,7 +2319,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->get_user)
return (r->get_user)(vcpu, r, reg, uaddr);
- return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+ return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
}
int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -2215,7 +2340,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
if (r->set_user)
return (r->set_user)(vcpu, r, reg, uaddr);
- return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+ return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
}
static unsigned int num_demux_regs(void)
@@ -2421,6 +2546,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
reset_sys_reg_descs(vcpu, table, num);
for (num = 1; num < NR_SYS_REGS; num++)
- if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
- panic("Didn't reset vcpu_sys_reg(%zi)", num);
+ if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+ panic("Didn't reset __vcpu_sys_reg(%zi)", num);
}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 060f5348ef25..cd710f8b63e0 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+ __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
}
static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
- vcpu_sys_reg(vcpu, r->reg) = r->val;
+ __vcpu_sys_reg(vcpu, r->reg) = r->val;
}
static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 969ade1d333d..ddb8497d18d6 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
if (p->is_write)
return ignore_write(vcpu, p);
- p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
return true;
}
static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
- vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
+ __vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
}
/*
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
new file mode 100644
index 000000000000..c712a7376bc1
--- /dev/null
+++ b/arch/arm64/kvm/va_layout.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/random.h>
+#include <linux/memblock.h>
+#include <asm/alternative.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * The LSB of the random hyp VA tag or 0 if no randomization is used.
+ */
+static u8 tag_lsb;
+/*
+ * The random hyp VA tag value with the region bit if hyp randomization is used
+ */
+static u64 tag_val;
+static u64 va_mask;
+
+static void compute_layout(void)
+{
+ phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
+ u64 hyp_va_msb;
+ int kva_msb;
+
+ /* Where is my RAM region? */
+ hyp_va_msb = idmap_addr & BIT(VA_BITS - 1);
+ hyp_va_msb ^= BIT(VA_BITS - 1);
+
+ kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+ (u64)(high_memory - 1));
+
+ if (kva_msb == (VA_BITS - 1)) {
+ /*
+ * No space in the address, let's compute the mask so
+ * that it covers (VA_BITS - 1) bits, and the region
+ * bit. The tag stays set to zero.
+ */
+ va_mask = BIT(VA_BITS - 1) - 1;
+ va_mask |= hyp_va_msb;
+ } else {
+ /*
+ * We do have some free bits to insert a random tag.
+ * Hyp VAs are now created from kernel linear map VAs
+ * using the following formula (with V == VA_BITS):
+ *
+ * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+ * ---------------------------------------------------------
+ * | 0000000 | hyp_va_msb | random tag | kern linear VA |
+ */
+ tag_lsb = kva_msb;
+ va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+ tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
+ tag_val |= hyp_va_msb;
+ tag_val >>= tag_lsb;
+ }
+}
+
+static u32 compute_instruction(int n, u32 rd, u32 rn)
+{
+ u32 insn = AARCH64_BREAK_FAULT;
+
+ switch (n) {
+ case 0:
+ insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND,
+ AARCH64_INSN_VARIANT_64BIT,
+ rn, rd, va_mask);
+ break;
+
+ case 1:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd,
+ tag_lsb);
+ break;
+
+ case 2:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(11, 0),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 3:
+ insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+ tag_val & GENMASK(23, 12),
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+ break;
+
+ case 4:
+ /* ROR is a variant of EXTR with Rm = Rn */
+ insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+ rn, rn, rd, 64 - tag_lsb);
+ break;
+ }
+
+ return insn;
+}
+
+void __init kvm_update_va_mask(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ int i;
+
+ BUG_ON(nr_inst != 5);
+
+ if (!has_vhe() && !va_mask)
+ compute_layout();
+
+ for (i = 0; i < nr_inst; i++) {
+ u32 rd, rn, insn, oinsn;
+
+ /*
+ * VHE doesn't need any address translation, let's NOP
+ * everything.
+ *
+ * Alternatively, if we don't have any spare bits in
+ * the address, NOP everything after masking that
+ * kernel VA.
+ */
+ if (has_vhe() || (!tag_lsb && i > 0)) {
+ updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+ continue;
+ }
+
+ oinsn = le32_to_cpu(origptr[i]);
+ rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+ rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn);
+
+ insn = compute_instruction(i, rd, rn);
+ BUG_ON(insn == AARCH64_BREAK_FAULT);
+
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
+void *__kvm_bp_vect_base;
+int __kvm_harden_el2_vector_slot;
+
+void kvm_patch_vector_branch(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ u64 addr;
+ u32 insn;
+
+ BUG_ON(nr_inst != 5);
+
+ if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+ WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+ return;
+ }
+
+ if (!va_mask)
+ compute_layout();
+
+ /*
+ * Compute HYP VA by using the same computation as kern_hyp_va()
+ */
+ addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
+ addr &= va_mask;
+ addr |= tag_val << tag_lsb;
+
+ /* Use PC[10:7] to branch to the same vector in KVM */
+ addr |= ((u64)origptr & GENMASK_ULL(10, 7));
+
+ /*
+ * Branch to the second instruction in the vectors in order to
+ * avoid the initial store on the stack (which we already
+ * perform in the hardening vectors).
+ */
+ addr += AARCH64_INSN_SIZE;
+
+ /* stp x0, x1, [sp, #-16]! */
+ insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
+ AARCH64_INSN_REG_1,
+ AARCH64_INSN_REG_SP,
+ -16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movz x0, #(addr & 0xffff) */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)addr,
+ 0,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_ZERO);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 16) & 0xffff), lsl #16 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 16),
+ 16,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* movk x0, #((addr >> 32) & 0xffff), lsl #32 */
+ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+ (u16)(addr >> 32),
+ 32,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_MOVEWIDE_KEEP);
+ *updptr++ = cpu_to_le32(insn);
+
+ /* br x0 */
+ insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0,
+ AARCH64_INSN_BRANCH_NOLINK);
+ *updptr++ = cpu_to_le32(insn);
+}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdbd142ca7f2..24f03941ada8 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -57,11 +57,15 @@ struct vgic_global {
/* Physical address of vgic virtual cpu interface */
phys_addr_t vcpu_base;
- /* GICV mapping */
+ /* GICV mapping, kernel VA */
void __iomem *vcpu_base_va;
+ /* GICV mapping, HYP VA */
+ void __iomem *vcpu_hyp_va;
- /* virtual control interface mapping */
+ /* virtual control interface mapping, kernel VA */
void __iomem *vctrl_base;
+ /* virtual control interface mapping, HYP VA */
+ void __iomem *vctrl_hyp;
/* Number of implemented list registers */
int nr_lr;
@@ -209,10 +213,6 @@ struct vgic_dist {
int nr_spis;
- /* TODO: Consider moving to global state */
- /* Virtual control interface mapping */
- void __iomem *vctrl_base;
-
/* base addresses in guest physical address space: */
gpa_t vgic_dist_base; /* distributor */
union {
@@ -263,7 +263,6 @@ struct vgic_dist {
struct vgic_v2_cpu_if {
u32 vgic_hcr;
u32 vgic_vmcr;
- u64 vgic_elrsr; /* Saved only */
u32 vgic_apr;
u32 vgic_lr[VGIC_V2_MAX_LRS];
};
@@ -272,7 +271,6 @@ struct vgic_v3_cpu_if {
u32 vgic_hcr;
u32 vgic_vmcr;
u32 vgic_sre; /* Restored only, change ignored */
- u32 vgic_elrsr; /* Saved only */
u32 vgic_ap0r[4];
u32 vgic_ap1r[4];
u64 vgic_lr[VGIC_V3_MAX_LRS];
@@ -360,6 +358,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu);
bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c00c4c33e432..b26eccc78fb1 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -503,6 +503,7 @@
#define ICH_HCR_EN (1 << 0)
#define ICH_HCR_UIE (1 << 1)
+#define ICH_HCR_NPIE (1 << 3)
#define ICH_HCR_TC (1 << 10)
#define ICH_HCR_TALL0 (1 << 11)
#define ICH_HCR_TALL1 (1 << 12)
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index d3453ee072fc..68d8b1f73682 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -84,6 +84,7 @@
#define GICH_HCR_EN (1 << 0)
#define GICH_HCR_UIE (1 << 1)
+#define GICH_HCR_NPIE (1 << 3)
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 8bc479fa37e6..efc84cbe8277 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
*vcpu_cpsr(vcpu) = cpsr;
/* Note: These now point to the banked copies */
- *vcpu_spsr(vcpu) = new_spsr_value;
+ vcpu_write_spsr(vcpu, new_spsr_value);
*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70f4c30918eb..bd3d57f40f1b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -545,9 +545,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
* The kernel may decide to run userspace after calling vcpu_put, so
* we reset cntvoff to 0 to ensure a consistent read between user
* accesses to the virtual counter and kernel access to the physical
- * counter.
+ * counter of non-VHE case. For VHE, the virtual counter uses a fixed
+ * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
*/
- set_cntvoff(0);
+ if (!has_vhe())
+ set_cntvoff(0);
}
/*
@@ -581,6 +583,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
@@ -594,6 +597,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
+ if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
+ kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+
return 0;
}
@@ -767,7 +773,7 @@ int kvm_timer_hyp_init(bool has_gic)
static_branch_enable(&has_gic_active_state);
}
- kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+ kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
"kvm/arm/timer:starting", kvm_timer_starting_cpu,
@@ -852,11 +858,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return ret;
no_vgic:
- preempt_disable();
timer->enabled = 1;
- kvm_timer_vcpu_load(vcpu);
- preempt_enable();
-
return 0;
}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86941f6181bb..dba629c5f8ac 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
+ kvm_vcpu_load_sysregs(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
@@ -384,14 +386,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- vcpu_load(vcpu);
-
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
- vcpu_put(vcpu);
return 0;
}
@@ -400,8 +399,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
- vcpu_load(vcpu);
-
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -413,7 +410,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
- vcpu_put(vcpu);
return ret;
}
@@ -426,7 +422,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+ bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+ return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !v->arch.power_off && !v->arch.pause);
}
@@ -638,27 +635,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
- vcpu_load(vcpu);
-
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
- goto out;
+ return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
- goto out;
- if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
- ret = 0;
- goto out;
- }
-
+ return ret;
+ if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+ return 0;
}
- if (run->immediate_exit) {
- ret = -EINTR;
- goto out;
- }
+ if (run->immediate_exit)
+ return -EINTR;
+
+ vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
@@ -725,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
+ isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(&userspace_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
@@ -741,13 +734,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- if (has_vhe())
- kvm_arm_vhe_guest_enter();
-
- ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
- if (has_vhe())
+ if (has_vhe()) {
+ kvm_arm_vhe_guest_enter();
+ ret = kvm_vcpu_run_vhe(vcpu);
kvm_arm_vhe_guest_exit();
+ } else {
+ ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
+ }
+
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
@@ -817,7 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
-out:
vcpu_put(vcpu);
return ret;
}
@@ -826,18 +820,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
int bit_index;
bool set;
- unsigned long *ptr;
+ unsigned long *hcr;
if (number == KVM_ARM_IRQ_CPU_IRQ)
bit_index = __ffs(HCR_VI);
else /* KVM_ARM_IRQ_CPU_FIQ */
bit_index = __ffs(HCR_VF);
- ptr = (unsigned long *)&vcpu->arch.irq_lines;
+ hcr = vcpu_hcr(vcpu);
if (level)
- set = test_and_set_bit(bit_index, ptr);
+ set = test_and_set_bit(bit_index, hcr);
else
- set = test_and_clear_bit(bit_index, ptr);
+ set = test_and_clear_bit(bit_index, hcr);
/*
* If we didn't change anything, no need to wake up or kick other CPUs
@@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_device_attr attr;
long r;
- vcpu_load(vcpu);
-
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
@@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
}
- vcpu_put(vcpu);
return r;
}
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
write_sysreg(cntvoff, cntvoff_el2);
}
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
{
- /*
- * We don't need to do this for VHE since the host kernel runs in EL2
- * with HCR_EL2.TGE ==1, which makes those bits have no impact.
- */
- if (!has_vhe()) {
- u64 val;
+ u64 val;
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
- }
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
}
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
{
- if (!has_vhe()) {
- u64 val;
+ u64 val;
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
- }
+ /*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
}
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
deleted file mode 100644
index 4fe6e797e8b3..000000000000
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-
-static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- u32 elrsr0, elrsr1;
-
- elrsr0 = readl_relaxed(base + GICH_ELRSR0);
- if (unlikely(nr_lr > 32))
- elrsr1 = readl_relaxed(base + GICH_ELRSR1);
- else
- elrsr1 = 0;
-
- cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-}
-
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int i;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- for (i = 0; i < used_lrs; i++) {
- if (cpu_if->vgic_elrsr & (1UL << i))
- cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
- else
- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-
- writel_relaxed(0, base + GICH_LR0 + (i * 4));
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- if (!base)
- return;
-
- if (used_lrs) {
- cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
- save_elrsr(vcpu, base);
- save_lrs(vcpu, base);
-
- writel_relaxed(0, base + GICH_HCR);
- } else {
- cpu_if->vgic_elrsr = ~0UL;
- cpu_if->vgic_apr = 0;
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int i;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- if (!base)
- return;
-
- if (used_lrs) {
- writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
- writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
- for (i = 0; i < used_lrs; i++) {
- writel_relaxed(cpu_if->vgic_lr[i],
- base + GICH_LR0 + (i * 4));
- }
- }
-}
-
-#ifdef CONFIG_ARM64
-/*
- * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
- * guest.
- *
- * @vcpu: the offending vcpu
- *
- * Returns:
- * 1: GICV access successfully performed
- * 0: Not a GICV access
- * -1: Illegal GICV access
- */
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_dist *vgic = &kvm->arch.vgic;
- phys_addr_t fault_ipa;
- void __iomem *addr;
- int rd;
-
- /* Build the full address */
- fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
-
- /* If not for GICV, move on */
- if (fault_ipa < vgic->vgic_cpu_base ||
- fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
- return 0;
-
- /* Reject anything but a 32bit access */
- if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
- return -1;
-
- /* Not aligned? Don't bother */
- if (fault_ipa & 3)
- return -1;
-
- rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va);
- addr += fault_ipa - vgic->vgic_cpu_base;
-
- if (kvm_vcpu_dabt_iswrite(vcpu)) {
- u32 data = vcpu_data_guest_to_host(vcpu,
- vcpu_get_reg(vcpu, rd),
- sizeof(u32));
- writel_relaxed(data, addr);
- } else {
- u32 data = readl_relaxed(addr);
- vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
- sizeof(u32)));
- }
-
- return 1;
-}
-#endif
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f5c3d6d7019e..616e5a433ab0 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -21,6 +21,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
@@ -208,88 +209,68 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
- u64 val;
/*
* Make sure stores to the GIC via the memory mapped interface
- * are now visible to the system register interface.
+ * are now visible to the system register interface when reading the
+ * LRs, and when reading back the VMCR on non-VHE systems.
*/
- if (!cpu_if->vgic_sre) {
- dsb(st);
- cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ dsb(sy);
+ isb();
+ }
}
if (used_lrs) {
int i;
- u32 nr_pre_bits;
+ u32 elrsr;
- cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+ elrsr = read_gicreg(ICH_ELSR_EL2);
- write_gicreg(0, ICH_HCR_EL2);
- val = read_gicreg(ICH_VTR_EL2);
- nr_pre_bits = vtr_to_nr_pre_bits(val);
+ write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++) {
- if (cpu_if->vgic_elrsr & (1 << i))
+ if (elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
else
cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
__gic_v3_set_lr(0, i);
}
+ }
+}
- switch (nr_pre_bits) {
- case 7:
- cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
- cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
- case 6:
- cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
- default:
- cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
- }
+void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ int i;
- switch (nr_pre_bits) {
- case 7:
- cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
- cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
- case 6:
- cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
- default:
- cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
- }
- } else {
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
- write_gicreg(0, ICH_HCR_EL2);
-
- cpu_if->vgic_elrsr = 0xffff;
- cpu_if->vgic_ap0r[0] = 0;
- cpu_if->vgic_ap0r[1] = 0;
- cpu_if->vgic_ap0r[2] = 0;
- cpu_if->vgic_ap0r[3] = 0;
- cpu_if->vgic_ap1r[0] = 0;
- cpu_if->vgic_ap1r[1] = 0;
- cpu_if->vgic_ap1r[2] = 0;
- cpu_if->vgic_ap1r[3] = 0;
- }
+ if (used_lrs) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ for (i = 0; i < used_lrs; i++)
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
- if (!cpu_if->vgic_sre) {
- /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
- isb();
- write_gicreg(1, ICC_SRE_EL1);
+ /*
+ * Ensure that writes to the LRs, and on non-VHE systems ensure that
+ * the write to the VMCR in __vgic_v3_activate_traps(), will have
+ * reached the (re)distributors. This ensure the guest will read the
+ * correct values from the memory-mapped interface.
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ isb();
+ dsb(sy);
+ }
}
}
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
- u64 val;
- u32 nr_pre_bits;
- int i;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -298,70 +279,135 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
* consequences. So we must make sure that ICC_SRE_EL1 has
* been actually programmed with the value we want before
* starting to mess with the rest of the GIC, and VMCR_EL2 in
- * particular.
+ * particular. This logic must be called before
+ * __vgic_v3_restore_state().
*/
if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+
+ if (has_vhe()) {
+ /*
+ * Ensure that the write to the VMCR will have reached
+ * the (re)distributors. This ensure the guest will
+ * read the correct values from the memory-mapped
+ * interface.
+ */
+ isb();
+ dsb(sy);
+ }
}
- val = read_gicreg(ICH_VTR_EL2);
- nr_pre_bits = vtr_to_nr_pre_bits(val);
+ /*
+ * Prevent the guest from touching the GIC system registers if
+ * SRE isn't enabled for GICv3 emulation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
- if (used_lrs) {
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected,
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+}
- switch (nr_pre_bits) {
- case 7:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
- case 6:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
- default:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
- }
-
- switch (nr_pre_bits) {
- case 7:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
- case 6:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
- default:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
- }
+void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 val;
- for (i = 0; i < used_lrs; i++)
- __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
- } else {
- /*
- * If we need to trap system registers, we must write
- * ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected. Same thing if GICv4 is used, as VLPI
- * delivery is gated by ICH_HCR_EL2.En.
- */
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
- write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+ if (!cpu_if->vgic_sre) {
+ cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
- /*
- * Ensures that the above will have reached the
- * (re)distributors. This ensure the guest will read the
- * correct values from the memory-mapped interface.
- */
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+
if (!cpu_if->vgic_sre) {
+ /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
isb();
- dsb(sy);
+ write_gicreg(1, ICC_SRE_EL1);
}
/*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
+ * If we were trapping system registers, we enabled the VGIC even if
+ * no interrupts were being injected, and we disable it again here.
*/
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
+ write_gicreg(0, ICH_HCR_EL2);
+}
+
+void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ u64 val;
+ u32 nr_pre_bits;
+
+ vcpu = kern_hyp_va(vcpu);
+ cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+ cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
+ case 6:
+ cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
+ default:
+ cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+ cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
+ case 6:
+ cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
+ default:
+ cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
+ }
+}
+
+void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ u64 val;
+ u32 nr_pre_bits;
+
+ vcpu = kern_hyp_va(vcpu);
+ cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
+ case 6:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
+ default:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
+ case 6:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
+ default:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
+ }
}
void __hyp_text __vgic_v3_init_lrs(void)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ec62d1cccab7..7f6a944db23d 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+static unsigned long io_map_base;
+
#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
@@ -479,7 +481,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
clear_hyp_pgd_entry(pgd);
}
-static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
+{
+ return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
+}
+
+static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
+ phys_addr_t start, u64 size)
{
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
@@ -489,7 +497,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
* We don't unmap anything from HYP, except at the hyp tear down.
* Hence, we don't have to invalidate the TLBs here.
*/
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
do {
next = pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -497,32 +505,50 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
} while (pgd++, addr = next, addr != end);
}
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
+}
+
+static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
+}
+
/**
* free_hyp_pgds - free Hyp-mode page tables
*
* Assumes hyp_pgd is a page table used strictly in Hyp-mode and
* therefore contains either mappings in the kernel memory area (above
- * PAGE_OFFSET), or device mappings in the vmalloc range (from
- * VMALLOC_START to VMALLOC_END).
+ * PAGE_OFFSET), or device mappings in the idmap range.
*
- * boot_hyp_pgd should only map two pages for the init code.
+ * boot_hyp_pgd should only map the idmap range, and is only used in
+ * the extended idmap case.
*/
void free_hyp_pgds(void)
{
+ pgd_t *id_pgd;
+
mutex_lock(&kvm_hyp_pgd_mutex);
+ id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;
+
+ if (id_pgd) {
+ /* In case we never called hyp_mmu_init() */
+ if (!io_map_base)
+ io_map_base = hyp_idmap_start;
+ unmap_hyp_idmap_range(id_pgd, io_map_base,
+ hyp_idmap_start + PAGE_SIZE - io_map_base);
+ }
+
if (boot_hyp_pgd) {
- unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd) {
- unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
(uintptr_t)high_memory - PAGE_OFFSET);
- unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
- VMALLOC_END - VMALLOC_START);
free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
@@ -634,7 +660,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
addr = start & PAGE_MASK;
end = PAGE_ALIGN(end);
do {
- pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
+ pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
@@ -708,29 +734,115 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
return 0;
}
+static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
+ unsigned long *haddr, pgprot_t prot)
+{
+ pgd_t *pgd = hyp_pgd;
+ unsigned long base;
+ int ret = 0;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+
+ /*
+ * This assumes that we we have enough space below the idmap
+ * page to allocate our VAs. If not, the check below will
+ * kick. A potential alternative would be to detect that
+ * overflow and switch to an allocation above the idmap.
+ *
+ * The allocated size is always a multiple of PAGE_SIZE.
+ */
+ size = PAGE_ALIGN(size + offset_in_page(phys_addr));
+ base = io_map_base - size;
+
+ /*
+ * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+ * allocating the new area, as it would indicate we've
+ * overflowed the idmap/IO address range.
+ */
+ if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+ ret = -ENOMEM;
+ else
+ io_map_base = base;
+
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+
+ if (ret)
+ goto out;
+
+ if (__kvm_cpu_uses_extended_idmap())
+ pgd = boot_hyp_pgd;
+
+ ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
+ base, base + size,
+ __phys_to_pfn(phys_addr), prot);
+ if (ret)
+ goto out;
+
+ *haddr = base + offset_in_page(phys_addr);
+
+out:
+ return ret;
+}
+
/**
- * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
- * @from: The kernel start VA of the range
- * @to: The kernel end VA of the range (exclusive)
+ * create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
- *
- * The resulting HYP VA is the same as the kernel VA, modulo
- * HYP_PAGE_OFFSET.
+ * @size: Size of the region being mapped
+ * @kaddr: Kernel VA for this mapping
+ * @haddr: HYP VA for this mapping
*/
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr)
{
- unsigned long start = kern_hyp_va((unsigned long)from);
- unsigned long end = kern_hyp_va((unsigned long)to);
+ unsigned long addr;
+ int ret;
- if (is_kernel_in_hyp_mode())
+ *kaddr = ioremap(phys_addr, size);
+ if (!*kaddr)
+ return -ENOMEM;
+
+ if (is_kernel_in_hyp_mode()) {
+ *haddr = *kaddr;
return 0;
+ }
- /* Check for a valid kernel IO mapping */
- if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
- return -EINVAL;
+ ret = __create_hyp_private_mapping(phys_addr, size,
+ &addr, PAGE_HYP_DEVICE);
+ if (ret) {
+ iounmap(*kaddr);
+ *kaddr = NULL;
+ *haddr = NULL;
+ return ret;
+ }
+
+ *haddr = (void __iomem *)addr;
+ return 0;
+}
- return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
- __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+/**
+ * create_hyp_exec_mappings - Map an executable range into HYP
+ * @phys_addr: The physical start address which gets mapped
+ * @size: Size of the region being mapped
+ * @haddr: HYP VA for this mapping
+ */
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr)
+{
+ unsigned long addr;
+ int ret;
+
+ BUG_ON(is_kernel_in_hyp_mode());
+
+ ret = __create_hyp_private_mapping(phys_addr, size,
+ &addr, PAGE_HYP_EXEC);
+ if (ret) {
+ *haddr = NULL;
+ return ret;
+ }
+
+ *haddr = (void *)addr;
+ return 0;
}
/**
@@ -1801,7 +1913,9 @@ int kvm_mmu_init(void)
int err;
hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+ hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+ hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
/*
@@ -1810,12 +1924,13 @@ int kvm_mmu_init(void)
*/
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
- kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
- kvm_info("HYP VA range: %lx:%lx\n",
- kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+ kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
+ kvm_debug("HYP VA range: %lx:%lx\n",
+ kern_hyp_va(PAGE_OFFSET),
+ kern_hyp_va((unsigned long)high_memory - 1));
if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
- hyp_idmap_start < kern_hyp_va(~0UL) &&
+ hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
/*
* The idmap page is intersecting with the VA space,
@@ -1859,6 +1974,7 @@ int kvm_mmu_init(void)
goto out;
}
+ io_map_base = hyp_idmap_start;
return 0;
out:
free_hyp_pgds();
@@ -2035,7 +2151,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
*/
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
- unsigned long hcr = vcpu_get_hcr(vcpu);
+ unsigned long hcr = *vcpu_hcr(vcpu);
/*
* If this is the first time we do a S/W operation
@@ -2050,7 +2166,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu)
trace_kvm_set_way_flush(*vcpu_pc(vcpu),
vcpu_has_cache_enabled(vcpu));
stage2_flush_vm(vcpu->kvm);
- vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+ *vcpu_hcr(vcpu) = hcr | HCR_TVM;
}
}
@@ -2068,7 +2184,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
/* Caches are now on, stop trapping VM ops (until a S/W op) */
if (now_enabled)
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+ *vcpu_hcr(vcpu) &= ~HCR_TVM;
trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8a9c42366db7..1c5b76c46e26 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
- counter = vcpu_sys_reg(vcpu, reg);
+ counter = __vcpu_sys_reg(vcpu, reg);
/* The real counter value is equal to the value of counter register plus
* the value perf event counts.
@@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
- vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
+ __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
}
/**
@@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- vcpu_sys_reg(vcpu, reg) = counter;
+ __vcpu_sys_reg(vcpu, reg) = counter;
perf_event_disable(pmc->perf_event);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
@@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
- u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
+ u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
val &= ARMV8_PMU_PMCR_N_MASK;
if (val == 0)
@@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
- if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
+ if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
return;
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
u64 reg = 0;
- if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
- reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
- reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
- reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
+ if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
+ reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
+ reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+ reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
reg &= kvm_pmu_valid_counter_mask(vcpu);
}
@@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
if (kvm_pmu_overflow_status(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
@@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
if (val == 0)
return;
- enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+ enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
if (!(val & BIT(i)))
continue;
- type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
+ type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
& ARMV8_PMU_EVTYPE_EVENT;
if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
- reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+ reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
- vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+ __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
if (!reg)
- vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+ __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
}
}
}
@@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
mask = kvm_pmu_valid_counter_mask(vcpu);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter(vcpu,
- vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
+ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
} else {
kvm_pmu_disable_counter(vcpu, mask);
}
@@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
- return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
- (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
+ return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
+ (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}
/**
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 743ca5cb05ef..68378fe17a0e 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
- /*
- * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init)
- * it is stored in distributor struct for asm save/restore purpose
- */
- kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
-
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
@@ -302,17 +296,6 @@ int vgic_init(struct kvm *kvm)
dist->initialized = true;
- /*
- * If we're initializing GICv2 on-demand when first running the VCPU
- * then we need to load the VGIC state onto the CPU. We can detect
- * this easily by checking if we are in between vcpu_load and vcpu_put
- * when we just initialized the VGIC.
- */
- preempt_disable();
- vcpu = kvm_arm_get_running_vcpu();
- if (vcpu)
- kvm_vgic_load(vcpu);
- preempt_enable();
out:
return ret;
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 465095355666..a8f07243aa9f 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_irq *irq;
u32 *intids;
- int irq_count = dist->lpi_list_count, i = 0;
+ int irq_count, i = 0;
/*
- * We use the current value of the list length, which may change
- * after the kmalloc. We don't care, because the guest shouldn't
- * change anything while the command handling is still running,
- * and in the worst case we would miss a new IRQ, which one wouldn't
- * expect to be covered by this command anyway.
+ * There is an obvious race between allocating the array and LPIs
+ * being mapped/unmapped. If we ended up here as a result of a
+ * command, we're safe (locks are held, preventing another
+ * command). If coming from another path (such as enabling LPIs),
+ * we must be careful not to overrun the array.
*/
+ irq_count = READ_ONCE(dist->lpi_list_count);
intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
if (!intids)
return -ENOMEM;
spin_lock(&dist->lpi_list_lock);
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+ if (i == irq_count)
+ break;
/* We don't need to "get" the IRQ, as we hold the list lock. */
if (irq->target_vcpu != vcpu)
continue;
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 83d82bd7dc4e..dbe99d635c80 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
/* Loop over all IRQs affected by this read */
for (i = 0; i < len * 8; i++) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ unsigned long flags;
+ spin_lock_irqsave(&irq->irq_lock, flags);
if (irq_is_pending(irq))
value |= (1U << i);
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
}
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index c32d7b93ffd1..45aa433f018f 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void)
vgic_v2_write_lr(i, 0);
}
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+ cpuif->vgic_hcr |= GICH_HCR_NPIE;
+}
+
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
@@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE);
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
@@ -98,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/*
* Clear soft pending state when level irqs have been acked.
- * Always regenerate the pending state.
*/
- if (irq->config == VGIC_CONFIG_LEVEL) {
- if (!(val & GICH_LR_PENDING_BIT))
- irq->pending_latch = false;
- }
+ if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
+ irq->pending_latch = false;
/*
* Level-triggered mapped IRQs are special because we only
@@ -146,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 val = irq->intid;
+ bool allow_pending = true;
+
+ if (irq->active)
+ val |= GICH_LR_ACTIVE_BIT;
+
+ if (irq->hw) {
+ val |= GICH_LR_HW;
+ val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
+ /*
+ * Never set pending+active on a HW interrupt, as the
+ * pending state is kept at the physical distributor
+ * level.
+ */
+ if (irq->active)
+ allow_pending = false;
+ } else {
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ val |= GICH_LR_EOI;
+
+ /*
+ * Software resampling doesn't work very well
+ * if we allow P+A, so let's not do that.
+ */
+ if (irq->active)
+ allow_pending = false;
+ }
+ }
- if (irq_is_pending(irq)) {
+ if (allow_pending && irq_is_pending(irq)) {
val |= GICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
@@ -164,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
- if (irq->active)
- val |= GICH_LR_ACTIVE_BIT;
-
- if (irq->hw) {
- val |= GICH_LR_HW;
- val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
- /*
- * Never set pending+active on a HW interrupt, as the
- * pending state is kept at the physical distributor
- * level.
- */
- if (irq->active && irq_is_pending(irq))
- val &= ~GICH_LR_PENDING_BIT;
- } else {
- if (irq->config == VGIC_CONFIG_LEVEL)
- val |= GICH_LR_EOI;
- }
-
/*
* Level-triggered mapped IRQs are special because we only observe
* rising edges as input to the VGIC. We therefore lower the line
@@ -265,7 +278,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
- vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
/* Get the show on the road... */
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -361,16 +373,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
if (!PAGE_ALIGNED(info->vcpu.start) ||
!PAGE_ALIGNED(resource_size(&info->vcpu))) {
kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n");
- kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start,
- resource_size(&info->vcpu));
- if (!kvm_vgic_global_state.vcpu_base_va) {
- kvm_err("Cannot ioremap GICV\n");
- return -ENOMEM;
- }
- ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va,
- kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu),
- info->vcpu.start);
+ ret = create_hyp_io_mappings(info->vcpu.start,
+ resource_size(&info->vcpu),
+ &kvm_vgic_global_state.vcpu_base_va,
+ &kvm_vgic_global_state.vcpu_hyp_va);
if (ret) {
kvm_err("Cannot map GICV into hyp\n");
goto out;
@@ -379,26 +386,18 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
static_branch_enable(&vgic_v2_cpuif_trap);
}
- kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
- resource_size(&info->vctrl));
- if (!kvm_vgic_global_state.vctrl_base) {
- kvm_err("Cannot ioremap GICH\n");
- ret = -ENOMEM;
+ ret = create_hyp_io_mappings(info->vctrl.start,
+ resource_size(&info->vctrl),
+ &kvm_vgic_global_state.vctrl_base,
+ &kvm_vgic_global_state.vctrl_hyp);
+ if (ret) {
+ kvm_err("Cannot map VCTRL into hyp\n");
goto out;
}
vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
- ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
- kvm_vgic_global_state.vctrl_base +
- resource_size(&info->vctrl),
- info->vctrl.start);
- if (ret) {
- kvm_err("Cannot map VCTRL into hyp\n");
- goto out;
- }
-
ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
if (ret) {
kvm_err("Cannot register GICv2 KVM device\n");
@@ -410,7 +409,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.type = VGIC_V2;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
- kvm_info("vgic-v2@%llx\n", info->vctrl.start);
+ kvm_debug("vgic-v2@%llx\n", info->vctrl.start);
return 0;
out:
@@ -422,18 +421,74 @@ out:
return ret;
}
+static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
+{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u64 elrsr;
+ int i;
+
+ elrsr = readl_relaxed(base + GICH_ELRSR0);
+ if (unlikely(used_lrs > 32))
+ elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;
+
+ for (i = 0; i < used_lrs; i++) {
+ if (elrsr & (1UL << i))
+ cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
+ else
+ cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+
+ writel_relaxed(0, base + GICH_LR0 + (i * 4));
+ }
+}
+
+void vgic_v2_save_state(struct kvm_vcpu *vcpu)
+{
+ void __iomem *base = kvm_vgic_global_state.vctrl_base;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+
+ if (!base)
+ return;
+
+ if (used_lrs) {
+ save_lrs(vcpu, base);
+ writel_relaxed(0, base + GICH_HCR);
+ }
+}
+
+void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ void __iomem *base = kvm_vgic_global_state.vctrl_base;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ int i;
+
+ if (!base)
+ return;
+
+ if (used_lrs) {
+ writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
+ for (i = 0; i < used_lrs; i++) {
+ writel_relaxed(cpu_if->vgic_lr[i],
+ base + GICH_LR0 + (i * 4));
+ }
+ }
+}
+
void vgic_v2_load(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
- writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
+ writel_relaxed(cpu_if->vgic_vmcr,
+ kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+ writel_relaxed(cpu_if->vgic_apr,
+ kvm_vgic_global_state.vctrl_base + GICH_APR);
}
void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
- cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
+ cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+ cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 6b329414e57a..8195f52ae6f0 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -16,6 +16,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
+#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_asm.h>
@@ -26,6 +27,13 @@ static bool group1_trap;
static bool common_trap;
static bool gicv4_enable;
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ cpuif->vgic_hcr |= ICH_HCR_NPIE;
+}
+
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -47,7 +55,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
int lr;
unsigned long flags;
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE);
for (lr = 0; lr < vgic_cpu->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
@@ -89,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
/*
* Clear soft pending state when level irqs have been acked.
- * Always regenerate the pending state.
*/
- if (irq->config == VGIC_CONFIG_LEVEL) {
- if (!(val & ICH_LR_PENDING_BIT))
- irq->pending_latch = false;
- }
+ if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
+ irq->pending_latch = false;
/*
* Level-triggered mapped IRQs are special because we only
@@ -128,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
+ bool allow_pending = true;
+
+ if (irq->active)
+ val |= ICH_LR_ACTIVE_BIT;
+
+ if (irq->hw) {
+ val |= ICH_LR_HW;
+ val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
+ /*
+ * Never set pending+active on a HW interrupt, as the
+ * pending state is kept at the physical distributor
+ * level.
+ */
+ if (irq->active)
+ allow_pending = false;
+ } else {
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ val |= ICH_LR_EOI;
- if (irq_is_pending(irq)) {
+ /*
+ * Software resampling doesn't work very well
+ * if we allow P+A, so let's not do that.
+ */
+ if (irq->active)
+ allow_pending = false;
+ }
+ }
+
+ if (allow_pending && irq_is_pending(irq)) {
val |= ICH_LR_PENDING_BIT;
if (irq->config == VGIC_CONFIG_EDGE)
@@ -147,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
}
}
- if (irq->active)
- val |= ICH_LR_ACTIVE_BIT;
-
- if (irq->hw) {
- val |= ICH_LR_HW;
- val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
- /*
- * Never set pending+active on a HW interrupt, as the
- * pending state is kept at the physical distributor
- * level.
- */
- if (irq->active && irq_is_pending(irq))
- val &= ~ICH_LR_PENDING_BIT;
- } else {
- if (irq->config == VGIC_CONFIG_LEVEL)
- val |= ICH_LR_EOI;
- }
-
/*
* Level-triggered mapped IRQs are special because we only observe
* rising edges as input to the VGIC. We therefore lower the line
@@ -267,7 +281,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vgic_v3->vgic_vmcr = 0;
- vgic_v3->vgic_elrsr = ~0;
/*
* If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -588,6 +601,11 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
*/
if (likely(cpu_if->vgic_sre))
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
+
+ kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
+
+ if (has_vhe())
+ __vgic_v3_activate_traps(vcpu);
}
void vgic_v3_put(struct kvm_vcpu *vcpu)
@@ -596,4 +614,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
if (likely(cpu_if->vgic_sre))
cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+
+ kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
+
+ if (has_vhe())
+ __vgic_v3_deactivate_traps(vcpu);
}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index c7c5ef190afa..e74baec76361 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -19,6 +19,7 @@
#include <linux/list_sort.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <asm/kvm_hyp.h>
#include "vgic.h"
@@ -495,6 +496,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
return ret;
}
+/**
+ * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
+ * @vcpu: The VCPU pointer
+ * @vintid: The INTID of the interrupt
+ *
+ * Reset the active and pending states of a mapped interrupt. Kernel
+ * subsystems injecting mapped interrupts should reset their interrupt lines
+ * when we are doing a reset of the VM.
+ */
+void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
+ unsigned long flags;
+
+ if (!irq->hw)
+ goto out;
+
+ spin_lock_irqsave(&irq->irq_lock, flags);
+ irq->active = false;
+ irq->pending_latch = false;
+ irq->line_level = false;
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
+out:
+ vgic_put_irq(vcpu->kvm, irq);
+}
+
int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
struct vgic_irq *irq;
@@ -684,22 +711,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
vgic_v3_set_underflow(vcpu);
}
+static inline void vgic_set_npie(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_npie(vcpu);
+ else
+ vgic_v3_set_npie(vcpu);
+}
+
/* Requires the ap_list_lock to be held. */
-static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
+static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
+ bool *multi_sgi)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
int count = 0;
+ *multi_sgi = false;
+
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
- if (vgic_irq_is_sgi(irq->intid) && irq->source)
- count += hweight8(irq->source);
- else
+ if (vgic_irq_is_sgi(irq->intid) && irq->source) {
+ int w = hweight8(irq->source);
+
+ count += w;
+ *multi_sgi |= (w > 1);
+ } else {
count++;
+ }
spin_unlock(&irq->irq_lock);
}
return count;
@@ -710,28 +752,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
- int count = 0;
+ int count;
+ bool npie = false;
+ bool multi_sgi;
+ u8 prio = 0xff;
DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
- if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr)
+ count = compute_ap_list_depth(vcpu, &multi_sgi);
+ if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
vgic_sort_ap_list(vcpu);
+ count = 0;
+
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
spin_lock(&irq->irq_lock);
- if (unlikely(vgic_target_oracle(irq) != vcpu))
- goto next;
-
/*
- * If we get an SGI with multiple sources, try to get
- * them in all at once.
+ * If we have multi-SGIs in the pipeline, we need to
+ * guarantee that they are all seen before any IRQ of
+ * lower priority. In that case, we need to filter out
+ * these interrupts by exiting early. This is easy as
+ * the AP list has been sorted already.
*/
- do {
+ if (multi_sgi && irq->priority > prio) {
+ spin_unlock(&irq->irq_lock);
+ break;
+ }
+
+ if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
- } while (irq->source && count < kvm_vgic_global_state.nr_lr);
-next:
+ if (irq->source) {
+ npie = true;
+ prio = irq->priority;
+ }
+ }
+
spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr) {
@@ -742,6 +799,9 @@ next:
}
}
+ if (npie)
+ vgic_set_npie(vcpu);
+
vcpu->arch.vgic_cpu.used_lrs = count;
/* Nuke remaining LRs */
@@ -749,6 +809,24 @@ next:
vgic_clear_lr(vcpu, count);
}
+static inline bool can_access_vgic_from_kernel(void)
+{
+ /*
+ * GICv2 can always be accessed from the kernel because it is
+ * memory-mapped, and VHE systems can access GICv3 EL2 system
+ * registers.
+ */
+ return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
+}
+
+static inline void vgic_save_state(struct kvm_vcpu *vcpu)
+{
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v2_save_state(vcpu);
+ else
+ __vgic_v3_save_state(vcpu);
+}
+
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
@@ -760,11 +838,22 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
+ if (can_access_vgic_from_kernel())
+ vgic_save_state(vcpu);
+
if (vgic_cpu->used_lrs)
vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
+static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ vgic_v2_restore_state(vcpu);
+ else
+ __vgic_v3_restore_state(vcpu);
+}
+
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
@@ -787,6 +876,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
vgic_flush_lr_state(vcpu);
spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+
+ if (can_access_vgic_from_kernel())
+ vgic_restore_state(vcpu);
}
void kvm_vgic_load(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 12c37b89f7a3..830e815748a0 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -96,6 +96,7 @@
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
+/* Requires the irq_lock to be held by the caller. */
static inline bool irq_is_pending(struct vgic_irq *irq)
{
if (irq->config == VGIC_CONFIG_EDGE)
@@ -159,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
@@ -176,6 +178,9 @@ void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
if (irq->intid < VGIC_MIN_LPI)
@@ -188,6 +193,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);