summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2021-02-12 11:23:44 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2021-02-12 11:23:44 -0500
commit8c6e67bec3192f16fa624203c8131e10cc4814ba (patch)
tree533b140ab6bb926f5ec35f8e42d5a99590e0d945 /arch
parent7137b7ae6f20076a1e05546db61f9fb3d3f9e97d (diff)
parentc93199e93e1232b7220482dffa05b7a32a195fe8 (diff)
downloadlinux-8c6e67bec3192f16fa624203c8131e10cc4814ba.tar.bz2
Merge tag 'kvmarm-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for Linux 5.12 - Make the nVHE EL2 object relocatable, resulting in much more maintainable code - Handle concurrent translation faults hitting the same page in a more elegant way - Support for the standard TRNG hypervisor call - A bunch of small PMU/Debug fixes - Allow the disabling of symbol export from assembly code - Simplification of the early init hypercall handling
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/xen/enlighten.c2
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/include/asm/atomic.h10
-rw-r--r--arch/arm64/include/asm/hyp_image.h29
-rw-r--r--arch/arm64/include/asm/kvm_asm.h26
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h61
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h5
-rw-r--r--arch/arm64/include/asm/processor.h3
-rw-r--r--arch/arm64/include/asm/sections.h3
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/entry-ftrace.S12
-rw-r--r--arch/arm64/kernel/entry.S14
-rw-r--r--arch/arm64/kernel/image-vars.h1
-rw-r--r--arch/arm64/kernel/perf_event.c41
-rw-r--r--arch/arm64/kernel/probes/kprobes_trampoline.S6
-rw-r--r--arch/arm64/kernel/signal.c7
-rw-r--r--arch/arm64/kernel/smp.c4
-rw-r--r--arch/arm64/kernel/syscall.c10
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S18
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/arm.c7
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/.gitignore2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile33
-rw-r--r--arch/arm64/kvm/hyp/nvhe/gen-hyprel.c438
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S29
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S19
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-smp.c4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp.lds.S9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c24
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c83
-rw-r--r--arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c2
-rw-r--r--arch/arm64/kvm/hypercalls.c6
-rw-r--r--arch/arm64/kvm/mmu.c13
-rw-r--r--arch/arm64/kvm/pmu-emul.c14
-rw-r--r--arch/arm64/kvm/sys_regs.c85
-rw-r--r--arch/arm64/kvm/trng.c85
-rw-r--r--arch/arm64/kvm/va_layout.c34
-rw-r--r--arch/arm64/mm/init.c33
-rw-r--r--arch/mips/boot/compressed/decompress.c3
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c2
-rw-r--r--arch/mips/kernel/binfmt_elfn32.c7
-rw-r--r--arch/mips/kernel/binfmt_elfo32.c7
-rw-r--r--arch/mips/kernel/relocate.c10
-rw-r--r--arch/powerpc/include/asm/vdso/gettimeofday.h16
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S8
-rw-r--r--arch/riscv/Kconfig6
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts2
-rw-r--r--arch/riscv/configs/defconfig2
-rw-r--r--arch/riscv/include/asm/pgtable.h1
-rw-r--r--arch/riscv/include/asm/vdso.h2
-rw-r--r--arch/riscv/kernel/cacheinfo.c11
-rw-r--r--arch/riscv/kernel/entry.S24
-rw-r--r--arch/riscv/kernel/setup.c24
-rw-r--r--arch/riscv/kernel/stacktrace.c5
-rw-r--r--arch/riscv/kernel/time.c3
-rw-r--r--arch/riscv/kernel/vdso.c2
-rw-r--r--arch/riscv/mm/init.c16
-rw-r--r--arch/riscv/mm/kasan_init.c4
-rw-r--r--arch/x86/hyperv/hv_init.c4
-rw-r--r--arch/x86/hyperv/mmu.c12
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c18
-rw-r--r--arch/x86/xen/enlighten_hvm.c15
-rw-r--r--arch/x86/xen/smp_hvm.c27
68 files changed, 1051 insertions, 380 deletions
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 60e901cd0de6..5a957a9a0984 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -371,7 +371,7 @@ static int __init xen_guest_init(void)
}
gnttab_init();
if (!xen_initial_domain())
- xenbus_probe(NULL);
+ xenbus_probe();
/*
* Making sure board specific code will not set up ops for
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 05e17351e4f3..f39568b28ec1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -174,8 +174,6 @@ config ARM64
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
- select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
- select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 015ddffaf6ca..b56a4b2bc248 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -17,7 +17,7 @@
#include <asm/lse.h>
#define ATOMIC_OP(op) \
-static inline void arch_##op(int i, atomic_t *v) \
+static __always_inline void arch_##op(int i, atomic_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub)
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, op) \
-static inline int arch_##op##name(int i, atomic_t *v) \
+static __always_inline int arch_##op##name(int i, atomic_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return)
#undef ATOMIC_FETCH_OPS
#define ATOMIC64_OP(op) \
-static inline void arch_##op(long i, atomic64_t *v) \
+static __always_inline void arch_##op(long i, atomic64_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, op) \
-static inline long arch_##op##name(long i, atomic64_t *v) \
+static __always_inline long arch_##op##name(long i, atomic64_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_FETCH_OPS
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}
diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h
index daa1a1da539e..737ded6b6d0d 100644
--- a/arch/arm64/include/asm/hyp_image.h
+++ b/arch/arm64/include/asm/hyp_image.h
@@ -7,6 +7,9 @@
#ifndef __ARM64_HYP_IMAGE_H__
#define __ARM64_HYP_IMAGE_H__
+#define __HYP_CONCAT(a, b) a ## b
+#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
+
/*
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
* to separate it from the kernel proper.
@@ -21,9 +24,31 @@
*/
#define HYP_SECTION_NAME(NAME) .hyp##NAME
+/* Symbol defined at the beginning of each hyp section. */
+#define HYP_SECTION_SYMBOL_NAME(NAME) \
+ HYP_CONCAT(__hyp_section_, HYP_SECTION_NAME(NAME))
+
+/*
+ * Helper to generate linker script statements starting a hyp section.
+ *
+ * A symbol with a well-known name is defined at the first byte. This
+ * is used as a base for hyp relocations (see gen-hyprel.c). It must
+ * be defined inside the section so the linker of `vmlinux` cannot
+ * separate it from the section data.
+ */
+#define BEGIN_HYP_SECTION(NAME) \
+ HYP_SECTION_NAME(NAME) : { \
+ HYP_SECTION_SYMBOL_NAME(NAME) = .;
+
+/* Helper to generate linker script statements ending a hyp section. */
+#define END_HYP_SECTION \
+ }
+
/* Defines an ELF hyp section from input section @NAME and its subsections. */
-#define HYP_SECTION(NAME) \
- HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) }
+#define HYP_SECTION(NAME) \
+ BEGIN_HYP_SECTION(NAME) \
+ *(NAME NAME##.*) \
+ END_HYP_SECTION
/*
* Defines a linker script alias of a kernel-proper symbol referenced by
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 8a33d83ea843..22d933e9b59e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -199,32 +199,6 @@ extern void __vgic_v3_init_lrs(void);
extern u32 __kvm_get_mdcr_el2(void);
-#if defined(GCC_VERSION) && GCC_VERSION < 50000
-#define SYM_CONSTRAINT "i"
-#else
-#define SYM_CONSTRAINT "S"
-#endif
-
-/*
- * Obtain the PC-relative address of a kernel symbol
- * s: symbol
- *
- * The goal of this macro is to return a symbol's address based on a
- * PC-relative computation, as opposed to a loading the VA from a
- * constant pool or something similar. This works well for HYP, as an
- * absolute VA is guaranteed to be wrong. Only use this if trying to
- * obtain the address of a symbol (i.e. not something you obtained by
- * following a pointer).
- */
-#define hyp_symbol_addr(s) \
- ({ \
- typeof(s) *addr; \
- asm("adrp %0, %1\n" \
- "add %0, %0, :lo12:%1\n" \
- : "=r" (addr) : SYM_CONSTRAINT (&s)); \
- addr; \
- })
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1b8a3d825276..3d10e6527f7d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -770,4 +770,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
+int kvm_trng_call(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e52d82aeadca..90873851f677 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -73,49 +73,39 @@ alternative_cb_end
.endm
/*
- * Convert a kernel image address to a PA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a PA
+ * reg: hypervisor address to be converted in place
* tmp: temporary register
- *
- * The actual code generation takes place in kvm_get_kimage_voffset, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_get_kimage_voffset uses the
- * specific registers encoded in the instructions).
*/
-.macro kimg_pa reg, tmp
-alternative_cb kvm_get_kimage_voffset
- movz \tmp, #0
- movk \tmp, #0, lsl #16
- movk \tmp, #0, lsl #32
- movk \tmp, #0, lsl #48
-alternative_cb_end
-
- /* reg = __pa(reg) */
- sub \reg, \reg, \tmp
+.macro hyp_pa reg, tmp
+ ldr_l \tmp, hyp_physvirt_offset
+ add \reg, \reg, \tmp
.endm
/*
- * Convert a kernel image address to a hyp VA
- * reg: kernel address to be converted in place
+ * Convert a hypervisor VA to a kernel image address
+ * reg: hypervisor address to be converted in place
* tmp: temporary register
*
* The actual code generation takes place in kvm_get_kimage_voffset, and
* the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_kimg_phys_offset uses the
+ * perform the register allocation (kvm_get_kimage_voffset uses the
* specific registers encoded in the instructions).
*/
-.macro kimg_hyp_va reg, tmp
-alternative_cb kvm_update_kimg_phys_offset
+.macro hyp_kimg_va reg, tmp
+ /* Convert hyp VA -> PA. */
+ hyp_pa \reg, \tmp
+
+ /* Load kimage_voffset. */
+alternative_cb kvm_get_kimage_voffset
movz \tmp, #0
movk \tmp, #0, lsl #16
movk \tmp, #0, lsl #32
movk \tmp, #0, lsl #48
alternative_cb_end
- sub \reg, \reg, \tmp
- mov_q \tmp, PAGE_OFFSET
- orr \reg, \reg, \tmp
- kern_hyp_va \reg
+ /* Convert PA -> kimg VA. */
+ add \reg, \reg, \tmp
.endm
#else
@@ -129,6 +119,7 @@ alternative_cb_end
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);
+void kvm_apply_hyp_relocations(void);
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
@@ -144,24 +135,6 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
-static __always_inline unsigned long __kimg_hyp_va(unsigned long v)
-{
- unsigned long offset;
-
- asm volatile(ALTERNATIVE_CB("movz %0, #0\n"
- "movk %0, #0, lsl #16\n"
- "movk %0, #0, lsl #32\n"
- "movk %0, #0, lsl #48\n",
- kvm_update_kimg_phys_offset)
- : "=r" (offset));
-
- return __kern_hyp_va((v - offset) | PAGE_OFFSET);
-}
-
-#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v))))
-
-#define kimg_fn_ptr(x) (typeof(x) **)(x)
-
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 52ab38db04c7..8886d43cfb11 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -157,6 +157,11 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* If device attributes are not explicitly requested in @prot, then the
* mapping will be normal, cacheable.
*
+ * Note that the update of a valid leaf PTE in this function will be aborted,
+ * if it's trying to recreate the exact same mapping or only change the access
+ * permissions. Instead, the vCPU will exit one more time from guest if still
+ * needed and then go through the path of relaxing permissions.
+ *
* Note that this function will both coalesce existing table entries and split
* existing block mappings, relying on page-faults to fault back areas outside
* of the new mapping lazily.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 69ad25fbeae4..ca2cd75d3286 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -94,8 +94,7 @@
#endif /* CONFIG_ARM64_FORCE_52BIT */
extern phys_addr_t arm64_dma_phys_limit;
-extern phys_addr_t arm64_dma32_phys_limit;
-#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
+#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
struct debug_info {
#ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 8ff579361731..2f36b16a5b5d 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,7 +11,8 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
-extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[];
+extern char __hyp_rodata_start[], __hyp_rodata_end[];
+extern char __hyp_reloc_begin[], __hyp_reloc_end[];
extern char __idmap_text_start[], __idmap_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5c3cc8..2fb3f386588c 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -846,7 +846,10 @@
#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_8_0 0x3
#define ID_DFR0_PERFMON_8_1 0x4
+#define ID_DFR0_PERFMON_8_4 0x5
+#define ID_DFR0_PERFMON_8_5 0x6
#define ID_ISAR4_SWP_FRAC_SHIFT 28
#define ID_ISAR4_PSR_M_SHIFT 24
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f42fd9e33981..301784463587 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -75,7 +75,7 @@ int main(void)
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
- DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
+ DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_COMPAT
DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index a338f40e64d3..b3e4f9a088b1 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -35,7 +35,7 @@
*/
.macro ftrace_regs_entry, allregs=0
/* Make room for pt_regs, plus a callee frame */
- sub sp, sp, #(S_FRAME_SIZE + 16)
+ sub sp, sp, #(PT_REGS_SIZE + 16)
/* Save function arguments (and x9 for simplicity) */
stp x0, x1, [sp, #S_X0]
@@ -61,15 +61,15 @@
.endif
/* Save the callsite's SP and LR */
- add x10, sp, #(S_FRAME_SIZE + 16)
+ add x10, sp, #(PT_REGS_SIZE + 16)
stp x9, x10, [sp, #S_LR]
/* Save the PC after the ftrace callsite */
str x30, [sp, #S_PC]
/* Create a frame record for the callsite above pt_regs */
- stp x29, x9, [sp, #S_FRAME_SIZE]
- add x29, sp, #S_FRAME_SIZE
+ stp x29, x9, [sp, #PT_REGS_SIZE]
+ add x29, sp, #PT_REGS_SIZE
/* Create our frame record within pt_regs. */
stp x29, x30, [sp, #S_STACKFRAME]
@@ -120,7 +120,7 @@ ftrace_common_return:
ldr x9, [sp, #S_PC]
/* Restore the callsite's SP */
- add sp, sp, #S_FRAME_SIZE + 16
+ add sp, sp, #PT_REGS_SIZE + 16
ret x9
SYM_CODE_END(ftrace_common)
@@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller)
ldr x0, [sp, #S_PC]
sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
add x1, sp, #S_LR // parent_ip (callsite's LR)
- ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
+ ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP)
bl prepare_ftrace_return
b ftrace_common_return
SYM_CODE_END(ftrace_graph_caller)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a8c3e7aaca74..c9bae73f2621 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -75,7 +75,7 @@ alternative_else_nop_endif
.endif
#endif
- sub sp, sp, #S_FRAME_SIZE
+ sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
@@ -96,7 +96,7 @@ alternative_else_nop_endif
* userspace, and can clobber EL0 registers to free up GPRs.
*/
- /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+ /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
@@ -253,7 +253,7 @@ alternative_else_nop_endif
scs_load tsk, x20
.else
- add x21, sp, #S_FRAME_SIZE
+ add x21, sp, #PT_REGS_SIZE
get_current_task tsk
.endif /* \el == 0 */
mrs x22, elr_el1
@@ -377,7 +377,7 @@ alternative_else_nop_endif
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
- add sp, sp, #S_FRAME_SIZE // restore sp
+ add sp, sp, #PT_REGS_SIZE // restore sp
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -580,12 +580,12 @@ __bad_stack:
/*
* Store the original GPRs to the new stack. The orginal SP (minus
- * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+ * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry.
*/
- sub sp, sp, #S_FRAME_SIZE
+ sub sp, sp, #PT_REGS_SIZE
kernel_entry 1
mrs x0, tpidr_el0
- add x0, x0, #S_FRAME_SIZE
+ add x0, x0, #PT_REGS_SIZE
str x0, [sp, #S_SP]
/* Stash the regs for handle_bad_stack */
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index f676243abac6..23f1a557bd9f 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,7 +64,6 @@ __efistub__ctype = _ctype;
/* Alternative callbacks for init-time patching of nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
KVM_NVHE_ALIAS(kvm_update_va_mask);
-KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset);
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
/* Global kernel state accessed by nVHE hyp code. */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 38bb07eff872..3605f77ad4df 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -23,8 +23,6 @@
#include <linux/platform_device.h>
#include <linux/sched_clock.h>
#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <linux/cpufreq.h>
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = {
static int __init armv8_pmu_driver_init(void)
{
- int ret;
-
if (acpi_disabled)
- ret = platform_driver_register(&armv8_pmu_driver);
+ return platform_driver_register(&armv8_pmu_driver);
else
- ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
-
- /*
- * Try to re-initialize lockup detector after PMU init in
- * case PMU events are triggered via NMIs.
- */
- if (ret == 0 && arm_pmu_irq_is_nmi())
- lockup_detector_init();
-
- return ret;
+ return arm_pmu_acpi_probe(armv8_pmuv3_init);
}
device_initcall(armv8_pmu_driver_init)
@@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time_zero = 1;
userpg->cap_user_time_short = 1;
}
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
-/*
- * Safe maximum CPU frequency in case a particular platform doesn't implement
- * cpufreq driver. Although, architecture doesn't put any restrictions on
- * maximum frequency but 5 GHz seems to be safe maximum given the available
- * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
- * hand, we can't make it much higher as it would lead to a large hard-lockup
- * detection timeout on parts which are running slower (eg. 1GHz on
- * Developerbox) and doesn't possess a cpufreq driver.
- */
-#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
- unsigned int cpu = smp_processor_id();
- unsigned long max_cpu_freq;
-
- max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
- if (!max_cpu_freq)
- max_cpu_freq = SAFE_MAX_CPU_FREQ;
-
- return (u64)max_cpu_freq * watchdog_thresh;
-}
-#endif
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 890ca72c5a51..288a84e253cc 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -25,7 +25,7 @@
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
- add x0, sp, #S_FRAME_SIZE
+ add x0, sp, #PT_REGS_SIZE
stp lr, x0, [sp, #S_LR]
/*
* Construct a useful saved PSTATE
@@ -62,7 +62,7 @@
.endm
SYM_CODE_START(kretprobe_trampoline)
- sub sp, sp, #S_FRAME_SIZE
+ sub sp, sp, #PT_REGS_SIZE
save_all_base_regs
@@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline)
restore_all_base_regs
- add sp, sp, #S_FRAME_SIZE
+ add sp, sp, #PT_REGS_SIZE
ret
SYM_CODE_END(kretprobe_trampoline)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f71d6ce4673f..6237486ff6bb 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs)
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned long thread_flags)
{
- /*
- * The assembly code enters us with IRQs off, but it hasn't
- * informed the tracing code of that for efficiency reasons.
- * Update the trace code with the current status.
- */
- trace_hardirqs_off();
-
do {
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index ad00f99ee9b0..357590beaabb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -434,8 +434,10 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
- if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode())
+ if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) {
kvm_compute_layout();
+ kvm_apply_hyp_relocations();
+ }
}
void __init smp_cpus_done(unsigned int max_cpus)
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index f61e9d8cc55a..c2877c332f2d 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -9,6 +9,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
+#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
@@ -165,15 +166,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
local_daif_mask();
flags = current_thread_info()->flags;
- if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
- /*
- * We're off to userspace, where interrupts are
- * always enabled after we restore the flags from
- * the SPSR.
- */
- trace_hardirqs_on();
+ if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
return;
- }
local_daif_restore(DAIF_PROCCTX);
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 4c0b0c89ad59..636ca45aa1d4 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -31,10 +31,11 @@ jiffies = jiffies_64;
__stop___kvm_ex_table = .;
#define HYPERVISOR_DATA_SECTIONS \
- HYP_SECTION_NAME(.data..ro_after_init) : { \
- __hyp_data_ro_after_init_start = .; \
+ HYP_SECTION_NAME(.rodata) : { \
+ __hyp_rodata_start = .; \
*(HYP_SECTION_NAME(.data..ro_after_init)) \
- __hyp_data_ro_after_init_end = .; \
+ *(HYP_SECTION_NAME(.rodata)) \
+ __hyp_rodata_end = .; \
}
#define HYPERVISOR_PERCPU_SECTION \
@@ -42,10 +43,19 @@ jiffies = jiffies_64;
HYP_SECTION_NAME(.data..percpu) : { \
*(HYP_SECTION_NAME(.data..percpu)) \
}
+
+#define HYPERVISOR_RELOC_SECTION \
+ .hyp.reloc : ALIGN(4) { \
+ __hyp_reloc_begin = .; \
+ *(.hyp.reloc) \
+ __hyp_reloc_end = .; \
+ }
+
#else /* CONFIG_KVM */
#define HYPERVISOR_EXTABLE
#define HYPERVISOR_DATA_SECTIONS
#define HYPERVISOR_PERCPU_SECTION
+#define HYPERVISOR_RELOC_SECTION
#endif
#define HYPERVISOR_TEXT \
@@ -216,6 +226,8 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
+ HYPERVISOR_RELOC_SECTION
+
.rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 13b017284bf9..589921392cb1 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o \
- arch_timer.o \
+ arch_timer.o trng.o\
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe60d25c000e..bb85da1d5880 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1750,11 +1750,10 @@ static int init_hyp_mode(void)
goto out_err;
}
- err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start),
- kvm_ksym_ref(__hyp_data_ro_after_init_end),
- PAGE_HYP_RO);
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
+ kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
- kvm_err("Cannot map .hyp.data..ro_after_init section\n");
+ kvm_err("Cannot map .hyp.rodata section\n");
goto out_err;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 84473574c2e7..54f4860cd87c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -505,8 +505,8 @@ static inline void __kvm_unexpected_el2_exception(void)
struct exception_table_entry *entry, *end;
unsigned long elr_el2 = read_sysreg(elr_el2);
- entry = hyp_symbol_addr(__start___kvm_ex_table);
- end = hyp_symbol_addr(__stop___kvm_ex_table);
+ entry = &__start___kvm_ex_table;
+ end = &__stop___kvm_ex_table;
while (entry < end) {
addr = (unsigned long)&entry->insn + entry->insn;
diff --git a/arch/arm64/kvm/hyp/nvhe/.gitignore b/arch/arm64/kvm/hyp/nvhe/.gitignore
index 695d73d0249e..5b6c43cc96f8 100644
--- a/arch/arm64/kvm/hyp/nvhe/.gitignore
+++ b/arch/arm64/kvm/hyp/nvhe/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
+gen-hyprel
hyp.lds
+hyp-reloc.S
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 1f1e351c5fe2..a6707df4f6c0 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -3,8 +3,11 @@
# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
#
-asflags-y := -D__KVM_NVHE_HYPERVISOR__
-ccflags-y := -D__KVM_NVHE_HYPERVISOR__
+asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
+
+hostprogs := gen-hyprel
+HOST_EXTRACFLAGS += -I$(objtree)/include
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o
@@ -19,7 +22,7 @@ obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
obj-y := kvm_nvhe.o
-extra-y := $(hyp-obj) kvm_nvhe.tmp.o hyp.lds
+extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
# 1) Compile all source files to `.nvhe.o` object files. The file extension
# avoids file name clashes for files shared with VHE.
@@ -42,11 +45,31 @@ LDFLAGS_kvm_nvhe.tmp.o := -r -T
$(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE
$(call if_changed,ld)
-# 4) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
+# 4) Generate list of hyp code/data positions that need to be relocated at
+# runtime. Because the hypervisor is part of the kernel binary, relocations
+# produce a kernel VA. We enumerate relocations targeting hyp at build time
+# and convert the kernel VAs at those positions to hyp VAs.
+$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel
+ $(call if_changed,hyprel)
+
+# 5) Compile hyp-reloc.S and link it into the existing partially linked object.
+# The object file now contains a section with pointers to hyp positions that
+# will contain kernel VAs at runtime. These pointers have relocations on them
+# so that they get updated as the hyp object is linked into `vmlinux`.
+LDFLAGS_kvm_nvhe.rel.o := -r
+$(obj)/kvm_nvhe.rel.o: $(obj)/kvm_nvhe.tmp.o $(obj)/hyp-reloc.o FORCE
+ $(call if_changed,ld)
+
+# 6) Produce the final 'kvm_nvhe.o', ready to be linked into 'vmlinux'.
# Prefixes names of ELF symbols with '__kvm_nvhe_'.
-$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.tmp.o FORCE
+$(obj)/kvm_nvhe.o: $(obj)/kvm_nvhe.rel.o FORCE
$(call if_changed,hypcopy)
+# The HYPREL command calls `gen-hyprel` to generate an assembly file with
+# a list of relocations targeting hyp code/data.
+quiet_cmd_hyprel = HYPREL $@
+ cmd_hyprel = $(obj)/gen-hyprel $< > $@
+
# The HYPCOPY command uses `objcopy` to prefix all ELF symbol names
# to avoid clashes with VHE code/data.
quiet_cmd_hypcopy = HYPCOPY $@
diff --git a/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
new file mode 100644
index 000000000000..ead02c6a7628
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/gen-hyprel.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 - Google LLC
+ * Author: David Brazdil <dbrazdil@google.com>
+ *
+ * Generates relocation information used by the kernel to convert
+ * absolute addresses in hyp data from kernel VAs to hyp VAs.
+ *
+ * This is necessary because hyp code is linked into the same binary
+ * as the kernel but executes under different memory mappings.
+ * If the compiler used absolute addressing, those addresses need to
+ * be converted before they are used by hyp code.
+ *
+ * The input of this program is the relocatable ELF object containing
+ * all hyp code/data, not yet linked into vmlinux. Hyp section names
+ * should have been prefixed with `.hyp` at this point.
+ *
+ * The output (printed to stdout) is an assembly file containing
+ * an array of 32-bit integers and static relocations that instruct
+ * the linker of `vmlinux` to populate the array entries with offsets
+ * to positions in the kernel binary containing VAs used by hyp code.
+ *
+ * Note that dynamic relocations could be used for the same purpose.
+ * However, those are only generated if CONFIG_RELOCATABLE=y.
+ */
+
+#include <elf.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <generated/autoconf.h>
+
+#define HYP_SECTION_PREFIX ".hyp"
+#define HYP_RELOC_SECTION ".hyp.reloc"
+#define HYP_SECTION_SYMBOL_PREFIX "__hyp_section_"
+
+/*
+ * AArch64 relocation type constants.
+ * Included in case these are not defined in the host toolchain.
+ */
+#ifndef R_AARCH64_ABS64
+#define R_AARCH64_ABS64 257
+#endif
+#ifndef R_AARCH64_LD_PREL_LO19
+#define R_AARCH64_LD_PREL_LO19 273
+#endif
+#ifndef R_AARCH64_ADR_PREL_LO21
+#define R_AARCH64_ADR_PREL_LO21 274
+#endif
+#ifndef R_AARCH64_ADR_PREL_PG_HI21
+#define R_AARCH64_ADR_PREL_PG_HI21 275
+#endif
+#ifndef R_AARCH64_ADR_PREL_PG_HI21_NC
+#define R_AARCH64_ADR_PREL_PG_HI21_NC 276
+#endif
+#ifndef R_AARCH64_ADD_ABS_LO12_NC
+#define R_AARCH64_ADD_ABS_LO12_NC 277
+#endif
+#ifndef R_AARCH64_LDST8_ABS_LO12_NC
+#define R_AARCH64_LDST8_ABS_LO12_NC 278
+#endif
+#ifndef R_AARCH64_TSTBR14
+#define R_AARCH64_TSTBR14 279
+#endif
+#ifndef R_AARCH64_CONDBR19
+#define R_AARCH64_CONDBR19 280
+#endif
+#ifndef R_AARCH64_JUMP26
+#define R_AARCH64_JUMP26 282
+#endif
+#ifndef R_AARCH64_CALL26
+#define R_AARCH64_CALL26 283
+#endif
+#ifndef R_AARCH64_LDST16_ABS_LO12_NC
+#define R_AARCH64_LDST16_ABS_LO12_NC 284
+#endif
+#ifndef R_AARCH64_LDST32_ABS_LO12_NC
+#define R_AARCH64_LDST32_ABS_LO12_NC 285
+#endif
+#ifndef R_AARCH64_LDST64_ABS_LO12_NC
+#define R_AARCH64_LDST64_ABS_LO12_NC 286
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G0
+#define R_AARCH64_MOVW_PREL_G0 287
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G0_NC
+#define R_AARCH64_MOVW_PREL_G0_NC 288
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G1
+#define R_AARCH64_MOVW_PREL_G1 289
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G1_NC
+#define R_AARCH64_MOVW_PREL_G1_NC 290
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G2
+#define R_AARCH64_MOVW_PREL_G2 291
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G2_NC
+#define R_AARCH64_MOVW_PREL_G2_NC 292
+#endif
+#ifndef R_AARCH64_MOVW_PREL_G3
+#define R_AARCH64_MOVW_PREL_G3 293
+#endif
+#ifndef R_AARCH64_LDST128_ABS_LO12_NC
+#define R_AARCH64_LDST128_ABS_LO12_NC 299
+#endif
+
+/* Global state of the processed ELF. */
+static struct {
+ const char *path;
+ char *begin;
+ size_t size;
+ Elf64_Ehdr *ehdr;
+ Elf64_Shdr *sh_table;
+ const char *sh_string;
+} elf;
+
+#if defined(CONFIG_CPU_LITTLE_ENDIAN)
+
+#define elf16toh(x) le16toh(x)
+#define elf32toh(x) le32toh(x)
+#define elf64toh(x) le64toh(x)
+
+#define ELFENDIAN ELFDATA2LSB
+
+#elif defined(CONFIG_CPU_BIG_ENDIAN)
+
+#define elf16toh(x) be16toh(x)
+#define elf32toh(x) be32toh(x)
+#define elf64toh(x) be64toh(x)
+
+#define ELFENDIAN ELFDATA2MSB
+
+#else
+
+#error PDP-endian sadly unsupported...
+
+#endif
+
+#define fatal_error(fmt, ...) \
+ ({ \
+ fprintf(stderr, "error: %s: " fmt "\n", \
+ elf.path, ## __VA_ARGS__); \
+ exit(EXIT_FAILURE); \
+ __builtin_unreachable(); \
+ })
+
+#define fatal_perror(msg) \
+ ({ \
+ fprintf(stderr, "error: %s: " msg ": %s\n", \
+ elf.path, strerror(errno)); \
+ exit(EXIT_FAILURE); \
+ __builtin_unreachable(); \
+ })
+
+#define assert_op(lhs, rhs, fmt, op) \
+ ({ \
+ typeof(lhs) _lhs = (lhs); \
+ typeof(rhs) _rhs = (rhs); \
+ \
+ if (!(_lhs op _rhs)) { \
+ fatal_error("assertion " #lhs " " #op " " #rhs \
+ " failed (lhs=" fmt ", rhs=" fmt \
+ ", line=%d)", _lhs, _rhs, __LINE__); \
+ } \
+ })
+
+#define assert_eq(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, ==)
+#define assert_ne(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, !=)
+#define assert_lt(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, <)
+#define assert_ge(lhs, rhs, fmt) assert_op(lhs, rhs, fmt, >=)
+
+/*
+ * Return a pointer of a given type at a given offset from
+ * the beginning of the ELF file.
+ */
+#define elf_ptr(type, off) ((type *)(elf.begin + (off)))
+
+/* Iterate over all sections in the ELF. */
+#define for_each_section(var) \
+ for (var = elf.sh_table; var < elf.sh_table + elf16toh(elf.ehdr->e_shnum); ++var)
+
+/* Iterate over all Elf64_Rela relocations in a given section. */
+#define for_each_rela(shdr, var) \
+ for (var = elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset)); \
+ var < elf_ptr(Elf64_Rela, elf64toh(shdr->sh_offset) + elf64toh(shdr->sh_size)); var++)
+
+/* True if a string starts with a given prefix. */
+static inline bool starts_with(const char *str, const char *prefix)
+{
+ return memcmp(str, prefix, strlen(prefix)) == 0;
+}
+
+/* Returns a string containing the name of a given section. */
+static inline const char *section_name(Elf64_Shdr *shdr)
+{
+ return elf.sh_string + elf32toh(shdr->sh_name);
+}
+
+/* Returns a pointer to the first byte of section data. */
+static inline const char *section_begin(Elf64_Shdr *shdr)
+{
+ return elf_ptr(char, elf64toh(shdr->sh_offset));
+}
+
+/* Find a section by its offset from the beginning of the file. */
+static inline Elf64_Shdr *section_by_off(Elf64_Off off)
+{
+ assert_ne(off, 0UL, "%lu");
+ return elf_ptr(Elf64_Shdr, off);
+}
+
+/* Find a section by its index. */
+static inline Elf64_Shdr *section_by_idx(uint16_t idx)
+{
+ assert_ne(idx, SHN_UNDEF, "%u");
+ return &elf.sh_table[idx];
+}
+
+/*
+ * Memory-map the given ELF file, perform sanity checks, and
+ * populate global state.
+ */
+static void init_elf(const char *path)
+{
+ int fd, ret;
+ struct stat stat;
+
+ /* Store path in the global struct for error printing. */
+ elf.path = path;
+
+ /* Open the ELF file. */
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ fatal_perror("Could not open ELF file");
+
+ /* Get status of ELF file to obtain its size. */
+ ret = fstat(fd, &stat);
+ if (ret < 0) {
+ close(fd);
+ fatal_perror("Could not get status of ELF file");
+ }
+
+ /* mmap() the entire ELF file read-only at an arbitrary address. */
+ elf.begin = mmap(0, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (elf.begin == MAP_FAILED) {
+ close(fd);
+ fatal_perror("Could not mmap ELF file");
+ }
+
+ /* mmap() was successful, close the FD. */
+ close(fd);
+
+ /* Get pointer to the ELF header. */
+ assert_ge(stat.st_size, sizeof(*elf.ehdr), "%lu");
+ elf.ehdr = elf_ptr(Elf64_Ehdr, 0);
+
+ /* Check the ELF magic. */
+ assert_eq(elf.ehdr->e_ident[EI_MAG0], ELFMAG0, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG1], ELFMAG1, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG2], ELFMAG2, "0x%x");
+ assert_eq(elf.ehdr->e_ident[EI_MAG3], ELFMAG3, "0x%x");
+
+ /* Sanity check that this is an ELF64 relocatable object for AArch64. */
+ assert_eq(elf.ehdr->e_ident[EI_CLASS], ELFCLASS64, "%u");
+ assert_eq(elf.ehdr->e_ident[EI_DATA], ELFENDIAN, "%u");
+ assert_eq(elf16toh(elf.ehdr->e_type), ET_REL, "%u");
+ assert_eq(elf16toh(elf.ehdr->e_machine), EM_AARCH64, "%u");
+
+ /* Populate fields of the global struct. */
+ elf.sh_table = section_by_off(elf64toh(elf.ehdr->e_shoff));
+ elf.sh_string = section_begin(section_by_idx(elf16toh(elf.ehdr->e_shstrndx)));
+}
+
+/* Print the prologue of the output ASM file. */
+static void emit_prologue(void)
+{
+ printf(".data\n"
+ ".pushsection " HYP_RELOC_SECTION ", \"a\"\n");
+}
+
+/* Print ASM statements needed as a prologue to a processed hyp section. */
+static void emit_section_prologue(const char *sh_orig_name)
+{
+ /* Declare the hyp section symbol. */
+ printf(".global %s%s\n", HYP_SECTION_SYMBOL_PREFIX, sh_orig_name);
+}
+
+/*
+ * Print ASM statements to create a hyp relocation entry for a given
+ * R_AARCH64_ABS64 relocation.
+ *
+ * The linker of vmlinux will populate the position given by `rela` with
+ * an absolute 64-bit kernel VA. If the kernel is relocatable, it will
+ * also generate a dynamic relocation entry so that the kernel can shift
+ * the address at runtime for KASLR.
+ *
+ * Emit a 32-bit offset from the current address to the position given
+ * by `rela`. This way the kernel can iterate over all kernel VAs used
+ * by hyp at runtime and convert them to hyp VAs. However, that offset
+ * will not be known until linking of `vmlinux`, so emit a PREL32
+ * relocation referencing a symbol that the hyp linker script put at
+ * the beginning of the relocated section + the offset from `rela`.
+ */
+static void emit_rela_abs64(Elf64_Rela *rela, const char *sh_orig_name)
+{
+ /* Offset of this reloc from the beginning of HYP_RELOC_SECTION. */
+ static size_t reloc_offset;
+
+ /* Create storage for the 32-bit offset. */
+ printf(".word 0\n");
+
+ /*
+ * Create a PREL32 relocation which instructs the linker of `vmlinux`
+ * to insert offset to position <base> + <offset>, where <base> is
+ * a symbol at the beginning of the relocated section, and <offset>
+ * is `rela->r_offset`.
+ */
+ printf(".reloc %lu, R_AARCH64_PREL32, %s%s + 0x%lx\n",
+ reloc_offset, HYP_SECTION_SYMBOL_PREFIX, sh_orig_name,
+ elf64toh(rela->r_offset));
+
+ reloc_offset += 4;
+}
+
+/* Print the epilogue of the output ASM file. */
+static void emit_epilogue(void)
+{
+ printf(".popsection\n");
+}
+
+/*
+ * Iterate over all RELA relocations in a given section and emit
+ * hyp relocation data for all absolute addresses in hyp code/data.
+ *
+ * Static relocations that generate PC-relative-addressing are ignored.
+ * Failure is reported for unexpected relocation types.
+ */
+static void emit_rela_section(Elf64_Shdr *sh_rela)
+{
+ Elf64_Shdr *sh_orig = &elf.sh_table[elf32toh(sh_rela->sh_info)];
+ const char *sh_orig_name = section_name(sh_orig);
+ Elf64_Rela *rela;
+
+ /* Skip all non-hyp sections. */
+ if (!starts_with(sh_orig_name, HYP_SECTION_PREFIX))
+ return;
+
+ emit_section_prologue(sh_orig_name);
+
+ for_each_rela(sh_rela, rela) {
+ uint32_t type = (uint32_t)elf64toh(rela->r_info);
+
+ /* Check that rela points inside the relocated section. */
+ assert_lt(elf64toh(rela->r_offset), elf64toh(sh_orig->sh_size), "0x%lx");
+
+ switch (type) {
+ /*
+ * Data relocations to generate absolute addressing.
+ * Emit a hyp relocation.
+ */
+ case R_AARCH64_ABS64:
+ emit_rela_abs64(rela, sh_orig_name);
+ break;
+ /* Allow relocations to generate PC-relative addressing. */
+ case R_AARCH64_LD_PREL_LO19:
+ case R_AARCH64_ADR_PREL_LO21:
+ case R_AARCH64_ADR_PREL_PG_HI21:
+ case R_AARCH64_ADR_PREL_PG_HI21_NC:
+ case R_AARCH64_ADD_ABS_LO12_NC:
+ case R_AARCH64_LDST8_ABS_LO12_NC:
+ case R_AARCH64_LDST16_ABS_LO12_NC:
+ case R_AARCH64_LDST32_ABS_LO12_NC:
+ case R_AARCH64_LDST64_ABS_LO12_NC:
+ case R_AARCH64_LDST128_ABS_LO12_NC:
+ break;
+ /* Allow relative relocations for control-flow instructions. */
+ case R_AARCH64_TSTBR14:
+ case R_AARCH64_CONDBR19:
+ case R_AARCH64_JUMP26:
+ case R_AARCH64_CALL26:
+ break;
+ /* Allow group relocations to create PC-relative offset inline. */
+ case R_AARCH64_MOVW_PREL_G0:
+ case R_AARCH64_MOVW_PREL_G0_NC:
+ case R_AARCH64_MOVW_PREL_G1:
+ case R_AARCH64_MOVW_PREL_G1_NC:
+ case R_AARCH64_MOVW_PREL_G2:
+ case R_AARCH64_MOVW_PREL_G2_NC:
+ case R_AARCH64_MOVW_PREL_G3:
+ break;
+ default:
+ fatal_error("Unexpected RELA type %u", type);
+ }
+ }
+}
+
+/* Iterate over all sections and emit hyp relocation data for RELA sections. */
+static void emit_all_relocs(void)
+{
+ Elf64_Shdr *shdr;
+
+ for_each_section(shdr) {
+ switch (elf32toh(shdr->sh_type)) {
+ case SHT_REL:
+ fatal_error("Unexpected SHT_REL section \"%s\"",
+ section_name(shdr));
+ case SHT_RELA:
+ emit_rela_section(shdr);
+ break;
+ }
+ }
+}
+
+int main(int argc, const char **argv)
+{
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <elf_input>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ init_elf(argv[1]);
+
+ emit_prologue();
+ emit_all_relocs();
+ emit_epilogue();
+
+ return EXIT_SUCCESS;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index a820dfdc9c25..6585a7cbbc56 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -74,27 +74,28 @@ SYM_FUNC_END(__host_enter)
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
*/
SYM_FUNC_START(__hyp_do_panic)
- /* Load the format arguments into x1-7 */
- mov x6, x3
- get_vcpu_ptr x7, x3
-
- mrs x3, esr_el2
- mrs x4, far_el2
- mrs x5, hpfar_el2
-
/* Prepare and exit to the host's panic funciton. */
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
PSR_MODE_EL1h)
msr spsr_el2, lr
ldr lr, =panic
+ hyp_kimg_va lr, x6
msr elr_el2, lr
- /*
- * Set the panic format string and enter the host, conditionally
- * restoring the host context.
- */
+ /* Set the panic format string. Use the, now free, LR as scratch. */
+ ldr lr, =__hyp_panic_string
+ hyp_kimg_va lr, x6
+
+ /* Load the format arguments into x1-7. */
+ mov x6, x3
+ get_vcpu_ptr x7, x3
+ mrs x3, esr_el2
+ mrs x4, far_el2
+ mrs x5, hpfar_el2
+
+ /* Enter the host, conditionally restoring the host context. */
cmp x0, xzr
- ldr x0, =__hyp_panic_string
+ mov x0, lr
b.eq __host_enter_without_restoring
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
@@ -124,7 +125,7 @@ SYM_FUNC_END(__hyp_do_panic)
* Preserve x0-x4, which may contain stub parameters.
*/
ldr x5, =__kvm_handle_stub_hvc
- kimg_pa x5, x6
+ hyp_pa x5, x6
br x5
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index b17bf19217f1..50be6f4e6b99 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -18,7 +18,7 @@
#include <asm/virt.h>
.text
- .pushsection .hyp.idmap.text, "ax"
+ .pushsection .idmap.text, "ax"
.align 11
@@ -57,17 +57,10 @@ __do_hyp_init:
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
- // We only actively check bits [24:31], and everything
- // else has to be zero, which we check at build time.
-#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF)
-#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value
-#endif
-
- ror x0, x0, #24
- eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF)
- ror x0, x0, #4
- eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF)
- cbz x0, 1f
+ mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
+ cmp x0, x3
+ b.eq 1f
+
mov x0, #SMCCC_RET_NOT_SUPPORTED
eret
@@ -141,7 +134,6 @@ alternative_else_nop_endif
/* Set the host vector */
ldr x0, =__kvm_hyp_host_vector
- kimg_hyp_va x0, x1
msr vbar_el2, x0
ret
@@ -200,7 +192,6 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
/* Leave idmap. */
mov x0, x29
ldr x1, =kvm_host_psci_cpu_entry
- kimg_hyp_va x1, x2
br x1
SYM_CODE_END(__kvm_hyp_init_cpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a906f9e2ff34..f012f8665ecc 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -108,9 +108,9 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
typedef void (*hcall_t)(struct kvm_cpu_context *);
-#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x)
+#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
-static const hcall_t *host_hcall[] = {
+static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
@@ -130,7 +130,6 @@ static const hcall_t *host_hcall[] = {
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
- const hcall_t *kfn;
hcall_t hfn;
id -= KVM_HOST_SMCCC_ID(0);
@@ -138,13 +137,11 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
if (unlikely(id >= ARRAY_SIZE(host_hcall)))
goto inval;
- kfn = host_hcall[id];
- if (unlikely(!kfn))
+ hfn = host_hcall[id];
+ if (unlikely(!hfn))
goto inval;
cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
-
- hfn = kimg_fn_hyp_va(kfn);
hfn(host_ctxt);
return;
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
index 2997aa156d8e..879559057dee 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -33,8 +33,8 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
hyp_panic();
- cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base);
+ cpu_base_array = (unsigned long *)&kvm_arm_hyp_percpu_base;
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
- elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start);
+ elf_base = (unsigned long)&__per_cpu_start;
return this_cpu_base - elf_base;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index 1206d0d754d5..cd119d82d8e3 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -12,14 +12,17 @@
#include <asm/memory.h>
SECTIONS {
+ HYP_SECTION(.idmap.text)
HYP_SECTION(.text)
+ HYP_SECTION(.data..ro_after_init)
+ HYP_SECTION(.rodata)
+
/*
* .hyp..data..percpu needs to be page aligned to maintain the same
* alignment for when linking into vmlinux.
*/
. = ALIGN(PAGE_SIZE);
- HYP_SECTION_NAME(.data..percpu) : {
+ BEGIN_HYP_SECTION(.data..percpu)
PERCPU_INPUT(L1_CACHE_BYTES)
- }
- HYP_SECTION(.data..ro_after_init)
+ END_HYP_SECTION
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 8e7128cb7667..63de71c0481e 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -128,8 +128,8 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
if (cpu_id == INVALID_CPU_ID)
return PSCI_RET_INVALID_PARAMS;
- boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id);
- init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id);
+ boot_args = per_cpu_ptr(&cpu_on_args, cpu_id);
+ init_params = per_cpu_ptr(&kvm_init_params, cpu_id);
/* Check if the target CPU is already being booted. */
if (!try_acquire_boot_args(boot_args))
@@ -140,7 +140,7 @@ static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt)
wmb();
ret = psci_call(func_id, mpidr,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)),
+ __hyp_pa(&kvm_hyp_cpu_entry),
__hyp_pa(init_params));
/* If successful, the lock will be released by the target CPU. */
@@ -159,8 +159,8 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;
- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
- init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+ boot_args = this_cpu_ptr(&suspend_args);
+ init_params = this_cpu_ptr(&kvm_init_params);
/*
* No need to acquire a lock before writing to boot_args because a core
@@ -174,7 +174,7 @@ static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
* point if it is a deep sleep state.
*/
return psci_call(func_id, power_state,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(&kvm_hyp_cpu_resume),
__hyp_pa(init_params));
}
@@ -186,8 +186,8 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
struct psci_boot_args *boot_args;
struct kvm_nvhe_init_params *init_params;
- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
- init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params));
+ boot_args = this_cpu_ptr(&suspend_args);
+ init_params = this_cpu_ptr(&kvm_init_params);
/*
* No need to acquire a lock before writing to boot_args because a core
@@ -198,7 +198,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
/* Will only return on error. */
return psci_call(func_id,
- __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)),
+ __hyp_pa(&kvm_hyp_cpu_resume),
__hyp_pa(init_params), 0);
}
@@ -207,12 +207,12 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt;
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
if (is_cpu_on)
- boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args));
+ boot_args = this_cpu_ptr(&cpu_on_args);
else
- boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args));
+ boot_args = this_cpu_ptr(&suspend_args);
cpu_reg(host_ctxt, 0) = boot_args->r0;
write_sysreg_el2(boot_args->pc, SYS_ELR);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index bdf8e55ed308..4d177ce1d536 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -45,6 +45,10 @@
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
struct kvm_pgtable_walk_data {
struct kvm_pgtable *pgt;
struct kvm_pgtable_walker *walker;
@@ -170,10 +174,9 @@ static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
smp_store_release(ptep, pte);
}
-static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
- u32 level)
+static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
{
- kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
+ kvm_pte_t pte = kvm_phys_to_pte(pa);
u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
KVM_PTE_TYPE_BLOCK;
@@ -181,12 +184,7 @@ static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
pte |= FIELD_PREP(KVM_PTE_TYPE, type);
pte |= KVM_PTE_VALID;
- /* Tolerate KVM recreating the exact same mapping. */
- if (kvm_pte_valid(old))
- return old == pte;
-
- smp_store_release(ptep, pte);
- return true;
+ return pte;
}
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
@@ -341,12 +339,17 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
kvm_pte_t *ptep, struct hyp_map_data *data)
{
+ kvm_pte_t new, old = *ptep;
u64 granule = kvm_granule_size(level), phys = data->phys;
if (!kvm_block_mapping_supported(addr, end, phys, level))
return false;
- WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
+ /* Tolerate KVM recreating the exact same mapping */
+ new = kvm_init_valid_leaf_pte(phys, data->attr, level);
+ if (old != new && !WARN_ON(kvm_pte_valid(old)))
+ smp_store_release(ptep, new);
+
data->phys += granule;
return true;
}
@@ -461,34 +464,41 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
return 0;
}
-static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
- kvm_pte_t *ptep,
- struct stage2_map_data *data)
+static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
+ kvm_pte_t *ptep,
+ struct stage2_map_data *data)
{
+ kvm_pte_t new, old = *ptep;
u64 granule = kvm_granule_size(level), phys = data->phys;
+ struct page *page = virt_to_page(ptep);
if (!kvm_block_mapping_supported(addr, end, phys, level))
- return false;
-
- /*
- * If the PTE was already valid, drop the refcount on the table
- * early, as it will be bumped-up again in stage2_map_walk_leaf().
- * This ensures that the refcount stays constant across a valid to
- * valid PTE update.
- */
- if (kvm_pte_valid(*ptep))
- put_page(virt_to_page(ptep));
-
- if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
- goto out;
+ return -E2BIG;
+
+ new = kvm_init_valid_leaf_pte(phys, data->attr, level);
+ if (kvm_pte_valid(old)) {
+ /*
+ * Skip updating the PTE if we are trying to recreate the exact
+ * same mapping or only change the access permissions. Instead,
+ * the vCPU will exit one more time from guest if still needed
+ * and then go through the path of relaxing permissions.
+ */
+ if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)))
+ return -EAGAIN;
+
+ /*
+ * There's an existing different valid leaf entry, so perform
+ * break-before-make.
+ */
+ kvm_set_invalid_pte(ptep);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
+ put_page(page);
+ }
- /* There's an existing valid leaf entry, so perform break-before-make */
- kvm_set_invalid_pte(ptep);
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
- kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
-out:
+ smp_store_release(ptep, new);
+ get_page(page);
data->phys += granule;
- return true;
+ return 0;
}
static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
@@ -516,6 +526,7 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
struct stage2_map_data *data)
{
+ int ret;
kvm_pte_t *childp, pte = *ptep;
struct page *page = virt_to_page(ptep);
@@ -526,8 +537,9 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
return 0;
}
- if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
- goto out_get_page;
+ ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
+ if (ret != -E2BIG)
+ return ret;
if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
return -EINVAL;
@@ -551,9 +563,8 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
}
kvm_set_table_pte(ptep, childp);
-
-out_get_page:
get_page(page);
+
return 0;
}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
index 8f0585640241..87a54375bd6e 100644
--- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -64,7 +64,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
}
rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+ addr = kvm_vgic_global_state.vcpu_hyp_va;
addr += fault_ipa - vgic->vgic_cpu_base;
if (kvm_vcpu_dabt_iswrite(vcpu)) {
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 25ea4ecb6449..ead21b98b620 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -71,6 +71,12 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
if (gpa != GPA_INVALID)
val = gpa;
break;
+ case ARM_SMCCC_TRNG_VERSION:
+ case ARM_SMCCC_TRNG_FEATURES:
+ case ARM_SMCCC_TRNG_GET_UUID:
+ case ARM_SMCCC_TRNG_RND32:
+ case ARM_SMCCC_TRNG_RND64:
+ return kvm_trng_call(vcpu);
default:
return kvm_psci_call(vcpu);
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7d2257cc5438..77cb2d28f2a4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -879,11 +879,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !force_pte)
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
- if (writable) {
+ if (writable)
prot |= KVM_PGTABLE_PROT_W;
- kvm_set_pfn_dirty(pfn);
- mark_page_dirty(kvm, gfn);
- }
if (fault_status != FSC_PERM && !device)
clean_dcache_guest_page(pfn, vma_pagesize);
@@ -911,11 +908,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
memcache);
}
+ /* Mark the page dirty only if the fault is handled successfully */
+ if (writable && !ret) {
+ kvm_set_pfn_dirty(pfn);
+ mark_page_dirty(kvm, gfn);
+ }
+
out_unlock:
spin_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
- return ret;
+ return ret != -EAGAIN ? ret : 0;
}
/* Resolve the access fault by making the page young again. */
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 247422ac78a9..e9ec08b0b070 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -23,11 +23,11 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
switch (kvm->arch.pmuver) {
- case 1: /* ARMv8.0 */
+ case ID_AA64DFR0_PMUVER_8_0:
return GENMASK(9, 0);
- case 4: /* ARMv8.1 */
- case 5: /* ARMv8.4 */
- case 6: /* ARMv8.5 */
+ case ID_AA64DFR0_PMUVER_8_1:
+ case ID_AA64DFR0_PMUVER_8_4:
+ case ID_AA64DFR0_PMUVER_8_5:
return GENMASK(15, 0);
default: /* Shouldn't be here, just for sanity */
WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver);
@@ -795,6 +795,12 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
base = 0;
} else {
val = read_sysreg(pmceid1_el0);
+ /*
+ * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+ * as RAZ
+ */
+ if (vcpu->kvm->arch.pmuver >= ID_AA64DFR0_PMUVER_8_4)
+ val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
base = 32;
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 7c4f79532406..4f2f1e3145de 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -9,6 +9,7 @@
* Christoffer Dall <c.dall@virtualopensystems.com>
*/
+#include <linux/bitfield.h>
#include <linux/bsearch.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
@@ -700,14 +701,18 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- u64 pmceid;
+ u64 pmceid, mask, shift;
BUG_ON(p->is_write);
if (pmu_access_el0_disabled(vcpu))
return false;
+ get_access_mask(r, &mask, &shift);
+
pmceid = kvm_pmu_get_pmceid(vcpu, (p->Op2 & 1));
+ pmceid &= mask;
+ pmceid >>= shift;
p->regval = pmceid;
@@ -1021,6 +1026,8 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
return true;
}
+#define FEATURE(x) (GENMASK_ULL(x##_SHIFT + 3, x##_SHIFT))
+
/* Read a sanitised cpufeature ID register by sys_reg_desc */
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r, bool raz)
@@ -1028,36 +1035,41 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
u32 id = reg_to_encoding(r);
u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
- if (id == SYS_ID_AA64PFR0_EL1) {
+ switch (id) {
+ case SYS_ID_AA64PFR0_EL1:
if (!vcpu_has_sve(vcpu))
- val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
- val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
- val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT);
- val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT);
- val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT);
- val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT);
- } else if (id == SYS_ID_AA64PFR1_EL1) {
- val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
- } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
- val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
- (0xfUL << ID_AA64ISAR1_API_SHIFT) |
- (0xfUL << ID_AA64ISAR1_GPA_SHIFT) |
- (0xfUL << ID_AA64ISAR1_GPI_SHIFT));
- } else if (id == SYS_ID_AA64DFR0_EL1) {
- u64 cap = 0;
-
- /* Limit guests to PMUv3 for ARMv8.1 */
- if (kvm_vcpu_has_pmu(vcpu))
- cap = ID_AA64DFR0_PMUVER_8_1;
-
+ val &= ~FEATURE(ID_AA64PFR0_SVE);
+ val &= ~FEATURE(ID_AA64PFR0_AMU);
+ val &= ~FEATURE(ID_AA64PFR0_CSV2);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+ val &= ~FEATURE(ID_AA64PFR0_CSV3);
+ val |= FIELD_PREP(FEATURE(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+ break;
+ case SYS_ID_AA64PFR1_EL1:
+ val &= ~FEATURE(ID_AA64PFR1_MTE);
+ break;
+ case SYS_ID_AA64ISAR1_EL1:
+ if (!vcpu_has_ptrauth(vcpu))
+ val &= ~(FEATURE(ID_AA64ISAR1_APA) |
+ FEATURE(ID_AA64ISAR1_API) |
+ FEATURE(ID_AA64ISAR1_GPA) |
+ FEATURE(ID_AA64ISAR1_GPI));
+ break;
+ case SYS_ID_AA64DFR0_EL1:
+ /* Limit debug to ARMv8.0 */
+ val &= ~FEATURE(ID_AA64DFR0_DEBUGVER);
+ val |= FIELD_PREP(FEATURE(ID_AA64DFR0_DEBUGVER), 6);
+ /* Limit guests to PMUv3 for ARMv8.4 */
val = cpuid_feature_cap_perfmon_field(val,
- ID_AA64DFR0_PMUVER_SHIFT,
- cap);
- } else if (id == SYS_ID_DFR0_EL1) {
- /* Limit guests to PMUv3 for ARMv8.1 */
+ ID_AA64DFR0_PMUVER_SHIFT,
+ kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
+ break;
+ case SYS_ID_DFR0_EL1:
+ /* Limit guests to PMUv3 for ARMv8.4 */
val = cpuid_feature_cap_perfmon_field(val,
- ID_DFR0_PERFMON_SHIFT,
- ID_DFR0_PERFMON_8_1);
+ ID_DFR0_PERFMON_SHIFT,
+ kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0);
+ break;
}
return val;
@@ -1493,6 +1505,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ PMU_SYS_REG(SYS_PMINTENCLR_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
+ { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@@ -1720,7 +1733,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 },
};
-static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
@@ -1734,7 +1747,7 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
(((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
(((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
- | (6 << 16) | (el3 << 14) | (el3 << 12));
+ | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
return true;
}
}
@@ -1767,8 +1780,8 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
* guest. Revisit this one day, would this principle change.
*/
static const struct sys_reg_desc cp14_regs[] = {
- /* DBGIDR */
- { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+ /* DBGDIDR */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgdidr },
/* DBGDTRRXext */
{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
@@ -1918,8 +1931,8 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(12), Op2( 3), access_pmovs },
{ Op1( 0), CRn( 9), CRm(12), Op2( 4), access_pmswinc },
{ Op1( 0), CRn( 9), CRm(12), Op2( 5), access_pmselr },
- { Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
- { Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
+ { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 6), access_pmceid },
+ { AA32(LO), Op1( 0), CRn( 9), CRm(12), Op2( 7), access_pmceid },
{ Op1( 0), CRn( 9), CRm(13), Op2( 0), access_pmu_evcntr },
{ Op1( 0), CRn( 9), CRm(13), Op2( 1), access_pmu_evtyper },
{ Op1( 0), CRn( 9), CRm(13), Op2( 2), access_pmu_evcntr },
@@ -1927,6 +1940,10 @@ static const struct sys_reg_desc cp15_regs[] = {
{ Op1( 0), CRn( 9), CRm(14), Op2( 1), access_pminten },
{ Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten },
{ Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs },
+ { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 4), access_pmceid },
+ { AA32(HI), Op1( 0), CRn( 9), CRm(14), Op2( 5), access_pmceid },
+ /* PMMIR */
+ { Op1( 0), CRn( 9), CRm(14), Op2( 6), trap_raz_wi },
/* PRRR/MAIR0 */
{ AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 },
diff --git a/arch/arm64/kvm/trng.c b/arch/arm64/kvm/trng.c
new file mode 100644
index 000000000000..99bdd7103c9c
--- /dev/null
+++ b/arch/arm64/kvm/trng.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+
+#define ARM_SMCCC_TRNG_VERSION_1_0 0x10000UL
+
+/* Those values are deliberately separate from the generic SMCCC definitions. */
+#define TRNG_SUCCESS 0UL
+#define TRNG_NOT_SUPPORTED ((unsigned long)-1)
+#define TRNG_INVALID_PARAMETER ((unsigned long)-2)
+#define TRNG_NO_ENTROPY ((unsigned long)-3)
+
+#define TRNG_MAX_BITS64 192
+
+static const uuid_t arm_smc_trng_uuid __aligned(4) = UUID_INIT(
+ 0x0d21e000, 0x4384, 0x11eb, 0x80, 0x70, 0x52, 0x44, 0x55, 0x4e, 0x5a, 0x4c);
+
+static int kvm_trng_do_rnd(struct kvm_vcpu *vcpu, int size)
+{
+ DECLARE_BITMAP(bits, TRNG_MAX_BITS64);
+ u32 num_bits = smccc_get_arg1(vcpu);
+ int i;
+
+ if (num_bits > 3 * size) {
+ smccc_set_retval(vcpu, TRNG_INVALID_PARAMETER, 0, 0, 0);
+ return 1;
+ }
+
+ /* get as many bits as we need to fulfil the request */
+ for (i = 0; i < DIV_ROUND_UP(num_bits, BITS_PER_LONG); i++)
+ bits[i] = get_random_long();
+
+ bitmap_clear(bits, num_bits, TRNG_MAX_BITS64 - num_bits);
+
+ if (size == 32)
+ smccc_set_retval(vcpu, TRNG_SUCCESS, lower_32_bits(bits[1]),
+ upper_32_bits(bits[0]), lower_32_bits(bits[0]));
+ else
+ smccc_set_retval(vcpu, TRNG_SUCCESS, bits[2], bits[1], bits[0]);
+
+ memzero_explicit(bits, sizeof(bits));
+ return 1;
+}
+
+int kvm_trng_call(struct kvm_vcpu *vcpu)
+{
+ const __le32 *u = (__le32 *)arm_smc_trng_uuid.b;
+ u32 func_id = smccc_get_function(vcpu);
+ unsigned long val = TRNG_NOT_SUPPORTED;
+ int size = 64;
+
+ switch (func_id) {
+ case ARM_SMCCC_TRNG_VERSION:
+ val = ARM_SMCCC_TRNG_VERSION_1_0;
+ break;
+ case ARM_SMCCC_TRNG_FEATURES:
+ switch (smccc_get_arg1(vcpu)) {
+ case ARM_SMCCC_TRNG_VERSION:
+ case ARM_SMCCC_TRNG_FEATURES:
+ case ARM_SMCCC_TRNG_GET_UUID:
+ case ARM_SMCCC_TRNG_RND32:
+ case ARM_SMCCC_TRNG_RND64:
+ val = TRNG_SUCCESS;
+ }
+ break;
+ case ARM_SMCCC_TRNG_GET_UUID:
+ smccc_set_retval(vcpu, le32_to_cpu(u[0]), le32_to_cpu(u[1]),
+ le32_to_cpu(u[2]), le32_to_cpu(u[3]));
+ return 1;
+ case ARM_SMCCC_TRNG_RND32:
+ size = 32;
+ fallthrough;
+ case ARM_SMCCC_TRNG_RND64:
+ return kvm_trng_do_rnd(vcpu, size);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+}
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index 70fcd6a12fe1..978301392d67 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -81,6 +81,34 @@ __init void kvm_compute_layout(void)
init_hyp_physvirt_offset();
}
+/*
+ * The .hyp.reloc ELF section contains a list of kimg positions that
+ * contains kimg VAs but will be accessed only in hyp execution context.
+ * Convert them to hyp VAs. See gen-hyprel.c for more details.
+ */
+__init void kvm_apply_hyp_relocations(void)
+{
+ int32_t *rel;
+ int32_t *begin = (int32_t *)__hyp_reloc_begin;
+ int32_t *end = (int32_t *)__hyp_reloc_end;
+
+ for (rel = begin; rel < end; ++rel) {
+ uintptr_t *ptr, kimg_va;
+
+ /*
+ * Each entry contains a 32-bit relative offset from itself
+ * to a kimg VA position.
+ */
+ ptr = (uintptr_t *)lm_alias((char *)rel + *rel);
+
+ /* Read the kimg VA value at the relocation address. */
+ kimg_va = *ptr;
+
+ /* Convert to hyp VA and store back to the relocation address. */
+ *ptr = __early_kern_hyp_va((uintptr_t)lm_alias(kimg_va));
+ }
+}
+
static u32 compute_instruction(int n, u32 rd, u32 rn)
{
u32 insn = AARCH64_BREAK_FAULT;
@@ -255,12 +283,6 @@ static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst
*updptr++ = cpu_to_le32(insn);
}
-void kvm_update_kimg_phys_offset(struct alt_instr *alt,
- __le32 *origptr, __le32 *updptr, int nr_inst)
-{
- generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst);
-}
-
void kvm_get_kimage_voffset(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst)
{
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7deddf56f7c3..709d98fea90c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1;
EXPORT_SYMBOL(memstart_addr);
/*
- * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
- * memory as some devices, namely the Raspberry Pi 4, have peripherals with
- * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
- * bit addressable memory area.
+ * If the corresponding config options are enabled, we create both ZONE_DMA
+ * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
+ * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
+ * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
+ * otherwise it is empty.
*/
phys_addr_t arm64_dma_phys_limit __ro_after_init;
-phys_addr_t arm64_dma32_phys_limit __ro_after_init;
#ifdef CONFIG_KEXEC_CORE
/*
@@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void)
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
- crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
+ crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
crash_size, SZ_2M);
if (crash_base == 0) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
unsigned int __maybe_unused acpi_zone_dma_bits;
unsigned int __maybe_unused dt_zone_dma_bits;
+ phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
#ifdef CONFIG_ZONE_DMA
acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
@@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
+ if (!arm64_dma_phys_limit)
+ arm64_dma_phys_limit = dma32_phys_limit;
#endif
+ if (!arm64_dma_phys_limit)
+ arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@@ -394,16 +399,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- arm64_dma32_phys_limit = max_zone_phys(32);
- else
- arm64_dma32_phys_limit = PHYS_MASK + 1;
-
reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
-
- dma_contiguous_reserve(arm64_dma32_phys_limit);
}
void __init bootmem_init(void)
@@ -439,6 +437,11 @@ void __init bootmem_init(void)
zone_sizes_init(min, max);
/*
+ * Reserve the CMA area after arm64_dma_phys_limit was initialised.
+ */
+ dma_contiguous_reserve(arm64_dma_phys_limit);
+
+ /*
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
@@ -455,7 +458,7 @@ void __init bootmem_init(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
+ max_pfn > PFN_DOWN(arm64_dma_phys_limit))
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index c61c641674e6..e3946b06e840 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -13,6 +13,7 @@
#include <linux/libfdt.h>
#include <asm/addrspace.h>
+#include <asm/unaligned.h>
/*
* These two variables specify the free mem region
@@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start)
dtb_size = fdt_totalsize((void *)&__appended_dtb);
/* last four bytes is always image size in little endian */
- image_size = le32_to_cpup((void *)&__image_end - 4);
+ image_size = get_unaligned_le32((void *)&__image_end - 4);
/* copy dtb to where the booted kernel will expect it */
memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index bd47e15d02c7..be5d4afcd30f 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1444,7 +1444,7 @@ static void octeon_irq_setup_secondary_ciu2(void)
static int __init octeon_irq_init_ciu(
struct device_node *ciu_node, struct device_node *parent)
{
- unsigned int i, r;
+ int i, r;
struct irq_chip *chip;
struct irq_chip *chip_edge;
struct irq_chip *chip_mbox;
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 6ee3f7218c67..c4441416e96b 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
#undef ns_to_kernel_old_timeval
#define ns_to_kernel_old_timeval ns_to_old_timeval32
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_t compat_long_t
+#define user_siginfo_t compat_siginfo_t
+#define copy_siginfo_to_external copy_siginfo_to_external32
+
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 6dd103d3cebb..7b2a23f48c1a 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
#undef ns_to_kernel_old_timeval
#define ns_to_kernel_old_timeval ns_to_old_timeval32
+/*
+ * Some data types as stored in coredump.
+ */
+#define user_long_t compat_long_t
+#define user_siginfo_t compat_siginfo_t
+#define copy_siginfo_to_external copy_siginfo_to_external32
+
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c
index 47aeb3350a76..0e365b7c742d 100644
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset)
static inline __init unsigned long rotate_xor(unsigned long hash,
const void *area, size_t size)
{
- size_t i;
- unsigned long *ptr = (unsigned long *)area;
+ const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash));
+ size_t diff, i;
+
+ diff = (void *)ptr - area;
+ if (unlikely(size < diff + sizeof(hash)))
+ return hash;
+
+ size = ALIGN_DOWN(size - diff, sizeof(hash));
for (i = 0; i < size / sizeof(hash); i++) {
/* Rotate by odd number of bits and XOR. */
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index 81671aa365b3..77c635c2c90d 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -103,6 +103,8 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz
return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
}
+#ifdef __powerpc64__
+
static __always_inline
int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
@@ -115,11 +117,23 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
}
-#ifdef CONFIG_VDSO32
+#else
#define BUILD_VDSO32 1
static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
{
return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8e0b1298bf19..4ab426b8b0e0 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -187,6 +187,12 @@ SECTIONS
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
+
+ /*
+ *.init.text might be RO so we must ensure this section ends on
+ * a page boundary.
+ */
+ . = ALIGN(PAGE_SIZE);
_einittext = .;
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
@@ -200,6 +206,8 @@ SECTIONS
EXIT_TEXT
}
+ . = ALIGN(PAGE_SIZE);
+
INIT_DATA_SECTION(16)
. = ALIGN(8);
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 81b76d44725d..e9e2c1f0a690 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -137,7 +137,7 @@ config PA_BITS
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+ default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
@@ -247,10 +247,12 @@ config MODULE_SECTIONS
choice
prompt "Maximum Physical Memory"
- default MAXPHYSMEM_2GB if 32BIT
+ default MAXPHYSMEM_1GB if 32BIT
default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
+ config MAXPHYSMEM_1GB
+ bool "1GiB"
config MAXPHYSMEM_2GB
bool "2GiB"
config MAXPHYSMEM_128GB
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 4a2729f5ca3f..24d75a146e02 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -88,7 +88,9 @@
phy-mode = "gmii";
phy-handle = <&phy0>;
phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-id0007.0771";
reg = <0>;
+ reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>;
};
};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index d222d353d86d..8c3d1e451703 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
CONFIG_POWER_RESET=y
CONFIG_DRM=y
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 41a72861987c..251e1db088fa 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -99,7 +99,6 @@
| _PAGE_DIRTY)
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
-#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
#define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
#define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 8454f746bbfd..1453a2f563bc 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -10,7 +10,7 @@
#include <linux/types.h>
-#ifndef GENERIC_TIME_VSYSCALL
+#ifndef CONFIG_GENERIC_TIME_VSYSCALL
struct vdso_data {
};
#endif
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index de59dd457b41..d86781357044 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf)
static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type)
{
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id());
+ /*
+ * Using raw_smp_processor_id() elides a preemptability check, but this
+ * is really indicative of a larger problem: the cacheinfo UABI assumes
+ * that cores have a homonogenous view of the cache hierarchy. That
+ * happens to be the case for the current set of RISC-V systems, but
+ * likely won't be true in general. Since there's no way to provide
+ * correct information for these systems via the current UABI we're
+ * just eliding the check for now.
+ */
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id());
struct cacheinfo *this_leaf;
int index;
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 524d918f3601..744f3209c48d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -124,15 +124,15 @@ skip_context_tracking:
REG_L a1, (a1)
jr a1
1:
-#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_on
-#endif
/*
* Exceptions run with interrupts enabled or disabled depending on the
* state of SR_PIE in m/sstatus.
*/
andi t0, s1, SR_PIE
beqz t0, 1f
+#ifdef CONFIG_TRACE_IRQFLAGS
+ call trace_hardirqs_on
+#endif
csrs CSR_STATUS, SR_IE
1:
@@ -155,6 +155,15 @@ skip_context_tracking:
tail do_trap_unknown
handle_syscall:
+#ifdef CONFIG_RISCV_M_MODE
+ /*
+ * When running is M-Mode (no MMU config), MPIE does not get set.
+ * As a result, we need to force enable interrupts here because
+ * handle_exception did not do set SR_IE as it always sees SR_PIE
+ * being cleared.
+ */
+ csrs CSR_STATUS, SR_IE
+#endif
#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING)
/* Recover a0 - a7 for system calls */
REG_L a0, PT_A0(sp)
@@ -186,14 +195,7 @@ check_syscall_nr:
* Syscall number held in a7.
* If syscall number is above allowed value, redirect to ni_syscall.
*/
- bge a7, t0, 1f
- /*
- * Check if syscall is rejected by tracer, i.e., a7 == -1.
- * If yes, we pretend it was executed.
- */
- li t1, -1
- beq a7, t1, ret_from_syscall_rejected
- blt a7, t1, 1f
+ bgeu a7, t0, 1f
/* Call syscall */
la s0, sys_call_table
slli t0, a7, RISCV_LGPTR
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 1d85e9bf783c..3fa3f26dde85 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -127,7 +127,9 @@ static void __init init_resources(void)
{
struct memblock_region *region = NULL;
struct resource *res = NULL;
- int ret = 0;
+ struct resource *mem_res = NULL;
+ size_t mem_res_sz = 0;
+ int ret = 0, i = 0;
code_res.start = __pa_symbol(_text);
code_res.end = __pa_symbol(_etext) - 1;
@@ -145,16 +147,17 @@ static void __init init_resources(void)
bss_res.end = __pa_symbol(__bss_stop) - 1;
bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res);
+ mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
+ if (!mem_res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
/*
* Start by adding the reserved regions, if they overlap
* with /memory regions, insert_resource later on will take
* care of it.
*/
for_each_reserved_mem_region(region) {
- res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
- if (!res)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- sizeof(struct resource));
+ res = &mem_res[i++];
res->name = "Reserved";
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
@@ -171,8 +174,10 @@ static void __init init_resources(void)
* Ignore any other reserved regions within
* system memory.
*/
- if (memblock_is_memory(res->start))
+ if (memblock_is_memory(res->start)) {
+ memblock_free((phys_addr_t) res, sizeof(struct resource));
continue;
+ }
ret = add_resource(&iomem_resource, res);
if (ret < 0)
@@ -181,10 +186,7 @@ static void __init init_resources(void)
/* Add /memory regions to the resource tree */
for_each_mem_region(region) {
- res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
- if (!res)
- panic("%s: Failed to allocate %zu bytes\n", __func__,
- sizeof(struct resource));
+ res = &mem_res[i++];
if (unlikely(memblock_is_nomap(region))) {
res->name = "Reserved";
@@ -205,9 +207,9 @@ static void __init init_resources(void)
return;
error:
- memblock_free((phys_addr_t) res, sizeof(struct resource));
/* Better an empty resource tree than an inconsistent one */
release_child_resources(&iomem_resource);
+ memblock_free((phys_addr_t) mem_res, mem_res_sz);
}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 48b870a685b3..df5d2da7c40b 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -14,7 +14,7 @@
#include <asm/stacktrace.h>
-register unsigned long sp_in_global __asm__("sp");
+register const unsigned long sp_in_global __asm__("sp");
#ifdef CONFIG_FRAME_POINTER
@@ -28,9 +28,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
sp = user_stack_pointer(regs);
pc = instruction_pointer(regs);
} else if (task == NULL || task == current) {
- const register unsigned long current_sp = sp_in_global;
fp = (unsigned long)__builtin_frame_address(0);
- sp = current_sp;
+ sp = sp_in_global;
pc = (unsigned long)walk_stackframe;
} else {
/* task blocked in __switch_to */
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 4d3a1048ad8b..8a5cf99c0776 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -4,6 +4,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/of_clk.h>
#include <linux/clocksource.h>
#include <linux/delay.h>
#include <asm/sbi.h>
@@ -24,6 +25,8 @@ void __init time_init(void)
riscv_timebase = prop;
lpj_fine = riscv_timebase / HZ;
+
+ of_clk_init(NULL);
timer_probe();
}
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 678204231700..3f1d35e7c98a 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -12,7 +12,7 @@
#include <linux/binfmts.h>
#include <linux/err.h>
#include <asm/page.h>
-#ifdef GENERIC_TIME_VSYSCALL
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
#else
#include <asm/vdso.h>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index bf5379135e39..7cd4993f4ff2 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -157,9 +157,10 @@ disable:
void __init setup_bootmem(void)
{
phys_addr_t mem_start = 0;
- phys_addr_t start, end = 0;
+ phys_addr_t start, dram_end, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
+ phys_addr_t max_mapped_addr = __pa(~(ulong)0);
u64 i;
/* Find the memory region containing the kernel */
@@ -181,7 +182,18 @@ void __init setup_bootmem(void)
/* Reserve from the start of the kernel to the end of the kernel */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
- max_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ dram_end = memblock_end_of_DRAM();
+
+ /*
+ * memblock allocator is not aware of the fact that last 4K bytes of
+ * the addressable memory can not be mapped because of IS_ERR_VALUE
+ * macro. Make sure that last 4k bytes are not usable by memblock
+ * if end of dram is equal to maximum addressable memory.
+ */
+ if (max_mapped_addr == (dram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+
+ max_pfn = PFN_DOWN(dram_end);
max_low_pfn = max_pfn;
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
set_max_mapnr(max_low_pfn);
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 12ddd1f6bf70..a8a2ffd9114a 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -93,8 +93,8 @@ void __init kasan_init(void)
VMALLOC_END));
for_each_mem_range(i, &_start, &_end) {
- void *start = (void *)_start;
- void *end = (void *)_end;
+ void *start = (void *)__va(_start);
+ void *end = (void *)__va(_end);
if (start >= end)
break;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index e04d90af4c27..4638a52d8eae 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -16,6 +16,7 @@
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/idtentry.h>
+#include <linux/kexec.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
@@ -26,6 +27,8 @@
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
+int hyperv_init_cpuhp;
+
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
@@ -401,6 +404,7 @@ void __init hyperv_init(void)
register_syscore_ops(&hv_syscore_ops);
+ hyperv_init_cpuhp = cpuhp;
return;
remove_cpuhp_state:
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 5208ba49c89a..2c87350c1fb0 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
if (!hv_hypercall_pg)
goto do_native;
- if (cpumask_empty(cpus))
- return;
-
local_irq_save(flags);
+ /*
+ * Only check the mask _after_ interrupt has been disabled to avoid the
+ * mask changing under our feet.
+ */
+ if (cpumask_empty(cpus)) {
+ local_irq_restore(flags);
+ return;
+ }
+
flush_pcpu = (struct hv_tlb_flush **)
this_cpu_ptr(hyperv_pcpu_input_arg);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ffc289992d1b..30f76b966857 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
+extern int hyperv_init_cpuhp;
+
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f628e3dc150f..43b54bef5448 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -135,14 +135,32 @@ static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
hv_kexec_handler();
+
+ /*
+ * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
+ * corrupts the old VP Assist Pages and can crash the kexec kernel.
+ */
+ if (kexec_in_progress && hyperv_init_cpuhp > 0)
+ cpuhp_remove_state(hyperv_init_cpuhp);
+
+ /* The function calls stop_other_cpus(). */
native_machine_shutdown();
+
+ /* Disable the hypercall page when there is only 1 active CPU. */
+ if (kexec_in_progress)
+ hyperv_cleanup();
}
static void hv_machine_crash_shutdown(struct pt_regs *regs)
{
if (hv_crash_handler)
hv_crash_handler(regs);
+
+ /* The function calls crash_smp_send_stop(). */
native_machine_crash_shutdown(regs);
+
+ /* Disable the hypercall page when there is only 1 active CPU. */
+ hyperv_cleanup();
}
#endif /* CONFIG_KEXEC_CORE */
#endif /* CONFIG_HYPERV */
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 9e87ab010c82..e68ea5f4ad1c 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -164,10 +164,10 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
else
per_cpu(xen_vcpu_id, cpu) = cpu;
rc = xen_vcpu_setup(cpu);
- if (rc)
+ if (rc || !xen_have_vector_callback)
return rc;
- if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
rc = xen_smp_intr_init(cpu);
@@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu)
return 0;
}
+static bool no_vector_callback __initdata;
+
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
@@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void)
xen_panic_handler_init();
- if (xen_feature(XENFEAT_hvm_callback_vector))
+ if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
xen_hvm_smp_init();
@@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg)
}
early_param("xen_nopv", xen_parse_nopv);
+static __init int xen_parse_no_vector_callback(char *arg)
+{
+ no_vector_callback = true;
+ return 0;
+}
+early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
+
bool __init xen_hvm_need_lapic(void)
{
if (xen_pv_domain())
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index f5e7db4f82ab..056430a1080b 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
int cpu;
native_smp_prepare_cpus(max_cpus);
- WARN_ON(xen_smp_intr_init(0));
- xen_init_lock_cpu(0);
+ if (xen_have_vector_callback) {
+ WARN_ON(xen_smp_intr_init(0));
+ xen_init_lock_cpu(0);
+ }
for_each_possible_cpu(cpu) {
if (cpu == 0)
@@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
static void xen_hvm_cpu_die(unsigned int cpu)
{
if (common_cpu_die(cpu) == 0) {
- xen_smp_intr_free(cpu);
- xen_uninit_lock_cpu(cpu);
- xen_teardown_timer(cpu);
+ if (xen_have_vector_callback) {
+ xen_smp_intr_free(cpu);
+ xen_uninit_lock_cpu(cpu);
+ xen_teardown_timer(cpu);
+ }
}
}
#else
@@ -64,14 +68,17 @@ static void xen_hvm_cpu_die(unsigned int cpu)
void __init xen_hvm_smp_init(void)
{
- if (!xen_have_vector_callback)
+ smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_cpus_done = xen_smp_cpus_done;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+
+ if (!xen_have_vector_callback) {
+ nopvspin = true;
return;
+ }
- smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
- smp_ops.cpu_die = xen_hvm_cpu_die;
smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
- smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
- smp_ops.smp_cpus_done = xen_smp_cpus_done;
}