author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 14:11:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-03 14:11:08 -0700 |
commit | 145ff1ec090dce9beb5a9590b5dc288e7bb2e65d (patch) | |
tree | 3e10a7c59553e56c1ea5f0aa71a2c3c9d6b7982b /arch | |
parent | 8c4e1c027ae63c67c523d695e4e8565ff78af1ba (diff) | |
parent | 0e4cd9f2654915be8d09a1bd1b405ce5426e64c4 (diff) | |
download | linux-145ff1ec090dce9beb5a9590b5dc288e7bb2e65d.tar.bz2 |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 and cross-arch updates from Catalin Marinas:
"Here's a slightly wider-spread set of updates for 5.9.
Going outside the usual arch/arm64/ area are the removal of the
read_barrier_depends() series from Will and the MSI/IOMMU ID
translation series from Lorenzo.
The notable arm64 updates include ARMv8.4 TLBI range operations and
translation table level hints, time namespace support, and perf updates.
Summary:
- Removal of the tremendously unpopular read_barrier_depends()
barrier, which is a NOP on all architectures apart from Alpha, in
favour of allowing architectures to override READ_ONCE() and do
whatever dance they need to do to ensure address dependencies
provide LOAD -> LOAD/STORE ordering.
This work also offers a potential solution if compilers are shown
to convert LOAD -> LOAD address dependencies into control
dependencies (e.g. under LTO), as weakly ordered architectures will
effectively be able to upgrade READ_ONCE() to smp_load_acquire().
The latter case is not used yet, but will be discussed further at
LPC (a minimal sketch of such an override appears after the links below).
- Make the MSI/IOMMU input/output ID translation PCI agnostic,
augment the MSI/IOMMU ACPI/OF ID mapping APIs to accept a
bus-specific input ID parameter, and apply the resulting changes to the
device ID space provided by the Freescale FSL bus.
- arm64 support for TLBI range operations and translation table level
hints (part of the ARMv8.4 architecture version); the range encoding
arithmetic is sketched just before the diff below.
- Time namespace support for arm64.
- Export the virtual and physical address sizes in vmcoreinfo for
makedumpfile and crash utilities.
- CPU feature handling cleanups and checks for programmer errors
(overlapping bit-fields); a sketch of the overlap check follows the
commit list below.
- ACPI updates for arm64: disallow AML accesses to EFI code regions
and kernel memory.
- perf updates for arm64.
- Miscellaneous fixes and cleanups, most notably a PLT counting
optimisation for module loading, a recordmcount fix to ignore
relocations other than R_AARCH64_CALL26, and CMA areas reserved for
gigantic pages on 16K and 64K configurations.
- Trivial typos, duplicate words"
Link: http://lkml.kernel.org/r/20200710165203.31284-1-will@kernel.org
Link: http://lkml.kernel.org/r/20200619082013.13661-1-lorenzo.pieralisi@arm.com
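As referenced in the summary above, the READ_ONCE() override mechanism lets a weakly ordered architecture hide whatever barrier it needs behind a plain load. Below is a minimal standalone sketch in the style of the Alpha rwonce.h added further down in this diff; SKETCH_READ_ONCE() and sketch_mb() are illustrative names rather than kernel APIs, and the real macro uses __unqual_scalar_typeof() and mb() instead.

/*
 * Minimal sketch of an arch-level READ_ONCE() override: perform the
 * volatile load, then issue a full barrier so that later loads which
 * depend on the returned value are ordered after it (the ordering that
 * read_barrier_depends() used to provide on Alpha).
 */
#define sketch_mb()	__atomic_thread_fence(__ATOMIC_SEQ_CST)

#define SKETCH_READ_ONCE(x)						\
({									\
	__typeof__(x) __x = *(volatile __typeof__(x) *)&(x);		\
	sketch_mb();	/* order subsequent dependent loads */		\
	__x;								\
})

/* usage: struct foo *p = SKETCH_READ_ONCE(head); then a dependent load p->val */

With such a per-architecture definition in place, the generic __READ_ONCE() from asm-generic/rwonce.h is overridden, which is what the new arch/alpha/include/asm/rwonce.h in the diff below does.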
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (82 commits)
arm64: use IRQ_STACK_SIZE instead of THREAD_SIZE for irq stack
arm64/mm: save memory access in check_and_switch_context() fast switch path
arm64: sigcontext.h: delete duplicated word
arm64: ptrace.h: delete duplicated word
arm64: pgtable-hwdef.h: delete duplicated words
bus: fsl-mc: Add ACPI support for fsl-mc
bus/fsl-mc: Refactor the MSI domain creation in the DPRC driver
of/irq: Make of_msi_map_rid() PCI bus agnostic
of/irq: make of_msi_map_get_device_domain() bus agnostic
dt-bindings: arm: fsl: Add msi-map device-tree binding for fsl-mc bus
of/device: Add input id to of_dma_configure()
of/iommu: Make of_map_rid() PCI agnostic
ACPI/IORT: Add an input ID to acpi_dma_configure()
ACPI/IORT: Remove useless PCI bus walk
ACPI/IORT: Make iort_msi_map_rid() PCI agnostic
ACPI/IORT: Make iort_get_device_domain IRQ domain agnostic
ACPI/IORT: Make iort_match_node_callback walk the ACPI namespace for NC
arm64: enable time namespace support
arm64/vdso: Restrict splitting VVAR VMA
arm64/vdso: Handle faults on timens page
...
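One of the "checks for programmer errors" mentioned in the summary is the new validation in sort_ftr_regs() (see arch/arm64/kernel/cpufeature.c in the diff) that feature register bit-fields, listed in descending shift order, do not overlap. The standalone sketch below only illustrates the arithmetic of that check; fields_overlap() and struct ftr_field are made-up names for the example, not kernel types.

#include <stdio.h>

/* A feature register field: lowest bit position and width in bits. */
struct ftr_field { unsigned int shift, width; };

/*
 * With fields sorted in descending shift order, field i overlaps the
 * previous one if it extends past that field's starting bit. This mirrors
 * the WARN((shift + width) > prev_shift, ...) check added to cpufeature.c.
 */
static int fields_overlap(const struct ftr_field *f, int n)
{
	for (int i = 1; i < n; i++)
		if (f[i].shift + f[i].width > f[i - 1].shift)
			return 1;
	return 0;
}

int main(void)
{
	struct ftr_field ok[]  = { { 8, 4 }, { 4, 4 }, { 0, 4 } }; /* clean layout */
	struct ftr_field bad[] = { { 8, 4 }, { 4, 8 }, { 0, 4 } }; /* 2nd field runs into 1st */

	printf("ok:  %s\n", fields_overlap(ok, 3)  ? "overlap" : "clean");
	printf("bad: %s\n", fields_overlap(bad, 3) ? "overlap" : "clean");
	return 0;
}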
Diffstat (limited to 'arch')
50 files changed, 1016 insertions, 324 deletions
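Before the diff itself, a quick illustration of the TLBI range encoding referenced in the summary: __flush_tlb_range() (arch/arm64/include/asm/tlbflush.h below) builds operands where a single invalidation covers (num + 1) * 2^(5*scale + 1) pages, up to MAX_TLBI_RANGE_PAGES at num = 31, scale = 3. The snippet below is a hypothetical userspace program that merely evaluates that formula; it is not kernel code.

#include <stdio.h>

/* Pages covered by one range TLBI operation: (num + 1) * 2^(5*scale + 1). */
static unsigned long tlbi_range_pages(unsigned long num, unsigned long scale)
{
	return (num + 1UL) << (5 * scale + 1);
}

int main(void)
{
	printf("scale=0 num=0  -> %lu pages\n", tlbi_range_pages(0, 0));   /* 2 */
	printf("scale=0 num=31 -> %lu pages\n", tlbi_range_pages(31, 0));  /* 64 */
	printf("scale=3 num=31 -> %lu pages\n", tlbi_range_pages(31, 3));  /* 2097152 */
	return 0;
}

With 4K pages the maximum single operation therefore spans 8GB of address space, which is why the new __flush_tlb_range() only falls back to flush_tlb_mm() once the range exceeds MAX_TLBI_RANGE_PAGES.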
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 2144530d1428..2f8f7e54792f 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -16,10 +16,10 @@ /* * To ensure dependency ordering is preserved for the _relaxed and - * _release atomics, an smp_read_barrier_depends() is unconditionally - * inserted into the _relaxed variants, which are used to build the - * barriered versions. Avoid redundant back-to-back fences in the - * _acquire and _fence versions. + * _release atomics, an smp_mb() is unconditionally inserted into the + * _relaxed variants, which are used to build the barriered versions. + * Avoid redundant back-to-back fences in the _acquire and _fence + * versions. */ #define __atomic_acquire_fence() #define __atomic_post_full_fence() @@ -70,7 +70,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -88,7 +88,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -123,7 +123,7 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } @@ -141,7 +141,7 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ ".previous" \ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \ :"Ir" (i), "m" (v->counter) : "memory"); \ - smp_read_barrier_depends(); \ + smp_mb(); \ return result; \ } diff --git a/arch/alpha/include/asm/barrier.h b/arch/alpha/include/asm/barrier.h index 92ec486a4f9e..c56bfffc9918 100644 --- a/arch/alpha/include/asm/barrier.h +++ b/arch/alpha/include/asm/barrier.h @@ -2,64 +2,15 @@ #ifndef __BARRIER_H #define __BARRIER_H -#include <asm/compiler.h> - #define mb() __asm__ __volatile__("mb": : :"memory") #define rmb() __asm__ __volatile__("mb": : :"memory") #define wmb() __asm__ __volatile__("wmb": : :"memory") -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. 
- * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * <programlisting> - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * </programlisting> - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * <programlisting> - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * </programlisting> - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - */ -#define read_barrier_depends() __asm__ __volatile__("mb": : :"memory") +#define __smp_load_acquire(p) \ +({ \ + compiletime_assert_atomic_type(*p); \ + __READ_ONCE(*p); \ +}) #ifdef CONFIG_SMP #define __ASM_SMP_MB "\tmb\n" diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 162c17b2631f..660b14ce1317 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -277,9 +277,9 @@ extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; retur extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } /* - * The smp_read_barrier_depends() in the following functions are required to - * order the load of *dir (the pointer in the top level page table) with any - * subsequent load of the returned pmd_t *ret (ret is data dependent on *dir). + * The smp_rmb() in the following functions are required to order the load of + * *dir (the pointer in the top level page table) with any subsequent load of + * the returned pmd_t *ret (ret is data dependent on *dir). * * If this ordering is not enforced, the CPU might load an older value of * *ret, which may be uninitialized data. See mm/memory.c:__pte_alloc for @@ -293,7 +293,7 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) { pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); - smp_read_barrier_depends(); /* see above */ + smp_rmb(); /* see above */ return ret; } #define pmd_offset pmd_offset @@ -303,7 +303,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) { pte_t *ret = (pte_t *) pmd_page_vaddr(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PAGE - 1)); - smp_read_barrier_depends(); /* see above */ + smp_rmb(); /* see above */ return ret; } #define pte_offset_kernel pte_offset_kernel diff --git a/arch/alpha/include/asm/rwonce.h b/arch/alpha/include/asm/rwonce.h new file mode 100644 index 000000000000..35542bcf92b3 --- /dev/null +++ b/arch/alpha/include/asm/rwonce.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Google LLC. + */ +#ifndef __ASM_RWONCE_H +#define __ASM_RWONCE_H + +#ifdef CONFIG_SMP + +#include <asm/barrier.h> + +/* + * Alpha is apparently daft enough to reorder address-dependent loads + * on some CPU implementations. Knock some common sense into it with + * a memory barrier in READ_ONCE(). 
+ * + * For the curious, more information about this unusual reordering is + * available in chapter 15 of the "perfbook": + * + * https://kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html + * + */ +#define __READ_ONCE(x) \ +({ \ + __unqual_scalar_typeof(x) __x = \ + (*(volatile typeof(__x) *)(&(x))); \ + mb(); \ + (typeof(x))__x; \ +}) + +#endif /* CONFIG_SMP */ + +#include <asm-generic/rwonce.h> + +#endif /* __ASM_RWONCE_H */ diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 36dc18553ed8..1b207cf07697 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/errno.h> #include <asm/unistd.h> #include <asm/vdso/cp15.h> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 66dc41fd49f2..73aee7290cdf 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -118,6 +118,7 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_TIME_NS select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -1327,6 +1328,8 @@ config SWP_EMULATION ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that they are always undefined. Say Y here to enable software emulation of these instructions for userspace using LDXR/STXR. + This feature can be controlled at runtime with the abi.swp + sysctl which is disabled by default. In some older versions of glibc [<=2.8] SWP is used during futex trylock() operations with the assumption that the code will not @@ -1353,7 +1356,8 @@ config CP15_BARRIER_EMULATION Say Y here to enable software emulation of these instructions for AArch32 userspace code. When this option is enabled, CP15 barrier usage is traced which can help - identify software that needs updating. + identify software that needs updating. This feature can be + controlled at runtime with the abi.cp15_barrier sysctl. If unsure, say Y @@ -1364,7 +1368,8 @@ config SETEND_EMULATION AArch32 EL0, and is deprecated in ARMv8. Say Y here to enable software emulation of the instruction - for AArch32 userspace code. + for AArch32 userspace code. This feature can be controlled + at runtime with the abi.setend sysctl. Note: All the cpus on the system must have mixed endian support at EL0 for this feature to be enabled. If a new CPU - which doesn't support mixed @@ -1596,6 +1601,20 @@ config ARM64_AMU_EXTN correctly reflect reality. Most commonly, the value read will be 0, indicating that the counter is not enabled. +config AS_HAS_ARMV8_4 + def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a) + +config ARM64_TLB_RANGE + bool "Enable support for tlbi range feature" + default y + depends on AS_HAS_ARMV8_4 + help + ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a + range of input addresses. + + The feature introduces new assembly instructions, and they were + support when binutils >= 2.30. + endmenu menu "ARMv8.5 architectural features" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 70f5905954dd..55bc8546d9c7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -82,11 +82,18 @@ endif # compiler to generate them and consequently to break the single image contract # we pass it only to the assembler. This option is utilized only in case of non # integrated assemblers. 
+ifneq ($(CONFIG_AS_HAS_ARMV8_4), y) branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a endif +endif KBUILD_CFLAGS += $(branch-prot-flags-y) +ifeq ($(CONFIG_AS_HAS_ARMV8_4), y) +# make sure to pass the newest target architecture to -march. +KBUILD_CFLAGS += -Wa,-march=armv8.4-a +endif + ifeq ($(CONFIG_SHADOW_CALL_STACK), y) KBUILD_CFLAGS += -ffixed-x18 endif diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2ca7ba69c318..39273b5c14be 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -66,6 +66,7 @@ CONFIG_SCHED_SMT=y CONFIG_NUMA=y CONFIG_SECCOMP=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index a45366c3909b..bd68e1b7f29f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -47,20 +47,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); /* ACPI table mapping after acpi_permanent_mmap is set */ -static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys, - acpi_size size) -{ - /* For normal memory we already have a cacheable mapping. */ - if (memblock_is_map_memory(phys)) - return (void __iomem *)__phys_to_virt(phys); - - /* - * We should still honor the memory's attribute here because - * crash dump kernel possibly excludes some ACPI (reclaim) - * regions from memblock list. - */ - return __ioremap(phys, size, __acpi_get_mem_attribute(phys)); -} +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap typedef u64 phys_cpuid_t; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index d7b3bb0cb180..07b643a70710 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -62,7 +62,9 @@ #define ARM64_HAS_GENERIC_AUTH 52 #define ARM64_HAS_32BIT_EL1 53 #define ARM64_BTI 54 +#define ARM64_HAS_ARMv8_4_TTL 55 +#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_NCAPS 55 +#define ARM64_NCAPS 57 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7c3d1ff091d..89b4f0142c28 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -692,6 +692,12 @@ static inline bool system_supports_bti(void) return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI); } +static inline bool system_supports_tlb_range(void) +{ + return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) && + cpus_have_const_cap(ARM64_HAS_TLB_RANGE); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 @@ -774,6 +780,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) } u32 get_kvm_ipa_limit(void); +void dump_cpu_features(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 94ba0c5bced2..5abf91e3494c 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -49,6 +49,8 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); #define set_huge_swap_pte_at set_huge_swap_pte_at +void __init arm64_hugetlb_cma_reserve(void); + #include <asm-generic/hugetlb.h> #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index d683bcbf1e7c..22f73fe09030 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -95,6 
+95,7 @@ #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) +/* reserved for KERNEL_HWCAP_MTE __khwcap2_feature(MTE) */ /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 3bf626f6fe0c..329fb15f6bac 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -8,7 +8,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H -#include <linux/pgtable.h> +#include <asm/pgtable-hwdef.h> #include <asm/sparsemem.h> /* diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a1871bb32bb1..afa722504bfd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -10,11 +10,8 @@ #ifndef __ASM_MEMORY_H #define __ASM_MEMORY_H -#include <linux/compiler.h> #include <linux/const.h> #include <linux/sizes.h> -#include <linux/types.h> -#include <asm/bug.h> #include <asm/page-def.h> /* @@ -157,11 +154,15 @@ #endif #ifndef __ASSEMBLY__ -extern u64 vabits_actual; -#define PAGE_END (_PAGE_END(vabits_actual)) #include <linux/bitops.h> +#include <linux/compiler.h> #include <linux/mmdebug.h> +#include <linux/types.h> +#include <asm/bug.h> + +extern u64 vabits_actual; +#define PAGE_END (_PAGE_END(vabits_actual)) extern s64 physvirt_offset; extern s64 memstart_addr; @@ -322,6 +323,7 @@ static inline void *phys_to_virt(phys_addr_t x) __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) +void dump_mem_limit(void); #endif /* !ASSEMBLY */ /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index b0bd9b55594c..f2d7537d6f83 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -175,7 +175,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp) * take CPU migration into account. */ #define destroy_context(mm) do { } while(0) -void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); +void check_and_switch_context(struct mm_struct *mm); #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) @@ -214,8 +214,6 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void __switch_mm(struct mm_struct *next) { - unsigned int cpu = smp_processor_id(); - /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. 
@@ -225,7 +223,7 @@ static inline void __switch_mm(struct mm_struct *next) return; } - check_and_switch_context(next, cpu); + check_and_switch_context(next); } static inline void diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index e7765b62c712..2c2d7dbe8a02 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -72,6 +72,13 @@ #define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 #define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 #define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B +#define ARMV8_PMUV3_PERFCTR_STALL 0x3C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -79,6 +86,26 @@ #define ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE 0x4002 #define ARMV8_SPE_PERFCTR_SAMPLE_COLLISION 0x4003 +/* AMUv1 architecture events */ +#define ARMV8_AMU_PERFCTR_CNT_CYCLES 0x4004 +#define ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM 0x4005 + +/* long-latency read miss events */ +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS 0x4006 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD 0x4009 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B + +/* additional latency from alignment events */ +#define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 +#define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 +#define ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT 0x4022 + +/* Armv8.5 Memory Tagging Extension events */ +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED 0x4024 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD 0x4025 +#define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 + /* ARMv8 recommended implementation defined event types */ #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 9c91a8f93a0e..d400a4d9aee2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -29,7 +29,7 @@ * Size mapped by an entry at level n ( 0 <= n <= 3) * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by - * the architecture is 4. Hence, starting at at level n, we have further + * the architecture is 4. Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * @@ -82,23 +82,23 @@ * Contiguous page definitions. 
*/ #ifdef CONFIG_ARM64_64K_PAGES -#define CONT_PTE_SHIFT 5 -#define CONT_PMD_SHIFT 5 +#define CONT_PTE_SHIFT (5 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (5 + PMD_SHIFT) #elif defined(CONFIG_ARM64_16K_PAGES) -#define CONT_PTE_SHIFT 7 -#define CONT_PMD_SHIFT 5 +#define CONT_PTE_SHIFT (7 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (5 + PMD_SHIFT) #else -#define CONT_PTE_SHIFT 4 -#define CONT_PMD_SHIFT 4 +#define CONT_PTE_SHIFT (4 + PAGE_SHIFT) +#define CONT_PMD_SHIFT (4 + PMD_SHIFT) #endif -#define CONT_PTES (1 << CONT_PTE_SHIFT) +#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) #define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) -#define CONT_PMDS (1 << CONT_PMD_SHIFT) +#define CONT_PMDS (1 << (CONT_PMD_SHIFT - PMD_SHIFT)) #define CONT_PMD_SIZE (CONT_PMDS * PMD_SIZE) #define CONT_PMD_MASK (~(CONT_PMD_SIZE - 1)) -/* the the numerical offset of the PTE within a range of CONT_PTES */ +/* the numerical offset of the PTE within a range of CONT_PTES */ #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) /* @@ -178,10 +178,12 @@ #define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ #define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ #define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */ +#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */ #define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ #define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ #define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */ +#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */ #define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */ #define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */ @@ -216,6 +218,7 @@ #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 #define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) +#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET) #define TCR_EPD0_SHIFT 7 #define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 758e2d1577d0..d5d3fbe73953 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -40,6 +40,16 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + +/* Set stride and tlb_level in flush_*_tlb_range */ +#define flush_pmd_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PMD_SIZE, false, 2) +#define flush_pud_tlb_range(vma, addr, end) \ + __flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1) +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 953b6a1ce549..966ed30ed5f7 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -27,7 +27,7 @@ * * Some code sections either automatically switch back to PSR.I or explicitly * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included - * in the the priority mask, it indicates that PSR.I should be set and + * in the priority mask, it indicates that PSR.I should be set and * interrupt disabling temporarily does not rely on IRQ priorities. 
*/ #define GIC_PRIO_IRQON 0xe0 diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index b767904f28b1..996bf98f0cab 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -256,4 +256,13 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) return (boundary - 1 < end - 1) ? boundary : end; } +/* + * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and + * the architectural page-table level. + */ +#define S2_NO_LEVEL_HINT 0 +#define S2_PUD_LEVEL 1 +#define S2_PMD_LEVEL 2 +#define S2_PTE_LEVEL 3 + #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 463175f80341..554a7e8ecb07 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -421,9 +421,9 @@ */ #define SYS_AMEVCNTR0_EL0(n) SYS_AM_EL0(4 + ((n) >> 3), (n) & 7) -#define SYS_AMEVTYPE0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER0_EL0(n) SYS_AM_EL0(6 + ((n) >> 3), (n) & 7) #define SYS_AMEVCNTR1_EL0(n) SYS_AM_EL0(12 + ((n) >> 3), (n) & 7) -#define SYS_AMEVTYPE1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) +#define SYS_AMEVTYPER1_EL0(n) SYS_AM_EL0(14 + ((n) >> 3), (n) & 7) /* AMU v1: Fixed (architecturally defined) activity monitors */ #define SYS_AMEVCNTR0_CORE_EL0 SYS_AMEVCNTR0_EL0(0) @@ -617,6 +617,9 @@ #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_AES_SHIFT 4 +#define ID_AA64ISAR0_TLB_RANGE_NI 0x0 +#define ID_AA64ISAR0_TLB_RANGE 0x2 + /* id_aa64isar1 */ #define ID_AA64ISAR1_I8MM_SHIFT 52 #define ID_AA64ISAR1_DGH_SHIFT 48 @@ -706,6 +709,9 @@ #define ID_AA64ZFR0_SVEVER_SVE2 0x1 /* id_aa64mmfr0 */ +#define ID_AA64MMFR0_ECV_SHIFT 60 +#define ID_AA64MMFR0_FGT_SHIFT 56 +#define ID_AA64MMFR0_EXS_SHIFT 44 #define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 #define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 #define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 @@ -734,6 +740,10 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ETS_SHIFT 36 +#define ID_AA64MMFR1_TWED_SHIFT 32 +#define ID_AA64MMFR1_XNX_SHIFT 28 +#define ID_AA64MMFR1_SPECSEI_SHIFT 24 #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_LOR_SHIFT 16 #define ID_AA64MMFR1_HPD_SHIFT 12 @@ -746,8 +756,15 @@ /* id_aa64mmfr2 */ #define ID_AA64MMFR2_E0PD_SHIFT 60 +#define ID_AA64MMFR2_EVT_SHIFT 56 +#define ID_AA64MMFR2_BBM_SHIFT 52 +#define ID_AA64MMFR2_TTL_SHIFT 48 #define ID_AA64MMFR2_FWB_SHIFT 40 +#define ID_AA64MMFR2_IDS_SHIFT 36 #define ID_AA64MMFR2_AT_SHIFT 32 +#define ID_AA64MMFR2_ST_SHIFT 28 +#define ID_AA64MMFR2_NV_SHIFT 24 +#define ID_AA64MMFR2_CCIDX_SHIFT 20 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_LSM_SHIFT 8 @@ -755,6 +772,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36 #define ID_AA64DFR0_PMSVER_SHIFT 32 #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 @@ -807,18 +825,40 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_MMFR0_INNERSHR_SHIFT 28 +#define ID_MMFR0_FCSE_SHIFT 24 +#define ID_MMFR0_AUXREG_SHIFT 20 +#define ID_MMFR0_TCM_SHIFT 16 +#define ID_MMFR0_SHARELVL_SHIFT 12 +#define ID_MMFR0_OUTERSHR_SHIFT 8 +#define ID_MMFR0_PMSA_SHIFT 4 +#define ID_MMFR0_VMSA_SHIFT 0 + #define ID_MMFR4_EVT_SHIFT 28 #define ID_MMFR4_CCIDX_SHIFT 24 #define ID_MMFR4_LSM_SHIFT 20 #define ID_MMFR4_HPDS_SHIFT 16 #define ID_MMFR4_CNP_SHIFT 12 #define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_AC2_SHIFT 4 #define ID_MMFR4_SPECSEI_SHIFT 0 #define 
ID_MMFR5_ETS_SHIFT 0 #define ID_PFR0_DIT_SHIFT 24 #define ID_PFR0_CSV2_SHIFT 16 +#define ID_PFR0_STATE3_SHIFT 12 +#define ID_PFR0_STATE2_SHIFT 8 +#define ID_PFR0_STATE1_SHIFT 4 +#define ID_PFR0_STATE0_SHIFT 0 + +#define ID_DFR0_PERFMON_SHIFT 24 +#define ID_DFR0_MPROFDBG_SHIFT 20 +#define ID_DFR0_MMAPTRC_SHIFT 16 +#define ID_DFR0_COPTRC_SHIFT 12 +#define ID_DFR0_MMAPDBG_SHIFT 8 +#define ID_DFR0_COPSDBG_SHIFT 4 +#define ID_DFR0_COPDBG_SHIFT 0 #define ID_PFR2_SSBS_SHIFT 4 #define ID_PFR2_CSV3_SHIFT 0 @@ -861,6 +901,11 @@ #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED #endif +#define MVFR2_FPMISC_SHIFT 4 +#define MVFR2_SIMDMISC_SHIFT 0 + +#define DCZID_DZP_SHIFT 4 +#define DCZID_BS_SHIFT 0 /* * The ZCR_ELx_LEN_* definitions intentionally include bits [8:4] which diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index b76df828e6b7..61c97d3b58c7 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -21,11 +21,37 @@ static void tlb_flush(struct mmu_gather *tlb); #include <asm-generic/tlb.h> +/* + * get the tlbi levels in arm64. Default value is 0 if more than one + * of cleared_* is set or neither is set. + * Arm64 doesn't support p4ds now. + */ +static inline int tlb_get_level(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes && !(tlb->cleared_pmds || + tlb->cleared_puds || + tlb->cleared_p4ds)) + return 3; + + if (tlb->cleared_pmds && !(tlb->cleared_ptes || + tlb->cleared_puds || + tlb->cleared_p4ds)) + return 2; + + if (tlb->cleared_puds && !(tlb->cleared_ptes || + tlb->cleared_pmds || + tlb->cleared_p4ds)) + return 1; + + return 0; +} + static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0); bool last_level = !tlb->freed_tables; unsigned long stride = tlb_get_unmap_size(tlb); + int tlb_level = tlb_get_level(tlb); /* * If we're tearing down the address space then we only care about @@ -38,7 +64,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) return; } - __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, + last_level, tlb_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index bc3949064725..d493174415db 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ +#include <linux/bitfield.h> #include <linux/mm_types.h> #include <linux/sched.h> #include <asm/cputype.h> @@ -60,6 +61,102 @@ }) /* + * Get translation granule of the system, which is decided by + * PAGE_SIZE. Used by TTL. + * - 4KB : 1 + * - 16KB : 2 + * - 64KB : 3 + */ +#define TLBI_TTL_TG_4K 1 +#define TLBI_TTL_TG_16K 2 +#define TLBI_TTL_TG_64K 3 + +static inline unsigned long get_trans_granule(void) +{ + switch (PAGE_SIZE) { + case SZ_4K: + return TLBI_TTL_TG_4K; + case SZ_16K: + return TLBI_TTL_TG_16K; + case SZ_64K: + return TLBI_TTL_TG_64K; + default: + return 0; + } +} + +/* + * Level-based TLBI operations. + * + * When ARMv8.4-TTL exists, TLBI operations take an additional hint for + * the level at which the invalidation must take place. If the level is + * wrong, no invalidation may take place. In the case where the level + * cannot be easily determined, a 0 value for the level parameter will + * perform a non-hinted invalidation. + * + * For Stage-2 invalidation, use the level values provided to that effect + * in asm/stage2_pgtable.h. 
+ */ +#define TLBI_TTL_MASK GENMASK_ULL(47, 44) + +#define __tlbi_level(op, addr, level) do { \ + u64 arg = addr; \ + \ + if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) && \ + level) { \ + u64 ttl = level & 3; \ + ttl |= get_trans_granule() << 2; \ + arg &= ~TLBI_TTL_MASK; \ + arg |= FIELD_PREP(TLBI_TTL_MASK, ttl); \ + } \ + \ + __tlbi(op, arg); \ +} while(0) + +#define __tlbi_user_level(op, arg, level) do { \ + if (arm64_kernel_unmapped_at_el0()) \ + __tlbi_level(op, (arg | USER_ASID_FLAG), level); \ +} while (0) + +/* + * This macro creates a properly formatted VA operand for the TLB RANGE. + * The value bit assignments are: + * + * +----------+------+-------+-------+-------+----------------------+ + * | ASID | TG | SCALE | NUM | TTL | BADDR | + * +-----------------+-------+-------+-------+----------------------+ + * |63 48|47 46|45 44|43 39|38 37|36 0| + * + * The address range is determined by below formula: + * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE) + * + */ +#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ + ({ \ + unsigned long __ta = (addr) >> PAGE_SHIFT; \ + __ta &= GENMASK_ULL(36, 0); \ + __ta |= (unsigned long)(ttl) << 37; \ + __ta |= (unsigned long)(num) << 39; \ + __ta |= (unsigned long)(scale) << 44; \ + __ta |= get_trans_granule() << 46; \ + __ta |= (unsigned long)(asid) << 48; \ + __ta; \ + }) + +/* These macros are used by the TLBI RANGE feature. */ +#define __TLBI_RANGE_PAGES(num, scale) \ + ((unsigned long)((num) + 1) << (5 * (scale) + 1)) +#define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) + +/* + * Generate 'num' values from -1 to 30 with -1 rejected by the + * __flush_tlb_range() loop below. + */ +#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) +#define __TLBI_RANGE_NUM(pages, scale) \ + ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) + +/* * TLB Invalidation * ================ * @@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - unsigned long stride, bool last_level) + unsigned long stride, bool last_level, + int tlb_level) { + int num = 0; + int scale = 0; unsigned long asid = ASID(vma->vm_mm); unsigned long addr; + unsigned long pages; start = round_down(start, stride); end = round_up(end, stride); + pages = (end - start) >> PAGE_SHIFT; - if ((end - start) >= (MAX_TLBI_OPS * stride)) { + /* + * When not uses TLB range ops, we can handle up to + * (MAX_TLBI_OPS - 1) pages; + * When uses TLB range ops, we can handle up to + * (MAX_TLBI_RANGE_PAGES - 1) pages. + */ + if ((!system_supports_tlb_range() && + (end - start) >= (MAX_TLBI_OPS * stride)) || + pages >= MAX_TLBI_RANGE_PAGES) { flush_tlb_mm(vma->vm_mm); return; } - /* Convert the stride into units of 4k */ - stride >>= 12; + dsb(ishst); - start = __TLBI_VADDR(start, asid); - end = __TLBI_VADDR(end, asid); + /* + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. + * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. 
+ * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ + while (pages > 0) { + if (!system_supports_tlb_range() || + pages % 2 == 1) { + addr = __TLBI_VADDR(start, asid); + if (last_level) { + __tlbi_level(vale1is, addr, tlb_level); + __tlbi_user_level(vale1is, addr, tlb_level); + } else { + __tlbi_level(vae1is, addr, tlb_level); + __tlbi_user_level(vae1is, addr, tlb_level); + } + start += stride; + pages -= stride >> PAGE_SHIFT; + continue; + } - dsb(ishst); - for (addr = start; addr < end; addr += stride) { - if (last_level) { - __tlbi(vale1is, addr); - __tlbi_user(vale1is, addr); - } else { - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); + num = __TLBI_RANGE_NUM(pages, scale); + if (num >= 0) { + addr = __TLBI_VADDR_RANGE(start, asid, scale, + num, tlb_level); + if (last_level) { + __tlbi(rvale1is, addr); + __tlbi_user(rvale1is, addr); + } else { + __tlbi(rvae1is, addr); + __tlbi_user(rvae1is, addr); + } + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; + pages -= __TLBI_RANGE_PAGES(num, scale); } + scale++; } dsb(ish); } @@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, /* * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. + * Set the tlb_level to 0 because we can not get enough information here. */ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index bc5c7b091152..8d7c466f809b 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -19,6 +19,7 @@ #include <linux/string.h> #include <asm/cpufeature.h> +#include <asm/mmu.h> #include <asm/ptrace.h> #include <asm/memory.h> #include <asm/extable.h> diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 07468428fd29..f99dcb94b438 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -12,6 +12,8 @@ */ #define VDSO_LBASE 0x0 +#define __VVAR_PAGES 2 + #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 9a625e8947ff..75cbae60455b 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -152,6 +153,18 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return ret; } +#ifdef CONFIG_TIME_NS +static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + const struct vdso_data *ret; + + /* See __arch_get_vdso_data(). 
*/ + asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data)); + + return ret; +} +#endif + static inline bool vdso_clocksource_ok(const struct vdso_data *vd) { return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index afba6ba332f8..9c29ad3049f8 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> #include <asm/unistd.h> #define VDSO_HAS_CLOCK_GETRES 1 @@ -96,6 +97,14 @@ const struct vdso_data *__arch_get_vdso_data(void) return _vdso_data; } +#ifdef CONFIG_TIME_NS +static __always_inline +const struct vdso_data *__arch_get_timens_vdso_data(void) +{ + return _timens_data; +} +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 2d6ba1c2592e..912162f73529 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -74,5 +74,6 @@ #define HWCAP2_DGH (1 << 15) #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) +/* reserved for HWCAP2_MTE (1 << 18) */ #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 8b0ebce92427..0c796c795dbe 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -179,7 +179,7 @@ struct sve_context { * The same convention applies when returning from a signal: a caller * will need to remove or resize the sve_context block if it wants to * make the SVE registers live when they were previously non-live or - * vice-versa. This may require the the caller to allocate fresh + * vice-versa. This may require the caller to allocate fresh * memory and/or move other context blocks in the signal frame. * * Changing the vector length during signal return is not permitted: diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a7586a4db142..455966401102 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -261,6 +261,81 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) return __pgprot(PROT_DEVICE_nGnRnE); } +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + efi_memory_desc_t *md, *region = NULL; + pgprot_t prot; + + if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP))) + return NULL; + + for_each_efi_memory_desc(md) { + u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + if (phys < md->phys_addr || phys >= end) + continue; + + if (phys + size > end) { + pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n"); + return NULL; + } + region = md; + break; + } + + /* + * It is fine for AML to remap regions that are not represented in the + * EFI memory map at all, as it only describes normal memory, and MMIO + * regions that require a virtual mapping to make them accessible to + * the EFI runtime services. 
+ */ + prot = __pgprot(PROT_DEVICE_nGnRnE); + if (region) { + switch (region->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + case EFI_PERSISTENT_MEMORY: + pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); + return NULL; + + case EFI_RUNTIME_SERVICES_CODE: + /* + * This would be unusual, but not problematic per se, + * as long as we take care not to create a writable + * mapping for executable code. + */ + prot = PAGE_KERNEL_RO; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + /* + * ACPI reclaim memory is used to pass firmware tables + * and other data that is intended for consumption by + * the OS only, which may decide it wants to reclaim + * that memory and use it for something else. We never + * do that, but we usually add it to the linear map + * anyway, in which case we should use the existing + * mapping. + */ + if (memblock_is_map_memory(phys)) + return (void __iomem *)__phys_to_virt(phys); + /* fall through */ + + default: + if (region->attribute & EFI_MEMORY_WB) + prot = PAGE_KERNEL; + else if (region->attribute & EFI_MEMORY_WT) + prot = __pgprot(PROT_NORMAL_WT); + else if (region->attribute & EFI_MEMORY_WC) + prot = __pgprot(PROT_NORMAL_NC); + } + } + return __ioremap(phys, size, prot); +} + /* * Claim Synchronous External Aborts as a firmware first notification. * diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9fae0efc80c1..a389b999482e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -119,25 +119,12 @@ static inline void finalize_system_capabilities(void) static_branch_enable(&arm64_const_caps_ready); } -static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); - return 0; } -static struct notifier_block cpu_hwcaps_notifier = { - .notifier_call = dump_cpu_hwcaps -}; - -static int __init register_cpu_hwcaps_dumper(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, - &cpu_hwcaps_notifier); - return 0; -} -__initcall(register_cpu_hwcaps_dumper); - DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); @@ -269,6 +256,9 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* * Page size not being supported at Stage-2 is not fatal. You * just give up KVM if PAGE_SIZE isn't supported there. 
Go fix @@ -312,6 +302,10 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0), @@ -323,8 +317,15 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), @@ -345,7 +346,7 @@ static const struct arm64_ftr_bits ftr_ctr[] = { * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. 
*/ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_VIPT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_L1IP_SHIFT, 2, ICACHE_POLICY_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_IMINLINE_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -356,19 +357,19 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_INNERSHR_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_FCSE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_AUXREG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_TCM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_SHARELVL_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_OUTERSHR_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_PMSA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_VMSA_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -384,14 +385,14 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_FPMISC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_SIMDMISC_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_DZP_SHIFT, 1, 1), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_BS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -423,7 +424,8 @@ static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ 
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_AC2_SHIFT, 4, 0), + /* * SpecSEI = 1 indicates that the PE might generate an SError on an * external abort on speculative read. It is safe to assume that an @@ -465,10 +467,10 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = { static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_STATE0_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -492,13 +494,13 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPSDBG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPDBG_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -697,11 +699,52 @@ static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, static void __init sort_ftr_regs(void) { - int i; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) { + const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg; + const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits; + unsigned int j = 0; + + /* + * Features here must be sorted in descending order with respect + * to their shift values and should not overlap with each other. + */ + for (; ftr_bits->width != 0; ftr_bits++, j++) { + unsigned int width = ftr_reg->ftr_bits[j].width; + unsigned int shift = ftr_reg->ftr_bits[j].shift; + unsigned int prev_shift; + + WARN((shift + width) > 64, + "%s has invalid feature at shift %d\n", + ftr_reg->name, shift); + + /* + * Skip the first feature. There is nothing to + * compare against for now. 
+ */ + if (j == 0) + continue; + + prev_shift = ftr_reg->ftr_bits[j - 1].shift; + WARN((shift + width) > prev_shift, + "%s has feature overlap at shift %d\n", + ftr_reg->name, shift); + } - /* Check that the array is sorted so that we can do the binary search */ - for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++) + /* + * Skip the first register. There is nothing to + * compare against for now. + */ + if (i == 0) + continue; + /* + * Registers here must be sorted in ascending order with respect + * to sys_id for subsequent binary search in get_arm64_ftr_reg() + * to work correctly. + */ BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id); + } } /* @@ -1884,6 +1927,26 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .cpu_enable = cpu_has_fwb, }, + { + .desc = "ARMv8.4 Translation Table Level", + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .capability = ARM64_HAS_ARMv8_4_TTL, + .sys_reg = SYS_ID_AA64MMFR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .min_field_value = 1, + .matches = has_cpuid_feature, + }, + { + .desc = "TLB range maintenance instructions", + .capability = ARM64_HAS_TLB_RANGE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = ID_AA64ISAR0_TLB_RANGE, + }, #ifdef CONFIG_ARM64_HW_AFDBM { /* diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86637466daa8..393c6fb1f1cb 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -93,6 +93,7 @@ static const char *const hwcap_str[] = { "dgh", "rng", "bti", + /* reserved for "mte" */ NULL }; diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 1f646b07e3e9..314391a156ee 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -7,6 +7,14 @@ #include <linux/crash_core.h> #include <asm/cpufeature.h> #include <asm/memory.h> +#include <asm/pgtable-hwdef.h> + +static inline u64 get_tcr_el1_t1sz(void); + +static inline u64 get_tcr_el1_t1sz(void) +{ + return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; +} void arch_crash_save_vmcoreinfo(void) { @@ -16,6 +24,8 @@ void arch_crash_save_vmcoreinfo(void) kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); + vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n", + get_tcr_el1_t1sz()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", system_supports_address_auth() ? diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 35de8ba60e3d..2646178c8329 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -15,6 +15,7 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/asm_pointer_auth.h> +#include <asm/bug.h> #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> @@ -226,28 +227,9 @@ alternative_else_nop_endif add x29, sp, #S_STACKFRAME #ifdef CONFIG_ARM64_SW_TTBR0_PAN - /* - * Set the TTBR0 PAN bit in SPSR. When the exception is taken from - * EL0, there is no need to check the state of TTBR0_EL1 since - * accesses are always enabled. - * Note that the meaning of this bit differs from the ARMv8.1 PAN - * feature as all TTBR0_EL1 accesses are disabled, not just those to - * user mappings. 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 35de8ba60e3d..2646178c8329 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -15,6 +15,7 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/asm_pointer_auth.h>
+#include <asm/bug.h>
 #include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
@@ -226,28 +227,9 @@ alternative_else_nop_endif
 	add	x29, sp, #S_STACKFRAME
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-	/*
-	 * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
-	 * EL0, there is no need to check the state of TTBR0_EL1 since
-	 * accesses are always enabled.
-	 * Note that the meaning of this bit differs from the ARMv8.1 PAN
-	 * feature as all TTBR0_EL1 accesses are disabled, not just those to
-	 * user mappings.
-	 */
-alternative_if ARM64_HAS_PAN
-	b	1f				// skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+	bl	__swpan_entry_el\el
 alternative_else_nop_endif
-
-	.if	\el != 0
-	mrs	x21, ttbr0_el1
-	tst	x21, #TTBR_ASID_MASK		// Check for the reserved ASID
-	orr	x23, x23, #PSR_PAN_BIT		// Set the emulated PAN in the saved SPSR
-	b.eq	1f				// TTBR0 access already disabled
-	and	x23, x23, #~PSR_PAN_BIT		// Clear the emulated PAN in the saved SPSR
-	.endif
-
-	__uaccess_ttbr0_disable x21
-1:
 #endif
 
 	stp	x22, x23, [sp, #S_PC]
@@ -301,34 +283,9 @@ alternative_else_nop_endif
 	.endif
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
-	/*
-	 * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
-	 * PAN bit checking.
-	 */
-alternative_if ARM64_HAS_PAN
-	b	2f				// skip TTBR0 PAN
+alternative_if_not ARM64_HAS_PAN
+	bl	__swpan_exit_el\el
 alternative_else_nop_endif
-
-	.if	\el != 0
-	tbnz	x22, #22, 1f			// Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
-	.endif
-
-	__uaccess_ttbr0_enable x0, x1
-
-	.if	\el == 0
-	/*
-	 * Enable errata workarounds only if returning to user. The only
-	 * workaround currently required for TTBR0_EL1 changes are for the
-	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
-	 * corruption).
-	 */
-	bl	post_ttbr_update_workaround
-	.endif
-1:
-	.if	\el != 0
-	and	x22, x22, #~PSR_PAN_BIT		// ARMv8.0 CPUs do not understand this bit
-	.endif
-2:
 #endif
 
 	.if	\el == 0
@@ -401,6 +358,49 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
 	sb
 	.endm
 
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+	/*
+	 * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+	 * EL0, there is no need to check the state of TTBR0_EL1 since
+	 * accesses are always enabled.
+	 * Note that the meaning of this bit differs from the ARMv8.1 PAN
+	 * feature as all TTBR0_EL1 accesses are disabled, not just those to
+	 * user mappings.
+	 */
+SYM_CODE_START_LOCAL(__swpan_entry_el1)
+	mrs	x21, ttbr0_el1
+	tst	x21, #TTBR_ASID_MASK		// Check for the reserved ASID
+	orr	x23, x23, #PSR_PAN_BIT		// Set the emulated PAN in the saved SPSR
+	b.eq	1f				// TTBR0 access already disabled
+	and	x23, x23, #~PSR_PAN_BIT		// Clear the emulated PAN in the saved SPSR
+SYM_INNER_LABEL(__swpan_entry_el0, SYM_L_LOCAL)
+	__uaccess_ttbr0_disable x21
+1:	ret
+SYM_CODE_END(__swpan_entry_el1)
+
+	/*
+	 * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+	 * PAN bit checking.
+	 */
+SYM_CODE_START_LOCAL(__swpan_exit_el1)
+	tbnz	x22, #22, 1f			// Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+	__uaccess_ttbr0_enable x0, x1
+1:	and	x22, x22, #~PSR_PAN_BIT		// ARMv8.0 CPUs do not understand this bit
+	ret
+SYM_CODE_END(__swpan_exit_el1)
+
+SYM_CODE_START_LOCAL(__swpan_exit_el0)
+	__uaccess_ttbr0_enable x0, x1
+	/*
+	 * Enable errata workarounds only if returning to user. The only
+	 * workaround currently required for TTBR0_EL1 changes are for the
+	 * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+	 * corruption).
+	 */
+	b	post_ttbr_update_workaround
+SYM_CODE_END(__swpan_exit_el0)
+#endif
+
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 #ifdef CONFIG_SHADOW_CALL_STACK
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index 65b08a74aec6..0ce3a28e3347 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -253,6 +253,40 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
 	return ret;
 }
 
+static bool branch_rela_needs_plt(Elf64_Sym *syms, Elf64_Rela *rela,
+				  Elf64_Word dstidx)
+{
+
+	Elf64_Sym *s = syms + ELF64_R_SYM(rela->r_info);
+
+	if (s->st_shndx == dstidx)
+		return false;
+
+	return ELF64_R_TYPE(rela->r_info) == R_AARCH64_JUMP26 ||
+	       ELF64_R_TYPE(rela->r_info) == R_AARCH64_CALL26;
+}
+
+/* Group branch PLT relas at the front end of the array. */
+static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
+				      int numrels, Elf64_Word dstidx)
+{
+	int i = 0, j = numrels - 1;
+
+	if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+		return 0;
+
+	while (i < j) {
+		if (branch_rela_needs_plt(syms, &rela[i], dstidx))
+			i++;
+		else if (branch_rela_needs_plt(syms, &rela[j], dstidx))
+			swap(rela[i], rela[j]);
+		else
+			j--;
+	}
+
+	return i;
+}
+
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
@@ -290,7 +324,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
-		int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+		int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
 		Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
 
 		if (sechdrs[i].sh_type != SHT_RELA)
@@ -300,8 +334,14 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (!(dstsec->sh_flags & SHF_EXECINSTR))
 			continue;
 
-		/* sort by type, symbol index and addend */
-		sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+		/*
+		 * sort branch relocations requiring a PLT by type, symbol index
+		 * and addend
+		 */
+		nents = partition_branch_plt_relas(syms, rels, numrels,
+						   sechdrs[i].sh_info);
+		if (nents)
+			sort(rels, nents, sizeof(Elf64_Rela), cmp_rela, NULL);
 
 		if (!str_has_prefix(secstrings + dstsec->sh_name, ".init"))
 			core_plts += count_plts(syms, rels, numrels,
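partition_branch_plt_relas() above is a two-pointer partition rather than a full sort: relocations that may need a PLT are gathered at the front of the array and only that prefix is handed to sort(), which keeps module load time down when most relocations are not branch relocations. A stand-alone sketch of the same scheme over plain integers (hypothetical code, not from the patch):

	#include <stdbool.h>

	static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

	/* Move elements satisfying pred() to the front; order is not preserved. */
	static int partition(int *v, int n, bool (*pred)(int))
	{
		int i = 0, j = n - 1;

		while (i < j) {
			if (pred(v[i]))
				i++;			/* front element already belongs here */
			else if (pred(v[j]))
				swap_int(&v[i], &v[j]);	/* pull a matching element forward */
			else
				j--;			/* back element already belongs here */
		}
		return i;	/* the first i elements are known to satisfy pred() */
	}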
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4d7879484cec..462f9a9cc44b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -13,12 +13,15 @@
 #include <asm/sysreg.h>
 #include <asm/virt.h>
 
+#include <clocksource/arm_arch_timer.h>
+
 #include <linux/acpi.h>
 #include <linux/clocksource.h>
 #include <linux/kvm_host.h>
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
+#include <linux/sched_clock.h>
 #include <linux/smp.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
@@ -155,7 +158,7 @@ armv8pmu_events_sysfs_show(struct device *dev,
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
 
-	return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
+	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
 }
 
 #define ARMV8_EVENT_ATTR(name, config) \
@@ -222,10 +225,29 @@ static struct attribute *armv8_pmuv3_event_attrs[] = {
 	ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
 	ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
 	ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+	ARMV8_EVENT_ATTR(l1d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(op_retired, ARMV8_PMUV3_PERFCTR_OP_RETIRED),
+	ARMV8_EVENT_ATTR(op_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC),
+	ARMV8_EVENT_ATTR(stall, ARMV8_PMUV3_PERFCTR_STALL),
+	ARMV8_EVENT_ATTR(stall_slot_backend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND),
+	ARMV8_EVENT_ATTR(stall_slot_frontend, ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND),
+	ARMV8_EVENT_ATTR(stall_slot, ARMV8_PMUV3_PERFCTR_STALL_SLOT),
 	ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
 	ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
 	ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
 	ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
+	ARMV8_EVENT_ATTR(cnt_cycles, ARMV8_AMU_PERFCTR_CNT_CYCLES),
+	ARMV8_EVENT_ATTR(stall_backend_mem, ARMV8_AMU_PERFCTR_STALL_BACKEND_MEM),
+	ARMV8_EVENT_ATTR(l1i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L1I_CACHE_LMISS),
+	ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS),
+	ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD),
+	ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT),
+	ARMV8_EVENT_ATTR(mem_access_checked, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED),
+	ARMV8_EVENT_ATTR(mem_access_checked_rd, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_RD),
+	ARMV8_EVENT_ATTR(mem_access_checked_wr, ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR),
 	NULL,
 };
 
@@ -244,10 +266,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
 	    test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
 		return attr->mode;
 
-	pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
-	if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
-	    test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap))
-		return attr->mode;
+	if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) {
+		u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE;
+
+		if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS &&
+		    test_bit(id, cpu_pmu->pmceid_ext_bitmap))
+			return attr->mode;
+	}
 
 	return 0;
 }
@@ -1165,28 +1190,54 @@ device_initcall(armv8_pmu_driver_init)
 void arch_perf_update_userpage(struct perf_event *event,
 			       struct perf_event_mmap_page *userpg, u64 now)
 {
-	u32 freq;
-	u32 shift;
+	struct clock_read_data *rd;
+	unsigned int seq;
+	u64 ns;
 
-	/*
-	 * Internal timekeeping for enabled/running/stopped times
-	 * is always computed with the sched_clock.
-	 */
-	freq = arch_timer_get_rate();
-	userpg->cap_user_time = 1;
+	userpg->cap_user_time = 0;
+	userpg->cap_user_time_zero = 0;
+	userpg->cap_user_time_short = 0;
+
+	do {
+		rd = sched_clock_read_begin(&seq);
+
+		if (rd->read_sched_clock != arch_timer_read_counter)
+			return;
+
+		userpg->time_mult = rd->mult;
+		userpg->time_shift = rd->shift;
+		userpg->time_zero = rd->epoch_ns;
+		userpg->time_cycles = rd->epoch_cyc;
+		userpg->time_mask = rd->sched_clock_mask;
+
+		/*
+		 * Subtract the cycle base, such that software that
+		 * doesn't know about cap_user_time_short still 'works'
+		 * assuming no wraps.
+		 */
+		ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+		userpg->time_zero -= ns;
+
+	} while (sched_clock_read_retry(seq));
+
+	userpg->time_offset = userpg->time_zero - now;
 
-	clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
-			NSEC_PER_SEC, 0);
 	/*
 	 * time_shift is not expected to be greater than 31 due to
 	 * the original published conversion algorithm shifting a
 	 * 32-bit value (now specifies a 64-bit value) - refer
 	 * perf_event_mmap_page documentation in perf_event.h.
 	 */
-	if (shift == 32) {
-		shift = 31;
+	if (userpg->time_shift == 32) {
+		userpg->time_shift = 31;
 		userpg->time_mult >>= 1;
 	}
-	userpg->time_shift = (u16)shift;
-	userpg->time_offset = -now;
+
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always computed with the sched_clock.
+	 */
+	userpg->cap_user_time = 1;
+	userpg->cap_user_time_zero = 1;
+	userpg->cap_user_time_short = 1;
 }
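With the perf_event.c hunk above, arch_perf_update_userpage() publishes the sched_clock parameters (time_mult, time_shift, time_zero, time_cycles, time_mask) and sets cap_user_time, cap_user_time_zero and cap_user_time_short, so self-monitoring user space can turn a raw CNTVCT_EL0 value into sched_clock nanoseconds without a system call. The sketch below follows the conversion documented in include/uapi/linux/perf_event.h; it is illustrative user-space code, not part of the patch, and in real code the fields should be sampled inside the pc->lock seqcount loop described there:

	#include <linux/perf_event.h>	/* struct perf_event_mmap_page */

	static __u64 read_cntvct(void)
	{
		__u64 cyc;

		asm volatile("isb; mrs %0, cntvct_el0" : "=r" (cyc));
		return cyc;
	}

	static __u64 cyc_to_sched_clock_ns(const struct perf_event_mmap_page *pc, __u64 cyc)
	{
		__u64 quot, rem;

		if (pc->cap_user_time_short)
			cyc = pc->time_cycles +
			      ((cyc - pc->time_cycles) & pc->time_mask);

		quot = cyc >> pc->time_shift;
		rem  = cyc & (((__u64)1 << pc->time_shift) - 1);

		return pc->time_zero + quot * pc->time_mult +
		       ((rem * pc->time_mult) >> pc->time_shift);
	}

Usage would be along the lines of ns = cyc_to_sched_clock_ns(pc, read_cntvct()), where pc is the mmap'ed perf_event_mmap_page of the event.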
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 93b3844cf442..c793276ec7ad 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -400,11 +400,7 @@ static int __init topology_init(void)
 }
 subsys_initcall(topology_init);
 
-/*
- * Dump out kernel offset information on panic.
- */
-static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
-			      void *p)
+static void dump_kernel_offset(void)
 {
 	const unsigned long offset = kaslr_offset();
 
@@ -415,17 +411,25 @@ static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
 	} else {
 		pr_emerg("Kernel Offset: disabled\n");
 	}
+}
+
+static int arm64_panic_block_dump(struct notifier_block *self,
+				  unsigned long v, void *p)
+{
+	dump_kernel_offset();
+	dump_cpu_features();
+	dump_mem_limit();
 	return 0;
 }
 
-static struct notifier_block kernel_offset_notifier = {
-	.notifier_call = dump_kernel_offset
+static struct notifier_block arm64_panic_block = {
+	.notifier_call = arm64_panic_block_dump
 };
 
-static int __init register_kernel_offset_dumper(void)
+static int __init register_arm64_panic_block(void)
 {
 	atomic_notifier_chain_register(&panic_notifier_list,
-				       &kernel_offset_notifier);
+				       &arm64_panic_block);
 	return 0;
 }
-__initcall(register_kernel_offset_dumper);
+device_initcall(register_arm64_panic_block);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 139679c745bf..2dd8e3b8b94b 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -199,12 +199,12 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
 
 	put_task_stack(tsk);
 }
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	__save_stack_trace(tsk, trace, 1);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 47f651df781c..13ebd5ca2070 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -855,7 +855,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 	pr_emerg("Task stack: [0x%016lx..0x%016lx]\n",
 		 tsk_stk, tsk_stk + THREAD_SIZE);
 	pr_emerg("IRQ stack: [0x%016lx..0x%016lx]\n",
-		 irq_stk, irq_stk + THREAD_SIZE);
+		 irq_stk, irq_stk + IRQ_STACK_SIZE);
 	pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n",
 		 ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index e546df0efefb..d4202a32abc9 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/time_namespace.h>
 #include <linux/timekeeper_internal.h>
 #include <linux/vmalloc.h>
 #include <vdso/datapage.h>
@@ -40,6 +41,12 @@ enum vdso_abi {
 #endif /* CONFIG_COMPAT_VDSO */
 };
 
+enum vvar_pages {
+	VVAR_DATA_PAGE_OFFSET,
+	VVAR_TIMENS_PAGE_OFFSET,
+	VVAR_NR_PAGES,
+};
+
 struct vdso_abi_info {
 	const char *name;
 	const char *vdso_code_start;
@@ -107,25 +114,122 @@ static int __vdso_init(enum vdso_abi abi)
 			vdso_info[abi].vdso_code_start) >>
 			PAGE_SHIFT;
 
-	/* Allocate the vDSO pagelist, plus a page for the data. */
-	vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1,
+	vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
 				sizeof(struct page *),
 				GFP_KERNEL);
 	if (vdso_pagelist == NULL)
 		return -ENOMEM;
 
-	/* Grab the vDSO data page. */
-	vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-	/* Grab the vDSO code pages. */
 	pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);
 
 	for (i = 0; i < vdso_info[abi].vdso_pages; i++)
-		vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+		vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+	vdso_info[abi].cm->pages = vdso_pagelist;
+
+	return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+	return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+	struct mm_struct *mm = task->mm;
+	struct vm_area_struct *vma;
+
+	mmap_read_lock(mm);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long size = vma->vm_end - vma->vm_start;
 
-	vdso_info[abi].dm->pages = &vdso_pagelist[0];
-	vdso_info[abi].cm->pages = &vdso_pagelist[1];
+		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
+			zap_page_range(vma, vma->vm_start, size);
+#ifdef CONFIG_COMPAT_VDSO
+		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
+			zap_page_range(vma, vma->vm_start, size);
+#endif
+	}
+
+	mmap_read_unlock(mm);
+	return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_mm == current->mm))
+		return current->nsproxy->time_ns->vvar_page;
+
+	/*
+	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
+	 * through interfaces like /proc/$pid/mem or
+	 * process_vm_{readv,writev}() as long as there's no .access()
+	 * in special_mapping_vmops.
+	 * For more details check_vma_flags() and __access_remote_vm()
+	 */
+	WARN(1, "vvar_page accessed remotely");
+
+	return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+	return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+			     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *timens_page = find_timens_vvar_page(vma);
+	unsigned long pfn;
+
+	switch (vmf->pgoff) {
+	case VVAR_DATA_PAGE_OFFSET:
+		if (timens_page)
+			pfn = page_to_pfn(timens_page);
+		else
+			pfn = sym_to_pfn(vdso_data);
+		break;
+#ifdef CONFIG_TIME_NS
+	case VVAR_TIMENS_PAGE_OFFSET:
+		/*
+		 * If a task belongs to a time namespace then a namespace
+		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+		 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+		 * offset.
+		 * See also the comment near timens_setup_vdso_data().
+		 */
+		if (!timens_page)
+			return VM_FAULT_SIGBUS;
+		pfn = sym_to_pfn(vdso_data);
+		break;
+#endif /* CONFIG_TIME_NS */
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+
+	return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+static int vvar_mremap(const struct vm_special_mapping *sm,
+		       struct vm_area_struct *new_vma)
+{
+	unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+
+	if (new_size != VVAR_NR_PAGES * PAGE_SIZE)
+		return -EINVAL;
 
 	return 0;
 }
@@ -139,9 +243,11 @@ static int __setup_additional_pages(enum vdso_abi abi,
 	unsigned long gp_flags = 0;
 	void *ret;
 
+	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
 	vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
 	/* Be sure to map the data page */
-	vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+	vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
 
 	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
@@ -149,8 +255,8 @@ static int __setup_additional_pages(enum vdso_abi abi,
 		goto up_fail;
 	}
 
-	ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
-				       VM_READ|VM_MAYREAD,
+	ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
+				       VM_READ|VM_MAYREAD|VM_PFNMAP,
 				       vdso_info[abi].dm);
 	if (IS_ERR(ret))
 		goto up_fail;
@@ -158,7 +264,7 @@ static int __setup_additional_pages(enum vdso_abi abi,
 	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
 		gp_flags = VM_ARM64_BTI;
 
-	vdso_base += PAGE_SIZE;
+	vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
 	mm->context.vdso = (void *)vdso_base;
 	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
 				       VM_READ|VM_EXEC|gp_flags|
@@ -206,6 +312,8 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
 #ifdef CONFIG_COMPAT_VDSO
 	[AA32_MAP_VVAR] = {
 		.name = "[vvar]",
+		.fault = vvar_fault,
+		.mremap = vvar_mremap,
 	},
 	[AA32_MAP_VDSO] = {
 		.name = "[vdso]",
@@ -371,6 +479,8 @@ enum aarch64_map {
 static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
 	[AA64_MAP_VVAR] = {
 		.name = "[vvar]",
+		.fault = vvar_fault,
+		.mremap = vvar_mremap,
 	},
 	[AA64_MAP_VDSO] = {
 		.name = "[vdso]",
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index 7ad2d3a0cd48..d808ad31e01f 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -17,7 +17,10 @@ OUTPUT_ARCH(aarch64)
 
 SECTIONS
 {
-	PROVIDE(_vdso_data = . - PAGE_SIZE);
+	PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = VDSO_LBASE + SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S
index 337d03522048..3348ce5ea306 100644
--- a/arch/arm64/kernel/vdso32/vdso.lds.S
+++ b/arch/arm64/kernel/vdso32/vdso.lds.S
@@ -17,7 +17,10 @@ OUTPUT_ARCH(arm)
 
 SECTIONS
 {
-	PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+	PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+	PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
 	. = VDSO_LBASE + SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text
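Taken together, the vdso.c and the two linker-script hunks above grow the [vvar] area from one page to __VVAR_PAGES (two) and install a fault handler for it. The resulting layout is roughly as follows (illustrative summary of the vvar_fault() logic above, assuming 4K pages; the second page is only ever populated for tasks in a non-root time namespace):

	/*
	 * [vvar] page 0 (VVAR_DATA_PAGE_OFFSET):   vdso_data, or the task's
	 *                                          timens-specific copy when the
	 *                                          task is in a time namespace
	 * [vvar] page 1 (VVAR_TIMENS_PAGE_OFFSET): the real vdso_data for timens
	 *                                          tasks, SIGBUS otherwise
	 * [vdso] text:                             mapped immediately after [vvar]
	 */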
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5423ffe0a987..ec8e894684a7 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -10,7 +10,6 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/kernel-pgtable.h>
-#include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index e76c0e89d48e..86971fe26f3d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 
+#include <asm/alternative.h>
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index baf5ce9225ce..d3196671c590 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1024,9 +1024,9 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 /* Macro to expand the AMU counter and type registers*/
 #define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu }
-#define AMU_AMEVTYPE0_EL0(n) { SYS_DESC(SYS_AMEVTYPE0_EL0(n)), access_amu }
+#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu }
 #define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu }
-#define AMU_AMEVTYPE1_EL0(n) { SYS_DESC(SYS_AMEVTYPE1_EL0(n)), access_amu }
+#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu }
 
 static bool trap_ptrauth(struct kvm_vcpu *vcpu,
 			 struct sys_reg_params *p,
@@ -1629,22 +1629,22 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	AMU_AMEVCNTR0_EL0(13),
 	AMU_AMEVCNTR0_EL0(14),
 	AMU_AMEVCNTR0_EL0(15),
-	AMU_AMEVTYPE0_EL0(0),
-	AMU_AMEVTYPE0_EL0(1),
-	AMU_AMEVTYPE0_EL0(2),
-	AMU_AMEVTYPE0_EL0(3),
-	AMU_AMEVTYPE0_EL0(4),
-	AMU_AMEVTYPE0_EL0(5),
-	AMU_AMEVTYPE0_EL0(6),
-	AMU_AMEVTYPE0_EL0(7),
-	AMU_AMEVTYPE0_EL0(8),
-	AMU_AMEVTYPE0_EL0(9),
-	AMU_AMEVTYPE0_EL0(10),
-	AMU_AMEVTYPE0_EL0(11),
-	AMU_AMEVTYPE0_EL0(12),
-	AMU_AMEVTYPE0_EL0(13),
-	AMU_AMEVTYPE0_EL0(14),
-	AMU_AMEVTYPE0_EL0(15),
+	AMU_AMEVTYPER0_EL0(0),
+	AMU_AMEVTYPER0_EL0(1),
+	AMU_AMEVTYPER0_EL0(2),
+	AMU_AMEVTYPER0_EL0(3),
+	AMU_AMEVTYPER0_EL0(4),
+	AMU_AMEVTYPER0_EL0(5),
+	AMU_AMEVTYPER0_EL0(6),
+	AMU_AMEVTYPER0_EL0(7),
+	AMU_AMEVTYPER0_EL0(8),
+	AMU_AMEVTYPER0_EL0(9),
+	AMU_AMEVTYPER0_EL0(10),
+	AMU_AMEVTYPER0_EL0(11),
+	AMU_AMEVTYPER0_EL0(12),
+	AMU_AMEVTYPER0_EL0(13),
+	AMU_AMEVTYPER0_EL0(14),
+	AMU_AMEVTYPER0_EL0(15),
 	AMU_AMEVCNTR1_EL0(0),
 	AMU_AMEVCNTR1_EL0(1),
 	AMU_AMEVCNTR1_EL0(2),
@@ -1661,22 +1661,22 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	AMU_AMEVCNTR1_EL0(13),
 	AMU_AMEVCNTR1_EL0(14),
 	AMU_AMEVCNTR1_EL0(15),
-	AMU_AMEVTYPE1_EL0(0),
-	AMU_AMEVTYPE1_EL0(1),
-	AMU_AMEVTYPE1_EL0(2),
-	AMU_AMEVTYPE1_EL0(3),
-	AMU_AMEVTYPE1_EL0(4),
-	AMU_AMEVTYPE1_EL0(5),
-	AMU_AMEVTYPE1_EL0(6),
-	AMU_AMEVTYPE1_EL0(7),
-	AMU_AMEVTYPE1_EL0(8),
-	AMU_AMEVTYPE1_EL0(9),
-	AMU_AMEVTYPE1_EL0(10),
-	AMU_AMEVTYPE1_EL0(11),
-	AMU_AMEVTYPE1_EL0(12),
-	AMU_AMEVTYPE1_EL0(13),
-	AMU_AMEVTYPE1_EL0(14),
-	AMU_AMEVTYPE1_EL0(15),
+	AMU_AMEVTYPER1_EL0(0),
+	AMU_AMEVTYPER1_EL0(1),
+	AMU_AMEVTYPER1_EL0(2),
+	AMU_AMEVTYPER1_EL0(3),
+	AMU_AMEVTYPER1_EL0(4),
+	AMU_AMEVTYPER1_EL0(5),
+	AMU_AMEVTYPER1_EL0(6),
+	AMU_AMEVTYPER1_EL0(7),
+	AMU_AMEVTYPER1_EL0(8),
+	AMU_AMEVTYPER1_EL0(9),
+	AMU_AMEVTYPER1_EL0(10),
+	AMU_AMEVTYPER1_EL0(11),
+	AMU_AMEVTYPER1_EL0(12),
+	AMU_AMEVTYPER1_EL0(13),
+	AMU_AMEVTYPER1_EL0(14),
+	AMU_AMEVTYPER1_EL0(15),
 
 	{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
 	{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d702d60e64da..a206655a39a5 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -198,9 +198,10 @@ set_asid:
 	return idx2asid(asid) | generation;
 }
 
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+void check_and_switch_context(struct mm_struct *mm)
 {
 	unsigned long flags;
+	unsigned int cpu;
 	u64 asid, old_active_asid;
 
 	if (system_supports_cnp())
@@ -222,9 +223,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	 * relaxed xchg in flush_context will treat us as reserved
 	 * because atomic RmWs are totally ordered for a given location.
 	 */
-	old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+	old_active_asid = atomic64_read(this_cpu_ptr(&active_asids));
 	if (old_active_asid && asid_gen_match(asid) &&
-	    atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+	    atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids),
 				     old_active_asid, asid))
 		goto switch_mm_fastpath;
 
@@ -236,10 +237,11 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 		atomic64_set(&mm->context.id, asid);
 	}
 
+	cpu = smp_processor_id();
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
 		local_flush_tlb_all();
 
-	atomic64_set(&per_cpu(active_asids, cpu), asid);
+	atomic64_set(this_cpu_ptr(&active_asids), asid);
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0a52ce46f020..aa421bf4956e 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -19,6 +19,44 @@
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
 
+/*
+ * HugeTLB Support Matrix
+ *
+ * ---------------------------------------------------
+ * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
+ * ---------------------------------------------------
+ * |     4K    |    64K   |   2M  |    32M   |   1G  |
+ * |    16K    |     2M   |  32M  |     1G   |       |
+ * |    64K    |     2M   |  512M |    16G   |       |
+ * ---------------------------------------------------
+ */
+
+/*
+ * Reserve CMA areas for the largest supported gigantic
+ * huge page when requested. Any other smaller gigantic
+ * huge pages could still be served from those areas.
+ */
+#ifdef CONFIG_CMA
+void __init arm64_hugetlb_cma_reserve(void)
+{
+	int order;
+
+#ifdef CONFIG_ARM64_4K_PAGES
+	order = PUD_SHIFT - PAGE_SHIFT;
+#else
+	order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT;
+#endif
+	/*
+	 * HugeTLB CMA reservation is required for gigantic
+	 * huge pages which could not be allocated via the
+	 * page allocator. Just warn if there is any change
+	 * breaking this assumption.
+	 */
+	WARN_ON(order <= MAX_ORDER);
+	hugetlb_cma_reserve(order);
+}
+#endif /* CONFIG_CMA */
+
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 bool arch_hugetlb_migration_supported(struct hstate *h)
 {
@@ -457,9 +495,9 @@ static int __init hugetlbpage_init(void)
 #ifdef CONFIG_ARM64_4K_PAGES
 	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 #endif
-	hugetlb_add_hstate((CONT_PMD_SHIFT + PMD_SHIFT) - PAGE_SHIFT);
+	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
 	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	hugetlb_add_hstate((CONT_PTE_SHIFT + PAGE_SHIFT) - PAGE_SHIFT);
+	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);
 
 	return 0;
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e93cfc7c47a..f8c19c6c8e71 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -425,8 +425,8 @@ void __init bootmem_init(void)
 	 * initialize node_online_map that gets used in hugetlb_cma_reserve()
 	 * while allocating required CMA size across online nodes.
 	 */
-#ifdef CONFIG_ARM64_4K_PAGES
-	hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
+	arm64_hugetlb_cma_reserve();
 #endif
 
 	/*
@@ -563,27 +563,11 @@ void free_initmem(void)
 	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-/*
- * Dump out memory limit information on panic.
- */
-static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+void dump_mem_limit(void)
 {
 	if (memory_limit != PHYS_ADDR_MAX) {
 		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
 	} else {
 		pr_emerg("Memory Limit: none\n");
 	}
-	return 0;
-}
-
-static struct notifier_block mem_limit_notifier = {
-	.notifier_call = dump_mem_limit,
-};
-
-static int __init register_mem_limit_dumper(void)
-{
-	atomic_notifier_chain_register(&panic_notifier_list,
-				       &mem_limit_notifier);
-	return 0;
 }
-__initcall(register_mem_limit_dumper);
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index c8e818688ec1..3099362d9f26 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -4,6 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
 #include <asm/unistd.h>
 #include <asm/csr.h>
 #include <uapi/linux/time.h>