From 60c1b220d8bc6baeaf837cd60f94a331b25c26bc Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Thu, 27 Jun 2019 12:52:58 -0700 Subject: cpu-topology: Move cpu topology code to common code. Both RISC-V & ARM64 are using cpu-map device tree to describe their cpu topology. It's better to move the relevant code to a common place instead of duplicate code. To: Will Deacon To: Catalin Marinas Signed-off-by: Atish Patra [Tested on QDF2400] Tested-by: Jeffrey Hugo [Tested on Juno and other embedded platforms.] Tested-by: Sudeep Holla Reviewed-by: Sudeep Holla Acked-by: Will Deacon Acked-by: Greg Kroah-Hartman Signed-off-by: Paul Walmsley --- arch/arm64/kernel/topology.c | 303 +------------------------------------------ 1 file changed, 4 insertions(+), 299 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0825c4a856e3..6b95c91e7d67 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -14,250 +14,13 @@ #include #include #include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -static int __init get_cpu_for_node(struct device_node *node) -{ - struct device_node *cpu_node; - int cpu; - - cpu_node = of_parse_phandle(node, "cpu", 0); - if (!cpu_node) - return -1; - - cpu = of_cpu_node_to_id(cpu_node); - if (cpu >= 0) - topology_parse_cpu_capacity(cpu_node, cpu); - else - pr_crit("Unable to find CPU node for %pOF\n", cpu_node); - - of_node_put(cpu_node); - return cpu; -} - -static int __init parse_core(struct device_node *core, int package_id, - int core_id) -{ - char name[10]; - bool leaf = true; - int i = 0; - int cpu; - struct device_node *t; - - do { - snprintf(name, sizeof(name), "thread%d", i); - t = of_get_child_by_name(core, name); - if (t) { - leaf = false; - cpu = get_cpu_for_node(t); - if (cpu >= 0) { - cpu_topology[cpu].package_id = package_id; - cpu_topology[cpu].core_id = core_id; - cpu_topology[cpu].thread_id = i; - } else { - pr_err("%pOF: Can't get CPU for thread\n", - t); - of_node_put(t); - return -EINVAL; - } - of_node_put(t); - } - i++; - } while (t); - - cpu = get_cpu_for_node(core); - if (cpu >= 0) { - if (!leaf) { - pr_err("%pOF: Core has both threads and CPU\n", - core); - return -EINVAL; - } - - cpu_topology[cpu].package_id = package_id; - cpu_topology[cpu].core_id = core_id; - } else if (leaf) { - pr_err("%pOF: Can't get CPU for leaf core\n", core); - return -EINVAL; - } - - return 0; -} - -static int __init parse_cluster(struct device_node *cluster, int depth) -{ - char name[10]; - bool leaf = true; - bool has_cores = false; - struct device_node *c; - static int package_id __initdata; - int core_id = 0; - int i, ret; - - /* - * First check for child clusters; we currently ignore any - * information about the nesting of clusters and present the - * scheduler with a flat list of them. - */ - i = 0; - do { - snprintf(name, sizeof(name), "cluster%d", i); - c = of_get_child_by_name(cluster, name); - if (c) { - leaf = false; - ret = parse_cluster(c, depth + 1); - of_node_put(c); - if (ret != 0) - return ret; - } - i++; - } while (c); - - /* Now check for cores */ - i = 0; - do { - snprintf(name, sizeof(name), "core%d", i); - c = of_get_child_by_name(cluster, name); - if (c) { - has_cores = true; - - if (depth == 0) { - pr_err("%pOF: cpu-map children should be clusters\n", - c); - of_node_put(c); - return -EINVAL; - } - - if (leaf) { - ret = parse_core(c, package_id, core_id++); - } else { - pr_err("%pOF: Non-leaf cluster with core %s\n", - cluster, name); - ret = -EINVAL; - } - - of_node_put(c); - if (ret != 0) - return ret; - } - i++; - } while (c); - - if (leaf && !has_cores) - pr_warn("%pOF: empty cluster\n", cluster); - - if (leaf) - package_id++; - - return 0; -} - -static int __init parse_dt_topology(void) -{ - struct device_node *cn, *map; - int ret = 0; - int cpu; - - cn = of_find_node_by_path("/cpus"); - if (!cn) { - pr_err("No CPU information found in DT\n"); - return 0; - } - - /* - * When topology is provided cpu-map is essentially a root - * cluster with restricted subnodes. - */ - map = of_get_child_by_name(cn, "cpu-map"); - if (!map) - goto out; - - ret = parse_cluster(map, 0); - if (ret != 0) - goto out_map; - - topology_normalize_cpu_scale(); - - /* - * Check that all cores are in the topology; the SMP code will - * only mark cores described in the DT as possible. - */ - for_each_possible_cpu(cpu) - if (cpu_topology[cpu].package_id == -1) - ret = -EINVAL; - -out_map: - of_node_put(map); -out: - of_node_put(cn); - return ret; -} - -/* - * cpu topology table - */ -struct cpu_topology cpu_topology[NR_CPUS]; -EXPORT_SYMBOL_GPL(cpu_topology); - -const struct cpumask *cpu_coregroup_mask(int cpu) -{ - const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); - - /* Find the smaller of NUMA, core or LLC siblings */ - if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { - /* not numa in package, lets use the package siblings */ - core_mask = &cpu_topology[cpu].core_sibling; - } - if (cpu_topology[cpu].llc_id != -1) { - if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) - core_mask = &cpu_topology[cpu].llc_sibling; - } - - return core_mask; -} - -static void update_siblings_masks(unsigned int cpuid) -{ - struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; - int cpu; - - /* update core and thread sibling masks */ - for_each_online_cpu(cpu) { - cpu_topo = &cpu_topology[cpu]; - - if (cpuid_topo->llc_id == cpu_topo->llc_id) { - cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); - cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); - } - - if (cpuid_topo->package_id != cpu_topo->package_id) - continue; - - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); - - if (cpuid_topo->core_id != cpu_topo->core_id) - continue; - - cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); - cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); - } -} - void store_cpu_topology(unsigned int cpuid) { struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; @@ -296,59 +59,19 @@ topology_populated: update_siblings_masks(cpuid); } -static void clear_cpu_topology(int cpu) -{ - struct cpu_topology *cpu_topo = &cpu_topology[cpu]; - - cpumask_clear(&cpu_topo->llc_sibling); - cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); - - cpumask_clear(&cpu_topo->core_sibling); - cpumask_set_cpu(cpu, &cpu_topo->core_sibling); - cpumask_clear(&cpu_topo->thread_sibling); - cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); -} - -static void __init reset_cpu_topology(void) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - struct cpu_topology *cpu_topo = &cpu_topology[cpu]; - - cpu_topo->thread_id = -1; - cpu_topo->core_id = 0; - cpu_topo->package_id = -1; - cpu_topo->llc_id = -1; - - clear_cpu_topology(cpu); - } -} - -void remove_cpu_topology(unsigned int cpu) -{ - int sibling; - - for_each_cpu(sibling, topology_core_cpumask(cpu)) - cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); - for_each_cpu(sibling, topology_sibling_cpumask(cpu)) - cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); - for_each_cpu(sibling, topology_llc_cpumask(cpu)) - cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); - - clear_cpu_topology(cpu); -} - #ifdef CONFIG_ACPI /* * Propagate the topology information of the processor_topology_node tree to the * cpu_topology array. */ -static int __init parse_acpi_topology(void) +int __init parse_acpi_topology(void) { bool is_threaded; int cpu, topology_id; + if (acpi_disabled) + return 0; + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; for_each_possible_cpu(cpu) { @@ -384,24 +107,6 @@ static int __init parse_acpi_topology(void) return 0; } - -#else -static inline int __init parse_acpi_topology(void) -{ - return -EINVAL; -} #endif -void __init init_cpu_topology(void) -{ - reset_cpu_topology(); - /* - * Discard anything that was parsed if we hit an error so we - * don't use partial information. - */ - if (!acpi_disabled && parse_acpi_topology()) - reset_cpu_topology(); - else if (of_have_populated_dt() && parse_dt_topology()) - reset_cpu_topology(); -} -- cgit v1.2.3 From ca786b8db751c0dd980fccf2d65acb77a296f629 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 2 Jul 2019 15:35:53 +0800 Subject: arm64: perf: Remove unused macro ARMV8_EVENT_ATTR_RESOLVE became unused after commit <4b1a9e6934ec> ("arm64/perf: Filter common events based on PMCEIDn_EL0"). Remove it. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 96e90e270042..2d3bdebdf6df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -157,7 +157,6 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR_RESOLVE(m) #m #define ARMV8_EVENT_ATTR(name, config) \ PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ config, armv8pmu_events_sysfs_show) -- cgit v1.2.3 From c19d050f80881296aab3ba90fe5b2c107a238dcb Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Thu, 11 Jul 2019 17:27:32 +0530 Subject: arm64/kexec: Use consistent convention of initializing 'kxec_buf.mem' with KEXEC_BUF_MEM_UNKNOWN With commit b6664ba42f14 ("s390, kexec_file: drop arch_kexec_mem_walk()"), we introduced the KEXEC_BUF_MEM_UNKNOWN macro. If kexec_buf.mem is set to this value, kexec_locate_mem_hole() will try to allocate free memory. While other arch(s) like s390 and x86_64 already use this macro to initialize kexec_buf.mem with, arm64 uses an equivalent value of 0. Replace it with KEXEC_BUF_MEM_UNKNOWN, to keep the convention of initializing 'kxec_buf.mem' consistent across various archs. Cc: takahiro.akashi@linaro.org Cc: james.morse@arm.com Reviewed-by: Matthias Brugger Signed-off-by: Bhupesh Sharma Signed-off-by: Will Deacon --- arch/arm64/kernel/kexec_image.c | 2 +- arch/arm64/kernel/machine_kexec_file.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 2514fd6f12cb..29a9428486a5 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -84,7 +84,7 @@ static void *image_load(struct kimage *image, kbuf.buffer = kernel; kbuf.bufsz = kernel_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = le64_to_cpu(h->image_size); text_offset = le64_to_cpu(h->text_offset); kbuf.buf_align = MIN_KIMG_ALIGN; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 58871333737a..ba78ee7ca990 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -177,7 +177,7 @@ int load_other_segments(struct kimage *image, if (initrd) { kbuf.buffer = initrd; kbuf.bufsz = initrd_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = initrd_len; kbuf.buf_align = 0; /* within 1GB-aligned window of up to 32GB in size */ @@ -204,7 +204,7 @@ int load_other_segments(struct kimage *image, dtb_len = fdt_totalsize(dtb); kbuf.buffer = dtb; kbuf.bufsz = dtb_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = dtb_len; /* not across 2MB boundary */ kbuf.buf_align = SZ_2M; -- cgit v1.2.3 From b3e089cd446b26bb1e12860e1afb9da314453fd6 Mon Sep 17 00:00:00 2001 From: Chuhong Yuan Date: Tue, 30 Jul 2019 10:44:15 +0800 Subject: arm64: Replace strncmp with str_has_prefix In commit b6b2735514bc ("tracing: Use str_has_prefix() instead of using fixed sizes") the newly introduced str_has_prefix() was used to replace error-prone strncmp(str, const, len). Here fix codes with the same pattern. Signed-off-by: Chuhong Yuan Signed-off-by: Will Deacon --- arch/arm64/kernel/module-plts.c | 2 +- arch/arm64/mm/numa.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 044c0ae4d6c8..b182442b87a3 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -302,7 +302,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type, symbol index and addend */ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); - if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + if (!str_has_prefix(secstrings + dstsec->sh_name, ".init")) core_plts += count_plts(syms, rels, numrels, sechdrs[i].sh_info, dstsec); else diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4f241cc7cc3b..4decf1659700 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -29,7 +29,7 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) + if (str_has_prefix(opt, "off")) numa_off = true; return 0; -- cgit v1.2.3 From 332e5281a4e8269b96233a7babc98b03596b7e6d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jul 2019 08:14:19 +0100 Subject: arm64: esr: Add ESR exception class encoding for trapped ERET The ESR.EC encoding of 0b011010 (0x1a) describes an exception generated by an ERET, ERETAA or ERETAB instruction as a result of a nested virtualisation trap to EL2. Add an encoding for this EC and a string description so that we identify it correctly if we take one unexpectedly. Acked-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 3 ++- arch/arm64/kernel/traps.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 65ac18400979..cb29253ae86b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -34,7 +34,8 @@ #define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) -/* Unallocated EC: 0x1A - 0x1E */ +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d3313797cca9..42c8422cdf4a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -743,6 +743,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SMC64] = "SMC (AArch64)", [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", + [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", -- cgit v1.2.3 From 5cf896fb6be3effd9aea455b22213e27be8bdb1d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 31 Jul 2019 18:18:42 -0700 Subject: arm64: Add support for relocating the kernel with RELR relocations RELR is a relocation packing format for relative relocations. The format is described in a generic-abi proposal: https://groups.google.com/d/topic/generic-abi/bX460iggiKg/discussion The LLD linker can be instructed to pack relocations in the RELR format by passing the flag --pack-dyn-relocs=relr. This patch adds a new config option, CONFIG_RELR. Enabling this option instructs the linker to pack vmlinux's relative relocations in the RELR format, and causes the kernel to apply the relocations at startup along with the RELA relocations. RELA relocations still need to be applied because the linker will emit RELA relative relocations if they are unrepresentable in the RELR format (i.e. address not a multiple of 2). Enabling CONFIG_RELR reduces the size of a defconfig kernel image with CONFIG_RANDOMIZE_BASE by 3.5MB/16% uncompressed, or 550KB/5% compressed (lz4). Signed-off-by: Peter Collingbourne Tested-by: Nick Desaulniers Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- Makefile | 4 ++ arch/Kconfig | 14 ++++++ arch/arm64/Kconfig | 1 + arch/arm64/kernel/head.S | 96 ++++++++++++++++++++++++++++++++++++++--- arch/arm64/kernel/vmlinux.lds.S | 9 ++++ init/Kconfig | 3 ++ scripts/tools-support-relr.sh | 16 +++++++ 7 files changed, 137 insertions(+), 6 deletions(-) create mode 100755 scripts/tools-support-relr.sh (limited to 'arch/arm64/kernel') diff --git a/Makefile b/Makefile index 23cdf1f41364..9e6ec0c9962c 100644 --- a/Makefile +++ b/Makefile @@ -912,6 +912,10 @@ ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) endif +ifeq ($(CONFIG_RELR),y) +LDFLAGS_vmlinux += --pack-dyn-relocs=relr +endif + # insure the checker run with the right endianness CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian) diff --git a/arch/Kconfig b/arch/Kconfig index a7b57dd42c26..aa6bdb3df5c1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -925,6 +925,20 @@ config LOCK_EVENT_COUNTS the chance of application behavior change because of timing differences. The counts are reported via debugfs. +# Select if the architecture has support for applying RELR relocations. +config ARCH_HAS_RELR + bool + +config RELR + bool "Use RELR relocation packing" + depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR + default y + help + Store the kernel's dynamic relocations in the RELR relocation packing + format. Requires a compatible linker (LLD supports this feature), as + well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy + are compatible). + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3adcec05b1f6..2681eb79c40b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1467,6 +1467,7 @@ endif config RELOCATABLE bool + select ARCH_HAS_RELR help This builds the kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2cdacd1c141b..cc23302e9d95 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -102,6 +102,8 @@ pe_header: * x23 stext() .. start_kernel() physical misalignment/KASLR offset * x28 __create_page_tables() callee preserved temp register * x19/x20 __primary_switch() callee preserved temp registers + * x24 __primary_switch() .. relocate_kernel() + * current RELR displacement */ ENTRY(stext) bl preserve_boot_args @@ -834,14 +836,93 @@ __relocate_kernel: 0: cmp x9, x10 b.hs 1f - ldp x11, x12, [x9], #24 - ldr x13, [x9, #-8] - cmp w12, #R_AARCH64_RELATIVE + ldp x12, x13, [x9], #24 + ldr x14, [x9, #-8] + cmp w13, #R_AARCH64_RELATIVE b.ne 0b - add x13, x13, x23 // relocate - str x13, [x11, x23] + add x14, x14, x23 // relocate + str x14, [x12, x23] b 0b -1: ret + +1: +#ifdef CONFIG_RELR + /* + * Apply RELR relocations. + * + * RELR is a compressed format for storing relative relocations. The + * encoded sequence of entries looks like: + * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] + * + * i.e. start with an address, followed by any number of bitmaps. The + * address entry encodes 1 relocation. The subsequent bitmap entries + * encode up to 63 relocations each, at subsequent offsets following + * the last address entry. + * + * The bitmap entries must have 1 in the least significant bit. The + * assumption here is that an address cannot have 1 in lsb. Odd + * addresses are not supported. Any odd addresses are stored in the RELA + * section, which is handled above. + * + * Excluding the least significant bit in the bitmap, each non-zero + * bit in the bitmap represents a relocation to be applied to + * a corresponding machine word that follows the base address + * word. The second least significant bit represents the machine + * word immediately following the initial address, and each bit + * that follows represents the next word, in linear order. As such, + * a single bitmap can encode up to 63 relocations in a 64-bit object. + * + * In this implementation we store the address of the next RELR table + * entry in x9, the address being relocated by the current address or + * bitmap entry in x13 and the address being relocated by the current + * bit in x14. + * + * Because addends are stored in place in the binary, RELR relocations + * cannot be applied idempotently. We use x24 to keep track of the + * currently applied displacement so that we can correctly relocate if + * __relocate_kernel is called twice with non-zero displacements (i.e. + * if there is both a physical misalignment and a KASLR displacement). + */ + ldr w9, =__relr_offset // offset to reloc table + ldr w10, =__relr_size // size of reloc table + add x9, x9, x11 // __va(.relr) + add x10, x9, x10 // __va(.relr) + sizeof(.relr) + + sub x15, x23, x24 // delta from previous offset + cbz x15, 7f // nothing to do if unchanged + mov x24, x23 // save new offset + +2: cmp x9, x10 + b.hs 7f + ldr x11, [x9], #8 + tbnz x11, #0, 3f // branch to handle bitmaps + add x13, x11, x23 + ldr x12, [x13] // relocate address entry + add x12, x12, x15 + str x12, [x13], #8 // adjust to start of bitmap + b 2b + +3: mov x14, x13 +4: lsr x11, x11, #1 + cbz x11, 6f + tbz x11, #0, 5f // skip bit if not set + ldr x12, [x14] // relocate bit + add x12, x12, x15 + str x12, [x14] + +5: add x14, x14, #8 // move to next bit's address + b 4b + +6: /* + * Move to the next bitmap's address. 8 is the word size, and 63 is the + * number of significant bits in a bitmap entry. + */ + add x13, x13, #(8 * 63) + b 2b + +7: +#endif + ret + ENDPROC(__relocate_kernel) #endif @@ -854,6 +935,9 @@ __primary_switch: adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_RELR + mov x24, #0 // no RELR displacement yet +#endif bl __relocate_kernel #ifdef CONFIG_RANDOMIZE_BASE ldr x8, =__primary_switched diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7fa008374907..31716afa30f6 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -200,6 +200,15 @@ SECTIONS __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); __rela_size = SIZEOF(.rela.dyn); +#ifdef CONFIG_RELR + .relr.dyn : ALIGN(8) { + *(.relr.dyn) + } + + __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); + __relr_size = SIZEOF(.relr.dyn); +#endif + . = ALIGN(SEGMENT_ALIGN); __initdata_end = .; __init_end = .; diff --git a/init/Kconfig b/init/Kconfig index bd7d650d4a99..d96127ebc44e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -30,6 +30,9 @@ config CC_CAN_LINK config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) +config TOOLS_SUPPORT_RELR + def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) + config CC_HAS_WARN_MAYBE_UNINITIALIZED def_bool $(cc-option,-Wmaybe-uninitialized) help diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh new file mode 100755 index 000000000000..97a2c844a95e --- /dev/null +++ b/scripts/tools-support-relr.sh @@ -0,0 +1,16 @@ +#!/bin/sh -eu +# SPDX-License-Identifier: GPL-2.0 + +tmp_file=$(mktemp) +trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT + +cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1 +void *p = &p; +END +"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file + +# Despite printing an error message, GNU nm still exits with exit code 0 if it +# sees a relr section. So we need to check that nothing is printed to stderr. +test -z "$("$NM" $tmp_file 2>&1 >/dev/null)" + +"$OBJCOPY" -O binary $tmp_file $tmp_file.bin -- cgit v1.2.3 From 63f0c60379650d82250f22e4cf4137ef3dc4f43d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 23 Jul 2019 19:58:39 +0200 Subject: arm64: Introduce prctl() options to control the tagged user addresses ABI It is not desirable to relax the ABI to allow tagged user addresses into the kernel indiscriminately. This patch introduces a prctl() interface for enabling or disabling the tagged ABI with a global sysctl control for preventing applications from enabling the relaxed ABI (meant for testing user-space prctl() return error checking without reconfiguring the kernel). The ABI properties are inherited by threads of the same application and fork()'ed children but cleared on execve(). A Kconfig option allows the overall disabling of the relaxed ABI. The PR_SET_TAGGED_ADDR_CTRL will be expanded in the future to handle MTE-specific settings like imprecise vs precise exceptions. Reviewed-by: Kees Cook Signed-off-by: Catalin Marinas Signed-off-by: Andrey Konovalov Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 9 +++++ arch/arm64/include/asm/processor.h | 8 ++++ arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/include/asm/uaccess.h | 4 +- arch/arm64/kernel/process.c | 73 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/prctl.h | 5 +++ kernel/sys.c | 12 ++++++ 7 files changed, 111 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3adcec05b1f6..5d254178b9ca 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1110,6 +1110,15 @@ config ARM64_SW_TTBR0_PAN zeroed area and reserved ASID. The user access routines restore the valid TTBR0_EL1 temporarily. +config ARM64_TAGGED_ADDR_ABI + bool "Enable the tagged user addresses syscall ABI" + default y + help + When this option is enabled, user applications can opt in to a + relaxed ABI via prctl() allowing tagged addresses to be passed + to system calls as pointer arguments. For details, see + Documentation/arm64/tagged-address-abi.txt. + menuconfig COMPAT bool "Kernel support for 32-bit EL0" depends on ARM64_4K_PAGES || EXPERT diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 844e2964b0f5..28eed40ffc12 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -306,6 +306,14 @@ extern void __init minsigstksz_setup(void); /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ +long set_tagged_addr_ctrl(unsigned long arg); +long get_tagged_addr_ctrl(void); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl() +#endif + /* * For CONFIG_GCC_PLUGIN_STACKLEAK * diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 180b34ec5965..012238d8e58d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -90,6 +90,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SVE 23 /* Scalable Vector Extension in use */ #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ +#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index a138e3b4f717..097d6bfac0b7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -62,7 +62,9 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si { unsigned long ret, limit = current_thread_info()->addr_limit; - addr = untagged_addr(addr); + if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && + test_thread_flag(TIF_TAGGED_ADDR)) + addr = untagged_addr(addr); __chk_user_ptr(addr); asm volatile( diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f674f28df663..76b7c55026aa 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include #include #include @@ -307,11 +309,18 @@ static void tls_thread_flush(void) } } +static void flush_tagged_addr_state(void) +{ + if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI)) + clear_thread_flag(TIF_TAGGED_ADDR); +} + void flush_thread(void) { fpsimd_flush_thread(); tls_thread_flush(); flush_ptrace_hw_breakpoint(current); + flush_tagged_addr_state(); } void release_thread(struct task_struct *dead_task) @@ -565,3 +574,67 @@ void arch_setup_new_exec(void) ptrauth_thread_init_user(current); } + +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_prctl_allowed = 1; + +long set_tagged_addr_ctrl(unsigned long arg) +{ + if (!tagged_addr_prctl_allowed) + return -EINVAL; + if (is_compat_task()) + return -EINVAL; + if (arg & ~PR_TAGGED_ADDR_ENABLE) + return -EINVAL; + + update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + + return 0; +} + +long get_tagged_addr_ctrl(void) +{ + if (!tagged_addr_prctl_allowed) + return -EINVAL; + if (is_compat_task()) + return -EINVAL; + + if (test_thread_flag(TIF_TAGGED_ADDR)) + return PR_TAGGED_ADDR_ENABLE; + + return 0; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ +static int zero; +static int one = 1; + +static struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr", + .mode = 0644, + .data = &tagged_addr_prctl_allowed, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static int __init tagged_addr_init(void) +{ + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + return 0; +} + +core_initcall(tagged_addr_init); +#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 094bb03b9cc2..2e927b3e9d6c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -229,4 +229,9 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 2969304c29fe..c6c4d5358bd3 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -124,6 +124,12 @@ #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif +#ifndef SET_TAGGED_ADDR_CTRL +# define SET_TAGGED_ADDR_CTRL(a) (-EINVAL) +#endif +#ifndef GET_TAGGED_ADDR_CTRL +# define GET_TAGGED_ADDR_CTRL() (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2492,6 +2498,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = PAC_RESET_KEYS(me, arg2); break; + case PR_SET_TAGGED_ADDR_CTRL: + error = SET_TAGGED_ADDR_CTRL(arg2); + break; + case PR_GET_TAGGED_ADDR_CTRL: + error = GET_TAGGED_ADDR_CTRL(); + break; default: error = -EINVAL; break; -- cgit v1.2.3 From b99286b088ea843b935dcfb29f187697359fe5cd Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 5 Aug 2019 23:05:03 -0400 Subject: arm64/prefetch: fix a -Wtype-limits warning The commit d5370f754875 ("arm64: prefetch: add alternative pattern for CPUs without a prefetcher") introduced MIDR_IS_CPU_MODEL_RANGE() to be used in has_no_hw_prefetch() with rv_min=0 which generates a compilation warning from GCC, In file included from ./arch/arm64/include/asm/cache.h:8, from ./include/linux/cache.h:6, from ./include/linux/printk.h:9, from ./include/linux/kernel.h:15, from ./include/linux/cpumask.h:10, from arch/arm64/kernel/cpufeature.c:11: arch/arm64/kernel/cpufeature.c: In function 'has_no_hw_prefetch': ./arch/arm64/include/asm/cputype.h:59:26: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ ^~ arch/arm64/kernel/cpufeature.c:889:9: note: in expansion of macro 'MIDR_IS_CPU_MODEL_RANGE' return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, ^~~~~~~~~~~~~~~~~~~~~~~ Fix it by converting MIDR_IS_CPU_MODEL_RANGE to a static inline function. Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 21 +++++++++++---------- arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index e7d46631cc42..b1454d117cd2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -51,14 +51,6 @@ #define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ MIDR_ARCHITECTURE_MASK) -#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \ -({ \ - u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \ - u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \ - \ - _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ - }) - #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 @@ -159,10 +151,19 @@ struct midr_range { #define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r) #define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) +static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min, + u32 rv_max) +{ + u32 _model = midr & MIDR_CPU_MODEL_MASK; + u32 rv = midr & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); + + return _model == model && rv >= rv_min && rv <= rv_max; +} + static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) { - return MIDR_IS_CPU_MODEL_RANGE(midr, range->model, - range->rv_min, range->rv_max); + return midr_is_cpu_model_range(midr, range->model, + range->rv_min, range->rv_max); } static inline bool diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d19d14ba9ae4..95201e5ff5e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -886,7 +886,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ u32 midr = read_cpuid_id(); /* Cavium ThunderX pass 1.x and 2.x */ - return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, + return midr_is_cpu_model_range(midr, MIDR_THUNDERX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -- cgit v1.2.3 From 08f103b9a9502974109fab47ea35ca8542c4e57a Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 7 Aug 2019 11:34:45 +0100 Subject: arm64/ptrace: Fix typoes in sve_set() comment The ptrace trace SVE flags are prefixed with SVE_PT_*. Update the comment accordingly. Reviewed-by: Dave Martin Signed-off-by: Julien Grall Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3cf3b135027e..21176d02e21a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -870,7 +870,7 @@ static int sve_set(struct task_struct *target, goto out; /* - * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by + * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * sve_set_vector_length(), which will also validate them for us: */ ret = sve_set_vector_length(target, header.vl, -- cgit v1.2.3 From 14c127c957c1c6070647c171e72f06e0db275ebf Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:14 +0100 Subject: arm64: mm: Flip kernel VA space In order to allow for a KASAN shadow that changes size at boot time, one must fix the KASAN_SHADOW_END for both 48 & 52-bit VAs and "grow" the start address. Also, it is highly desirable to maintain the same function addresses in the kernel .text between VA sizes. Both of these requirements necessitate us to flip the kernel address space halves s.t. the direct linear map occupies the lower addresses. This patch puts the direct linear map in the lower addresses of the kernel VA range and everything else in the higher ranges. We need to adjust: *) KASAN shadow region placement logic, *) KASAN_SHADOW_OFFSET computation logic, *) virt_to_phys, phys_to_virt checks, *) page table dumper. These are all small changes, that need to take place atomically, so they are bundled into this commit. As part of the re-arrangement, a guard region of 2MB (to preserve alignment for fixed map) is added after the vmemmap. Otherwise the vmemmap could intersect with IS_ERR pointers. Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- arch/arm64/include/asm/memory.h | 8 ++++---- arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/mm/dump.c | 5 +++-- arch/arm64/mm/init.c | 9 +-------- arch/arm64/mm/kasan_init.c | 6 +++--- arch/arm64/mm/mmu.c | 4 ++-- 8 files changed, 16 insertions(+), 22 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 61de992bbea3..61f7926fdeca 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -130,7 +130,7 @@ KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) # - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) # in 32-bit arithmetic KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ - (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 1 - 32))) \ + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) \ - (1 << (64 - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) )) ) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 98fda92a2612..380594b1a0ba 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -38,9 +38,9 @@ */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define VA_START (UL(0xffffffffffffffff) - \ - (UL(1) << VA_BITS) + 1) -#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ (UL(1) << (VA_BITS - 1)) + 1) +#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ + (UL(1) << VA_BITS) + 1) #define KIMAGE_VADDR (MODULES_END) #define BPF_JIT_REGION_START (VA_START + KASAN_SHADOW_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) @@ -48,7 +48,7 @@ #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (BPF_JIT_REGION_END) #define MODULES_VSIZE (SZ_128M) -#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) +#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) #define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) @@ -231,7 +231,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) * space. Testing the top bit for the start of the region is a * sufficient check. */ -#define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1))) +#define __is_lm_address(addr) (!((addr) & BIT(VA_BITS - 1))) #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5fdcfe237338..046b811309bb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -21,7 +21,7 @@ * and fixed mappings */ #define VMALLOC_START (MODULES_END) -#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) +#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9341fcc6e809..e130db05d932 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -496,7 +496,7 @@ int swsusp_arch_resume(void) rc = -ENOMEM; goto out; } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, VA_START); if (rc) goto out; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 82b3a7fdb4a6..beec87488e97 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -26,6 +26,8 @@ #include static const struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { VA_START, "Linear Mapping end" }, #ifdef CONFIG_KASAN { KASAN_SHADOW_START, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, @@ -42,7 +44,6 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; @@ -376,7 +377,7 @@ static void ptdump_initialize(void) static struct ptdump_info kernel_ptdump_info = { .mm = &init_mm, .markers = address_markers, - .base_addr = VA_START, + .base_addr = PAGE_OFFSET, }; void ptdump_check_wx(void) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3c795278def..62927ed02229 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -301,7 +301,7 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = -(s64)PAGE_OFFSET; + const s64 linear_region_size = BIT(VA_BITS - 1); /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); @@ -309,13 +309,6 @@ void __init arm64_memblock_init(void) /* Remove memory above our supported physical address size */ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); - /* - * Ensure that the linear region takes up exactly half of the kernel - * virtual address space. This way, we can distinguish a linear address - * from a kernel/module/vmalloc address by testing a single bit. - */ - BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); - /* * Select a suitable value for the base of physical memory. */ diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6cf97b904ebb..05edfe9b02e4 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -225,10 +225,10 @@ void __init kasan_init(void) kasan_map_populate(kimg_shadow_start, kimg_shadow_end, early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)mod_shadow_start); + kasan_populate_early_shadow(kasan_mem_to_shadow((void *) VA_START), + (void *)mod_shadow_start); kasan_populate_early_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + (void *)KASAN_SHADOW_END); if (kimg_shadow_start > mod_shadow_end) kasan_populate_early_shadow((void *)mod_shadow_end, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 750a69dde39b..1d4247f9a496 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -398,7 +398,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= VA_START) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -425,7 +425,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= VA_START) && (virt < VMALLOC_START)) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; -- cgit v1.2.3 From 90ec95cda91a021d82351c976896a63aa364ebf1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:17 +0100 Subject: arm64: mm: Introduce VA_BITS_MIN In order to support 52-bit kernel addresses detectable at boot time, the kernel needs to know the most conservative VA_BITS possible should it need to fall back to this quantity due to lack of hardware support. A new compile time constant VA_BITS_MIN is introduced in this patch and it is employed in the KASAN end address, KASLR, and EFI stub. For Arm, if 52-bit VA support is unavailable the fallback is to 48-bits. In other words: VA_BITS_MIN = min (48, VA_BITS) Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/include/asm/efi.h | 4 ++-- arch/arm64/include/asm/memory.h | 9 ++++++++- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/kaslr.c | 6 +++--- arch/arm64/mm/kasan_init.c | 3 ++- 6 files changed, 17 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 76a144702586..b54d3a86c444 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -79,7 +79,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) /* * On arm64, we have to ensure that the initrd ends up in the linear region, - * which is a 1 GB aligned region of size '1UL << (VA_BITS - 1)' that is + * which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the @@ -90,7 +90,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, unsigned long image_addr) { - return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS - 1)); + return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 968659c90f5c..79e75e45d560 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -52,6 +52,13 @@ #define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) +#if VA_BITS > 48 +#define VA_BITS_MIN (48) +#else +#define VA_BITS_MIN (VA_BITS) +#endif +#define _VA_START(va) (UL(0xffffffffffffffff) - \ + (UL(1) << ((va) - 1)) + 1) #define KERNEL_START _text #define KERNEL_END _end @@ -74,7 +81,7 @@ #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 -#define KASAN_SHADOW_END (VA_START) +#define KASAN_SHADOW_END (_VA_START(VA_BITS_MIN)) #endif #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 844e2964b0f5..0e1f2770192a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -42,7 +42,7 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. */ -#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS) +#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN) #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2cdacd1c141b..ac58c69993ec 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -314,7 +314,7 @@ __create_page_tables: mov x5, #52 cbnz x6, 1f #endif - mov x5, #VA_BITS + mov x5, #VA_BITS_MIN 1: adr_l x6, vabits_user str x5, [x6] diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 708051655ad9..5a59f7567f9c 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -116,15 +116,15 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the - * middle half of the VMALLOC area (VA_BITS - 2), and stay clear of + * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of * the lower and upper quarters to avoid colliding with other * allocations. * Even if we could randomize at page granularity for 16k and 64k pages, * let's always round to 2 MB so we don't interfere with the ability to * map using contiguous PTEs */ - mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); - offset = BIT(VA_BITS - 3) + (seed & mask); + mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1); + offset = BIT(VA_BITS_MIN - 3) + (seed & mask); /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 05edfe9b02e4..725222271474 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -156,7 +156,8 @@ asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); -- cgit v1.2.3 From 5383cc6efed13784ddb3cff2cc183b6b8c50c8db Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:18 +0100 Subject: arm64: mm: Introduce vabits_actual In order to support 52-bit kernel addresses detectable at boot time, one needs to know the actual VA_BITS detected. A new variable vabits_actual is introduced in this commit and employed for the KVM hypervisor layout, KASAN, fault handling and phys-to/from-virt translation where there would normally be compile time constants. In order to maintain performance in phys_to_virt, another variable physvirt_offset is introduced. Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/include/asm/kasan.h | 2 +- arch/arm64/include/asm/memory.h | 11 ++++++----- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/kernel/head.S | 5 +++++ arch/arm64/kvm/va_layout.c | 14 +++++++------- arch/arm64/mm/fault.c | 4 ++-- arch/arm64/mm/init.c | 7 ++++++- arch/arm64/mm/mmu.c | 3 +++ 8 files changed, 31 insertions(+), 17 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 10d2add842da..b0dc4abc3589 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -31,7 +31,7 @@ * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) */ #define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT))) -#define KASAN_SHADOW_START _KASAN_SHADOW_START(VA_BITS) +#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) void kasan_init(void); void kasan_copy_shadow(pgd_t *pgdir); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 79e75e45d560..364635b8370a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -37,8 +37,6 @@ * VA_START - the first kernel virtual address. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) - \ - (UL(1) << (VA_BITS - 1)) + 1) #define PAGE_OFFSET (UL(0xffffffffffffffff) - \ (UL(1) << VA_BITS) + 1) #define KIMAGE_VADDR (MODULES_END) @@ -166,10 +164,13 @@ #endif #ifndef __ASSEMBLY__ +extern u64 vabits_actual; +#define VA_START (_VA_START(vabits_actual)) #include #include +extern s64 physvirt_offset; extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) @@ -240,9 +241,9 @@ static inline const void *__tag_set(const void *addr, u8 tag) * space. Testing the top bit for the start of the region is a * sufficient check. */ -#define __is_lm_address(addr) (!((addr) & BIT(VA_BITS - 1))) +#define __is_lm_address(addr) (!((addr) & BIT(vabits_actual - 1))) -#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) +#define __lm_to_phys(addr) (((addr) + physvirt_offset)) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ @@ -261,7 +262,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_addr_symbol(x) __pa_symbol_nodebug(x) #endif -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) +#define __phys_to_virt(x) ((unsigned long)((x) - physvirt_offset)) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 7ed0adb187a8..670003a55d28 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -95,7 +95,7 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) isb(); } -#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual)) #define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ac58c69993ec..6dc7349868d9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -321,6 +321,11 @@ __create_page_tables: dmb sy dc ivac, x6 // Invalidate potentially stale cache line + adr_l x6, vabits_actual + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + /* * VA_BITS may be too small to allow for an ID mapping to be created * that covers system RAM if that is located sufficiently high in the diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index acd8084f1f2c..2cf7d4b606c3 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -29,25 +29,25 @@ static void compute_layout(void) int kva_msb; /* Where is my RAM region? */ - hyp_va_msb = idmap_addr & BIT(VA_BITS - 1); - hyp_va_msb ^= BIT(VA_BITS - 1); + hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); + hyp_va_msb ^= BIT(vabits_actual - 1); kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (VA_BITS - 1)) { + if (kva_msb == (vabits_actual - 1)) { /* * No space in the address, let's compute the mask so - * that it covers (VA_BITS - 1) bits, and the region + * that it covers (vabits_actual - 1) bits, and the region * bit. The tag stays set to zero. */ - va_mask = BIT(VA_BITS - 1) - 1; + va_mask = BIT(vabits_actual - 1) - 1; va_mask |= hyp_va_msb; } else { /* * We do have some free bits to insert a random tag. * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == VA_BITS): + * using the following formula (with V == vabits_actual): * * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 * --------------------------------------------------------- @@ -55,7 +55,7 @@ static void compute_layout(void) */ tag_lsb = kva_msb; va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb); + tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); tag_val |= hyp_va_msb; tag_val >>= tag_lsb; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cfd65b63f36f..6b195871769a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -138,9 +138,9 @@ static void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n", + pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? VA_BITS : (int)vabits_user, + mm == &init_mm ? vabits_actual : (int)vabits_user, (unsigned long)virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 62927ed02229..e752f46d430e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -50,6 +50,9 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); +s64 physvirt_offset __ro_after_init; +EXPORT_SYMBOL(physvirt_offset); + phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -301,7 +304,7 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = BIT(VA_BITS - 1); + const s64 linear_region_size = BIT(vabits_actual - 1); /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); @@ -315,6 +318,8 @@ void __init arm64_memblock_init(void) memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + physvirt_offset = PHYS_OFFSET - PAGE_OFFSET; + /* * Remove the memory that we will not be able to cover with the * linear mapping. Take care not to clip the kernel which may be diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1d4247f9a496..07b30e6d17f8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -43,6 +43,9 @@ u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 vabits_user __ro_after_init; EXPORT_SYMBOL(vabits_user); +u64 __section(".mmuoff.data.write") vabits_actual; +EXPORT_SYMBOL(vabits_actual); + u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); -- cgit v1.2.3 From c812026c54cfaec23fa1d78cdbfd0e56e787470a Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:19 +0100 Subject: arm64: mm: Logic to make offset_ttbr1 conditional When running with a 52-bit userspace VA and a 48-bit kernel VA we offset ttbr1_el1 to allow the kernel pagetables with a 52-bit PTRS_PER_PGD to be used for both userspace and kernel. Moving on to a 52-bit kernel VA we no longer require this offset to ttbr1_el1 should we be running on a system with HW support for 52-bit VAs. This patch introduces conditional logic to offset_ttbr1 to query SYS_ID_AA64MMFR2_EL1 whenever 52-bit VAs are selected. If there is HW support for 52-bit VAs then the ttbr1 offset is skipped. We choose to read a system register rather than vabits_actual because offset_ttbr1 can be called in places where the kernel data is not actually mapped. Calls to offset_ttbr1 appear to be made from rarely called code paths so this extra logic is not expected to adversely affect performance. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 12 ++++++++++-- arch/arm64/kernel/head.S | 2 +- arch/arm64/kernel/hibernate-asm.S | 8 ++++---- arch/arm64/mm/proc.S | 6 +++--- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e3a15c751b13..ede368bafa2c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -538,9 +538,17 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * In future this may be nop'ed out when dealing with 52-bit kernel VAs. * ttbr: Value of ttbr to set, modified. */ - .macro offset_ttbr1, ttbr + .macro offset_ttbr1, ttbr, tmp #ifdef CONFIG_ARM64_USER_VA_BITS_52 orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +#endif + +#ifdef CONFIG_ARM64_VA_BITS_52 + mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 + and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + cbnz \tmp, .Lskipoffs_\@ + orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +.Lskipoffs_\@ : #endif .endm @@ -550,7 +558,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * to be nop'ed out when dealing with 52-bit kernel VAs. */ .macro restore_ttbr1, ttbr -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#if defined(CONFIG_ARM64_USER_VA_BITS_52) || defined(CONFIG_ARM64_VA_BITS_52) bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET #endif .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6dc7349868d9..a96dc4386c7c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -777,7 +777,7 @@ ENTRY(__enable_mmu) phys_to_ttbr x1, x1 phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1 + offset_ttbr1 x1, x3 msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 2f4a2ce7264b..38bcd4d4e43b 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -22,14 +22,14 @@ * Even switching to our copied tables will cause a changed output address at * each stage of the walk. */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp +.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 phys_to_ttbr \tmp, \zero_page msr ttbr1_el1, \tmp isb tlbi vmalle1 dsb nsh phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp + offset_ttbr1 \tmp, \tmp2 msr ttbr1_el1, \tmp isb .endm @@ -70,7 +70,7 @@ ENTRY(swsusp_arch_suspend_exit) * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page */ - break_before_make_ttbr_switch x5, x0, x6 + break_before_make_ttbr_switch x5, x0, x6, x8 mov x21, x1 mov x30, x2 @@ -101,7 +101,7 @@ ENTRY(swsusp_arch_suspend_exit) dsb ish /* wait for PoU cleaning to finish */ /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21, x6 + break_before_make_ttbr_switch x25, x21, x6, x8 ic ialluis dsb ish diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7dbf2be470f6..8d289ff7584d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -168,7 +168,7 @@ ENDPROC(cpu_do_switch_mm) .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, empty_zero_page phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2 + offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 isb tlbi vmalle1 @@ -187,7 +187,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - offset_ttbr1 x0 + offset_ttbr1 x0, x3 msr ttbr1_el1, x0 isb @@ -362,7 +362,7 @@ __idmap_kpti_secondary: cbnz w18, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb + offset_ttbr1 swapper_ttb, x18 msr ttbr1_el1, swapper_ttb isb ret -- cgit v1.2.3 From b6d00d47e81a49f6cf462518c10408f37a3e6785 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:22 +0100 Subject: arm64: mm: Introduce 52-bit Kernel VAs Most of the machinery is now in place to enable 52-bit kernel VAs that are detectable at boot time. This patch adds a Kconfig option for 52-bit user and kernel addresses and plumbs in the requisite CONFIG_ macros as well as sets TCR.T1SZ, physvirt_offset and vmemmap at early boot. To simplify things this patch also removes the 52-bit user/48-bit kernel kconfig option. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 20 +++++++++++--------- arch/arm64/include/asm/assembler.h | 13 ++++++++----- arch/arm64/include/asm/memory.h | 7 ++++--- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- arch/arm64/kernel/head.S | 4 ++-- arch/arm64/mm/init.c | 10 ++++++++++ arch/arm64/mm/proc.S | 3 ++- 8 files changed, 39 insertions(+), 22 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f7f23e47c28f..f5f7cb75a698 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -286,7 +286,7 @@ config PGTABLE_LEVELS int default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 - default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52) + default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 @@ -300,12 +300,12 @@ config ARCH_PROC_KCORE_TEXT config KASAN_SHADOW_OFFSET hex depends on KASAN - default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52) && !KASAN_SW_TAGS + default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS - default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52) && KASAN_SW_TAGS + default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS @@ -759,13 +759,14 @@ config ARM64_VA_BITS_47 config ARM64_VA_BITS_48 bool "48-bit" -config ARM64_USER_VA_BITS_52 - bool "52-bit (user)" +config ARM64_VA_BITS_52 + bool "52-bit" depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN) help Enable 52-bit virtual addressing for userspace when explicitly - requested via a hint to mmap(). The kernel will continue to - use 48-bit virtual addresses for its own mappings. + requested via a hint to mmap(). The kernel will also use 52-bit + virtual addresses for its own mappings (provided HW support for + this feature is available, otherwise it reverts to 48-bit). NOTE: Enabling 52-bit virtual addressing in conjunction with ARMv8.3 Pointer Authentication will result in the PAC being @@ -778,7 +779,7 @@ endchoice config ARM64_FORCE_52BIT bool "Force 52-bit virtual addresses for userspace" - depends on ARM64_USER_VA_BITS_52 && EXPERT + depends on ARM64_VA_BITS_52 && EXPERT help For systems with 52-bit userspace VAs enabled, the kernel will attempt to maintain compatibility with older software by providing 48-bit VAs @@ -795,7 +796,8 @@ config ARM64_VA_BITS default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 default 47 if ARM64_VA_BITS_47 - default 48 if ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52 + default 48 if ARM64_VA_BITS_48 + default 52 if ARM64_VA_BITS_52 choice prompt "Physical address space size" diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ede368bafa2c..c066fc4976cd 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -349,6 +349,13 @@ alternative_endif bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH .endm +/* + * tcr_set_t1sz - update TCR.T1SZ + */ + .macro tcr_set_t1sz, valreg, t1sz + bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH + .endm + /* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value @@ -539,10 +546,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * ttbr: Value of ttbr to set, modified. */ .macro offset_ttbr1, ttbr, tmp -#ifdef CONFIG_ARM64_USER_VA_BITS_52 - orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET -#endif - #ifdef CONFIG_ARM64_VA_BITS_52 mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT) @@ -558,7 +561,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * to be nop'ed out when dealing with 52-bit kernel VAs. */ .macro restore_ttbr1, ttbr -#if defined(CONFIG_ARM64_USER_VA_BITS_52) || defined(CONFIG_ARM64_VA_BITS_52) +#ifdef CONFIG_ARM64_VA_BITS_52 bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET #endif .endm diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0204c2006c92..d911d0573460 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -44,8 +44,9 @@ * VA_START - the first kernel virtual address. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ - (UL(1) << VA_BITS) + 1) +#define _PAGE_OFFSET(va) (UL(0xffffffffffffffff) - \ + (UL(1) << (va)) + 1) +#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) #define KIMAGE_VADDR (MODULES_END) #define BPF_JIT_REGION_START (KASAN_SHADOW_END) #define BPF_JIT_REGION_SIZE (SZ_128M) @@ -68,7 +69,7 @@ #define KERNEL_START _text #define KERNEL_END _end -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 #define MAX_USER_VA_BITS 52 #else #define MAX_USER_VA_BITS VA_BITS diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 670003a55d28..3827ff4040a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,7 +63,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52)) + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) return false; return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index db92950bb1a0..3df60f97da1f 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -304,7 +304,7 @@ #define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) #endif -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 /* Must be at least 64-byte aligned to prevent corruption of the TTBR */ #define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \ (UL(1) << (48 - PGDIR_SHIFT))) * 8) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a96dc4386c7c..c8446f8c81f5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -308,7 +308,7 @@ __create_page_tables: adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 mrs_s x6, SYS_ID_AA64MMFR2_EL1 and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) mov x5, #52 @@ -794,7 +794,7 @@ ENTRY(__enable_mmu) ENDPROC(__enable_mmu) ENTRY(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 ldr_l x0, vabits_user cmp x0, #52 b.ne 2f diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 2940221e5519..531c497c5758 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -325,6 +325,16 @@ void __init arm64_memblock_init(void) vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)); + /* + * If we are running with a 52-bit kernel VA config on a system that + * does not support it, we have to offset our vmemmap and physvirt_offset + * s.t. we avoid the 52-bit portion of the direct linear map + */ + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) { + vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT; + physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48); + } + /* * Remove the memory that we will not be able to cover with the * linear mapping. Take care not to clip the kernel which may be diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8d289ff7584d..8b021c5c0884 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -438,10 +438,11 @@ ENTRY(__cpu_setup) TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS tcr_clear_errata_bits x10, x9, x5 -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 ldr_l x9, vabits_user sub x9, xzr, x9 add x9, x9, #64 + tcr_set_t1sz x10, x9 #else ldr_l x9, idmap_t0sz #endif -- cgit v1.2.3 From 2c624fe68715e76eba1a7089f91e122310dc663c Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 7 Aug 2019 16:55:23 +0100 Subject: arm64: mm: Remove vabits_user Previous patches have enabled 52-bit kernel + user VAs and there is no longer any scenario where user VA != kernel VA size. This patch removes the, now redundant, vabits_user variable and replaces usage with vabits_actual where appropriate. Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 3 --- arch/arm64/include/asm/pointer_auth.h | 2 +- arch/arm64/include/asm/processor.h | 2 +- arch/arm64/kernel/head.S | 7 +------ arch/arm64/mm/fault.c | 3 +-- arch/arm64/mm/mmu.c | 2 -- arch/arm64/mm/proc.S | 2 +- 7 files changed, 5 insertions(+), 16 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d911d0573460..ecc945ba8607 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -194,9 +194,6 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } -/* the actual size of a user virtual address */ -extern u64 vabits_user; - /* * Allow all memory at the discovery stage. We will clip it later. */ diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index d328540cb85e..7a24bad1a58b 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -69,7 +69,7 @@ extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); * The EL0 pointer bits used by a pointer authentication code. * This is dependent on TBI0 being enabled, or bits 63:56 would also apply. */ -#define ptrauth_user_pac_mask() GENMASK(54, vabits_user) +#define ptrauth_user_pac_mask() GENMASK(54, vabits_actual) /* Only valid for EL0 TTBR0 instruction pointers */ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 0e1f2770192a..e4c93945e477 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -43,7 +43,7 @@ */ #define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN) -#define TASK_SIZE_64 (UL(1) << vabits_user) +#define TASK_SIZE_64 (UL(1) << vabits_actual) #ifdef CONFIG_COMPAT #if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c8446f8c81f5..949b001a73bb 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -316,11 +316,6 @@ __create_page_tables: #endif mov x5, #VA_BITS_MIN 1: - adr_l x6, vabits_user - str x5, [x6] - dmb sy - dc ivac, x6 // Invalidate potentially stale cache line - adr_l x6, vabits_actual str x5, [x6] dmb sy @@ -795,7 +790,7 @@ ENDPROC(__enable_mmu) ENTRY(__cpu_secondary_check52bitva) #ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x0, vabits_user + ldr_l x0, vabits_actual cmp x0, #52 b.ne 2f diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6b195871769a..75eff57bd9ef 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -140,8 +140,7 @@ static void show_pte(unsigned long addr) pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? vabits_actual : (int)vabits_user, - (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, (unsigned long)virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 07b30e6d17f8..0c8f7e55f859 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,8 +40,6 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; -u64 vabits_user __ro_after_init; -EXPORT_SYMBOL(vabits_user); u64 __section(".mmuoff.data.write") vabits_actual; EXPORT_SYMBOL(vabits_actual); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8b021c5c0884..391f9cabfe60 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -439,7 +439,7 @@ ENTRY(__cpu_setup) tcr_clear_errata_bits x10, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 - ldr_l x9, vabits_user + ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 tcr_set_t1sz x10, x9 -- cgit v1.2.3 From 788961462f3471617749edf10d0fcafad410d2bb Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 9 Aug 2019 12:03:11 +0100 Subject: ARM: psci: cpuidle: Enable PSCI CPUidle driver Allow selection of the PSCI CPUidle in the kernel by updating the respective Kconfig entry. Remove PSCI callbacks from ARM/ARM64 generic CPU ops to prevent the PSCI idle driver from clashing with the generic ARM CPUidle driver initialization, that relies on CPU ops to initialize and enter idle states. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Ulf Hansson Cc: Will Deacon Cc: Ulf Hansson Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Catalin Marinas Cc: Mark Rutland Cc: "Rafael J. Wysocki" Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 7 ++++--- arch/arm64/kernel/psci.c | 4 ---- drivers/cpuidle/Kconfig.arm | 2 +- drivers/firmware/psci/psci.c | 10 ---------- 4 files changed, 5 insertions(+), 18 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index d1048173fd8a..619e0ebb8399 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -48,15 +49,15 @@ int arm_cpuidle_suspend(int index) int acpi_processor_ffh_lpi_probe(unsigned int cpu) { - return arm_cpuidle_init(cpu); + return psci_cpu_init_idle(cpu); } int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION(arm_cpuidle_suspend, + return CPU_PM_CPU_IDLE_ENTER_RETENTION(psci_cpu_suspend_enter, lpi->index); else - return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); + return CPU_PM_CPU_IDLE_ENTER(psci_cpu_suspend_enter, lpi->index); } #endif diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 85ee7d07889e..a543ab7e007c 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -105,10 +105,6 @@ static int cpu_psci_cpu_kill(unsigned int cpu) const struct cpu_operations cpu_psci_ops = { .name = "psci", -#ifdef CONFIG_CPU_IDLE - .cpu_init_idle = psci_cpu_init_idle, - .cpu_suspend = psci_cpu_suspend_enter, -#endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index eb014aa5ce6b..d8530475493c 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -14,7 +14,7 @@ config ARM_CPUIDLE provided by architecture code. config ARM_PSCI_CPUIDLE - bool + bool "PSCI CPU idle Driver" depends on ARM_PSCI_FW select DT_IDLE_STATES select CPU_IDLE_MULTIPLE_DRIVERS diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index f82ccd39a913..b343f8a34c6a 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -436,16 +436,6 @@ int psci_cpu_suspend_enter(unsigned long index) return ret; } - -/* ARM specific CPU idle operations */ -#ifdef CONFIG_ARM -static const struct cpuidle_ops psci_cpuidle_ops __initconst = { - .suspend = psci_cpu_suspend_enter, - .init = psci_dt_cpu_init_idle, -}; - -CPUIDLE_METHOD_OF_DECLARE(psci, "psci", &psci_cpuidle_ops); -#endif #endif static int psci_system_suspend(unsigned long unused) -- cgit v1.2.3 From 9ffeb6d08c3a4bbd7b1e33711b241f511e2ded79 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 9 Aug 2019 12:03:12 +0100 Subject: PSCI: cpuidle: Refactor CPU suspend power_state parameter handling Current PSCI code handles idle state entry through the psci_cpu_suspend_enter() API, that takes an idle state index as a parameter and convert the index into a previously initialized power_state parameter before calling the PSCI.CPU_SUSPEND() with it. This is unwieldly, since it forces the PSCI firmware layer to keep track of power_state parameter for every idle state so that the index->power_state conversion can be made in the PSCI firmware layer instead of the CPUidle driver implementations. Move the power_state handling out of drivers/firmware/psci into the respective ACPI/DT PSCI CPUidle backends and convert the psci_cpu_suspend_enter() API to get the power_state parameter as input, which makes it closer to its firmware interface PSCI.CPU_SUSPEND() API. A notable side effect is that the PSCI ACPI/DT CPUidle backends now can directly handle (and if needed update) power_state parameters before handing them over to the PSCI firmware interface to trigger PSCI.CPU_SUSPEND() calls. Signed-off-by: Lorenzo Pieralisi Acked-by: Daniel Lezcano Reviewed-by: Ulf Hansson Reviewed-by: Sudeep Holla Cc: Will Deacon Cc: Ulf Hansson Cc: Sudeep Holla Cc: Daniel Lezcano Cc: Catalin Marinas Cc: Mark Rutland Cc: "Rafael J. Wysocki" Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 49 +++++++++++-- drivers/cpuidle/cpuidle-psci.c | 87 ++++++++++++++++++++++- drivers/firmware/psci/psci.c | 157 +++-------------------------------------- include/linux/cpuidle.h | 17 +++-- include/linux/psci.h | 4 +- 5 files changed, 154 insertions(+), 160 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 619e0ebb8399..e4d6af2fdec7 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -47,17 +47,58 @@ int arm_cpuidle_suspend(int index) #define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) +static int psci_acpi_cpu_init_idle(unsigned int cpu) +{ + int i, count; + struct acpi_lpi_state *lpi; + struct acpi_processor *pr = per_cpu(processors, cpu); + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + + count = pr->power.count - 1; + if (count <= 0) + return -ENODEV; + + for (i = 0; i < count; i++) { + u32 state; + + lpi = &pr->power.lpi_states[i + 1]; + /* + * Only bits[31:0] represent a PSCI power_state while + * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification + */ + state = lpi->address; + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + return -EINVAL; + } + } + + return 0; +} + int acpi_processor_ffh_lpi_probe(unsigned int cpu) { - return psci_cpu_init_idle(cpu); + return psci_acpi_cpu_init_idle(cpu); } int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { + u32 state = lpi->address; + if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION(psci_cpu_suspend_enter, - lpi->index); + return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter, + lpi->index, state); else - return CPU_PM_CPU_IDLE_ENTER(psci_cpu_suspend_enter, lpi->index); + return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + lpi->index, state); } #endif diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index ab1dea918ea3..f3c1a2396f98 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -22,10 +22,15 @@ #include "dt_idle_states.h" +static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); + static int psci_enter_idle_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int idx) { - return CPU_PM_CPU_IDLE_ENTER(psci_cpu_suspend_enter, idx); + u32 *state = __this_cpu_read(psci_power_state); + + return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + idx, state[idx - 1]); } static struct cpuidle_driver psci_idle_driver __initdata = { @@ -51,6 +56,86 @@ static const struct of_device_id psci_idle_state_match[] __initconst = { { }, }; +static int __init psci_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "arm,psci-suspend-param", state); + + if (err) { + pr_warn("%pOF missing arm,psci-suspend-param property\n", np); + return err; + } + + if (!psci_power_state_is_valid(*state)) { + pr_warn("Invalid PSCI power state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + +static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) +{ + int i, ret = 0, count = 0; + u32 *psci_states; + struct device_node *state_node; + + /* Count idle states */ + while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", + count))) { + count++; + of_node_put(state_node); + } + + if (!count) + return -ENODEV; + + psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); + if (!psci_states) + return -ENOMEM; + + for (i = 0; i < count; i++) { + state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + ret = psci_dt_parse_state_node(state_node, &psci_states[i]); + of_node_put(state_node); + + if (ret) + goto free_mem; + + pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); + } + + /* Idle states parsed correctly, initialize per-cpu pointer */ + per_cpu(psci_power_state, cpu) = psci_states; + return 0; + +free_mem: + kfree(psci_states); + return ret; +} + +static __init int psci_cpu_init_idle(unsigned int cpu) +{ + struct device_node *cpu_node; + int ret; + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) + return -ENODEV; + + ret = psci_dt_cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} + static int __init psci_idle_init_cpu(int cpu) { struct cpuidle_driver *drv; diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index b343f8a34c6a..84f4ff351c62 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -103,7 +103,7 @@ static inline bool psci_power_state_loses_context(u32 state) return state & mask; } -static inline bool psci_power_state_is_valid(u32 state) +bool psci_power_state_is_valid(u32 state) { const u32 valid_mask = psci_has_ext_power_state() ? PSCI_1_0_EXT_POWER_STATE_MASK : @@ -277,162 +277,21 @@ static int __init psci_features(u32 psci_func_id) } #ifdef CONFIG_CPU_IDLE -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int psci_dt_parse_state_node(struct device_node *np, u32 *state) -{ - int err = of_property_read_u32(np, "arm,psci-suspend-param", state); - - if (err) { - pr_warn("%pOF missing arm,psci-suspend-param property\n", np); - return err; - } - - if (!psci_power_state_is_valid(*state)) { - pr_warn("Invalid PSCI power state %#x\n", *state); - return -EINVAL; - } - - return 0; -} - -static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) -{ - int i, ret = 0, count = 0; - u32 *psci_states; - struct device_node *state_node; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - ret = psci_dt_parse_state_node(state_node, &psci_states[i]); - of_node_put(state_node); - - if (ret) - goto free_mem; - - pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); - } - - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} - -#ifdef CONFIG_ACPI -#include - -static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) -{ - int i, count; - u32 *psci_states; - struct acpi_lpi_state *lpi; - struct acpi_processor *pr = per_cpu(processors, cpu); - - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - lpi = &pr->power.lpi_states[i + 1]; - /* - * Only bits[31:0] represent a PSCI power_state while - * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification - */ - state = lpi->address; - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - kfree(psci_states); - return -EINVAL; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; -} -#else -static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) -{ - return -EINVAL; -} -#endif - -int psci_cpu_init_idle(unsigned int cpu) -{ - struct device_node *cpu_node; - int ret; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - if (!acpi_disabled) - return psci_acpi_cpu_init_idle(cpu); - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - ret = psci_dt_cpu_init_idle(cpu_node, cpu); - - of_node_put(cpu_node); - - return ret; -} - -static int psci_suspend_finisher(unsigned long index) +static int psci_suspend_finisher(unsigned long state) { - u32 *state = __this_cpu_read(psci_power_state); + u32 power_state = state; - return psci_ops.cpu_suspend(state[index - 1], - __pa_symbol(cpu_resume)); + return psci_ops.cpu_suspend(power_state, __pa_symbol(cpu_resume)); } -int psci_cpu_suspend_enter(unsigned long index) +int psci_cpu_suspend_enter(u32 state) { int ret; - u32 *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - if (!psci_power_state_loses_context(state[index - 1])) - ret = psci_ops.cpu_suspend(state[index - 1], 0); + if (!psci_power_state_loses_context(state)) + ret = psci_ops.cpu_suspend(state, 0); else - ret = cpu_suspend(index, psci_suspend_finisher); + ret = cpu_suspend(state, psci_suspend_finisher); return ret; } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb9a0db89f1a..12ae4b87494e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -256,7 +256,10 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} #endif -#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, is_retention) \ +#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, \ + idx, \ + state, \ + is_retention) \ ({ \ int __ret = 0; \ \ @@ -268,7 +271,7 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ - __ret = low_level_idle_enter(idx); \ + __ret = low_level_idle_enter(state); \ if (!is_retention) \ cpu_pm_exit(); \ } \ @@ -277,9 +280,15 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) }) #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0) #define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1) + +#define CPU_PM_CPU_IDLE_ENTER_PARAM(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0) + +#define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1) #endif /* _LINUX_CPUIDLE_H */ diff --git a/include/linux/psci.h b/include/linux/psci.h index a8a15613c157..e2bacc6fd2f2 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -15,8 +15,8 @@ bool psci_tos_resident_on(int cpu); -int psci_cpu_init_idle(unsigned int cpu); -int psci_cpu_suspend_enter(unsigned long index); +int psci_cpu_suspend_enter(u32 state); +bool psci_power_state_is_valid(u32 state); enum psci_conduit { PSCI_CONDUIT_NONE, -- cgit v1.2.3 From 98dc19902a0b2e5348e43d6a2c39a0a7d0fc639e Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 8 Aug 2019 15:40:07 -0500 Subject: arm64: topology: Use PPTT to determine if PE is a thread ACPI 6.3 adds a thread flag to represent if a CPU/PE is actually a thread. Given that the MPIDR_MT bit may not represent this information consistently on homogeneous machines we should prefer the PPTT flag if its available. Signed-off-by: Jeremy Linton Reviewed-by: Sudeep Holla Reviewed-by: Robert Richter [will: made acpi_cpu_is_threaded() return 'bool'] Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0825c4a856e3..6106c49f84bc 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -340,17 +340,28 @@ void remove_cpu_topology(unsigned int cpu) } #ifdef CONFIG_ACPI +static bool __init acpi_cpu_is_threaded(int cpu) +{ + int is_threaded = acpi_pptt_cpu_is_thread(cpu); + + /* + * if the PPTT doesn't have thread information, assume a homogeneous + * machine and return the current CPU's thread state. + */ + if (is_threaded < 0) + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; + + return !!is_threaded; +} + /* * Propagate the topology information of the processor_topology_node tree to the * cpu_topology array. */ static int __init parse_acpi_topology(void) { - bool is_threaded; int cpu, topology_id; - is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; - for_each_possible_cpu(cpu) { int i, cache_id; @@ -358,7 +369,7 @@ static int __init parse_acpi_topology(void) if (topology_id < 0) return topology_id; - if (is_threaded) { + if (acpi_cpu_is_threaded(cpu)) { cpu_topology[cpu].thread_id = topology_id; topology_id = find_acpi_cpu_topology(cpu, 1); cpu_topology[cpu].core_id = topology_id; -- cgit v1.2.3 From 80d838122643a09a9f99824adea4b4261e4451e6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Aug 2019 14:50:45 -0700 Subject: arm64: prefer __section from compiler_attributes.h GCC unescapes escaped string section names while Clang does not. Because __section uses the `#` stringification operator for the section name, it doesn't need to be escaped. This antipattern was found with: $ grep -e __section\(\" -e __section__\(\" -r Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 +- arch/arm64/kernel/smp_spin_table.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 64eeaa41e7ca..43da6dd29592 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -78,7 +78,7 @@ static inline u32 cache_type_cwg(void) return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; } -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) static inline int cache_line_size_of_cpu(void) { diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 76c2739ba8a4..c8a3fee00c11 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -19,7 +19,7 @@ #include extern void secondary_holding_pen(void); -volatile unsigned long __section(".mmuoff.data.read") +volatile unsigned long __section(.mmuoff.data.read) secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; -- cgit v1.2.3 From 0bf136a8cf11502643c27c8de1b1db8f161a31ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Aug 2019 15:16:37 +0100 Subject: arm64: constify icache_policy_str[] The icache_policy_str[] array contains compile-time constant data, and is never intentionally modified, so let's mark it as const. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 876055e37352..05933c065732 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static char *icache_policy_str[] = { +static const char *icache_policy_str[] = { [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", [ICACHE_POLICY_VIPT] = "VIPT", [ICACHE_POLICY_PIPT] = "PIPT", -- cgit v1.2.3 From 0da23df2ff043c8d39b389e32ee68af64b5f408e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Aug 2019 15:16:38 +0100 Subject: arm64: constify aarch64_insn_encoding_class[] The aarch64_insn_encoding_class[] array contains compile-time constant data, and is never intentionally modified, so let's mark it as const. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 84b059ed04fc..d801a7094076 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -26,7 +26,7 @@ #define AARCH64_INSN_N_BIT BIT(22) #define AARCH64_INSN_LSL_12 BIT(22) -static int aarch64_insn_encoding_class[] = { +static const int aarch64_insn_encoding_class[] = { AARCH64_INSN_CLS_UNKNOWN, AARCH64_INSN_CLS_UNKNOWN, AARCH64_INSN_CLS_UNKNOWN, -- cgit v1.2.3 From 37143dcc44f8f3348d47616598137b1580468973 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Aug 2019 15:16:39 +0100 Subject: arm64: constify sys64_hook instances All instances of struct sys64_hook contain compile-time constant data, and are never inentionally modified, so let's make them all const. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 42c8422cdf4a..a5d7ce4297b0 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -511,7 +511,7 @@ struct sys64_hook { void (*handler)(unsigned int esr, struct pt_regs *regs); }; -static struct sys64_hook sys64_hooks[] = { +static const struct sys64_hook sys64_hooks[] = { { .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK, .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, @@ -636,7 +636,7 @@ static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) arm64_compat_skip_faulting_instruction(regs, 4); } -static struct sys64_hook cp15_32_hooks[] = { +static const struct sys64_hook cp15_32_hooks[] = { { .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, @@ -656,7 +656,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) arm64_compat_skip_faulting_instruction(regs, 4); } -static struct sys64_hook cp15_64_hooks[] = { +static const struct sys64_hook cp15_64_hooks[] = { { .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, @@ -667,7 +667,7 @@ static struct sys64_hook cp15_64_hooks[] = { asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { - struct sys64_hook *hook, *hook_base; + const struct sys64_hook *hook, *hook_base; if (!cp15_cond_valid(esr, regs)) { /* @@ -707,7 +707,7 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { - struct sys64_hook *hook; + const struct sys64_hook *hook; for (hook = sys64_hooks; hook->handler; hook++) if ((hook->esr_mask & esr) == hook->esr_val) { -- cgit v1.2.3 From 77ad4ce69321abbe26ec92b2a2691a66531eb688 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Aug 2019 14:28:48 +0100 Subject: arm64: memory: rename VA_START to PAGE_END Prior to commit: 14c127c957c1c607 ("arm64: mm: Flip kernel VA space") ... VA_START described the start of the TTBR1 address space for a given VA size described by VA_BITS, where all kernel mappings began. Since that commit, VA_START described a portion midway through the address space, where the linear map ends and other kernel mappings begin. To avoid confusion, let's rename VA_START to PAGE_END, making it clear that it's not the start of the TTBR1 address space and implying that it's related to PAGE_OFFSET. Comments and other mnemonics are updated accordingly, along with a typo fix in the decription of VMEMMAP_SIZE. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Catalin Marinas Tested-by: Steve Capper Reviewed-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 20 ++++++++++---------- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/mm/dump.c | 6 +++--- arch/arm64/mm/kasan_init.c | 2 +- arch/arm64/mm/mmu.c | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index d69c2865ae40..a713bad71db5 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -28,20 +28,20 @@ * a struct page array * * If we are configured with a 52-bit kernel VA then our VMEMMAP_SIZE - * neads to cover the memory region from the beginning of the 52-bit - * PAGE_OFFSET all the way to VA_START for 48-bit. This allows us to + * needs to cover the memory region from the beginning of the 52-bit + * PAGE_OFFSET all the way to PAGE_END for 48-bit. This allows us to * keep a constant PAGE_OFFSET and "fallback" to using the higher end * of the VMEMMAP where 52-bit support is not available in hardware. */ -#define VMEMMAP_SIZE ((_VA_START(VA_BITS_MIN) - PAGE_OFFSET) \ +#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \ >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)) /* - * PAGE_OFFSET - the virtual address of the start of the linear map (top - * (VA_BITS - 1)) - * KIMAGE_VADDR - the virtual address of the start of the kernel image + * PAGE_OFFSET - the virtual address of the start of the linear map, at the + * start of the TTBR1 address space. + * PAGE_END - the end of the linear map, where all other kernel mappings begin. + * KIMAGE_VADDR - the virtual address of the start of the kernel image. * VA_BITS - the maximum number of bits for virtual addresses. - * VA_START - the first kernel virtual address. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) #define _PAGE_OFFSET(va) (-(UL(1) << (va))) @@ -64,7 +64,7 @@ #define VA_BITS_MIN (VA_BITS) #endif -#define _VA_START(va) (-(UL(1) << ((va) - 1))) +#define _PAGE_END(va) (-(UL(1) << ((va) - 1))) #define KERNEL_START _text #define KERNEL_END _end @@ -87,7 +87,7 @@ #define KASAN_THREAD_SHIFT 1 #else #define KASAN_THREAD_SHIFT 0 -#define KASAN_SHADOW_END (_VA_START(VA_BITS_MIN)) +#define KASAN_SHADOW_END (_PAGE_END(VA_BITS_MIN)) #endif /* CONFIG_KASAN */ #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) @@ -173,7 +173,7 @@ #ifndef __ASSEMBLY__ extern u64 vabits_actual; -#define VA_START (_VA_START(vabits_actual)) +#define PAGE_END (_PAGE_END(vabits_actual)) #include #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4a695b9ee0f0..979e24fadf35 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -856,8 +856,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -#define kc_vaddr_to_offset(v) ((v) & ~VA_START) -#define kc_offset_to_vaddr(o) ((o) | VA_START) +#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END) +#define kc_offset_to_vaddr(o) ((o) | PAGE_END) #ifdef CONFIG_ARM64_PA_BITS_52 #define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index e130db05d932..e0a7fce0e01c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -496,7 +496,7 @@ int swsusp_arch_resume(void) rc = -ENOMEM; goto out; } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, VA_START); + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END); if (rc) goto out; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 8e10b4ba215a..93f9f77582ae 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -28,7 +28,7 @@ enum address_markers_idx { PAGE_OFFSET_NR = 0, - VA_START_NR, + PAGE_END_NR, #ifdef CONFIG_KASAN KASAN_START_NR, #endif @@ -36,7 +36,7 @@ enum address_markers_idx { static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Linear Mapping start" }, - { 0 /* VA_START */, "Linear Mapping end" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, #ifdef CONFIG_KASAN { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, @@ -411,7 +411,7 @@ void ptdump_check_wx(void) static int ptdump_init(void) { - address_markers[VA_START_NR].start_address = VA_START; + address_markers[PAGE_END_NR].start_address = PAGE_END; #ifdef CONFIG_KASAN address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 725222271474..f87a32484ea8 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -226,7 +226,7 @@ void __init kasan_init(void) kasan_map_populate(kimg_shadow_start, kimg_shadow_end, early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); - kasan_populate_early_shadow(kasan_mem_to_shadow((void *) VA_START), + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); kasan_populate_early_shadow((void *)kimg_shadow_end, (void *)KASAN_SHADOW_END); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c8f7e55f859..8e4b7eaff8ce 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -399,7 +399,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= VA_START) && (virt < VMALLOC_START)) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -426,7 +426,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= VA_START) && (virt < VMALLOC_START)) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; -- cgit v1.2.3 From 90776dd1c427cbb4d381aa4b13338f1fb1d20f5e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 13 Aug 2019 16:04:50 -0700 Subject: arm64/efi: Move variable assignments after SECTIONS It seems that LLVM's linker does not correctly handle variable assignments involving section positions that are updated during the SECTIONS parsing. Commit aa69fb62bea1 ("arm64/efi: Mark __efistub_stext_offset as an absolute symbol explicitly") ran into this too, but found a different workaround. However, this was not enough, as other variables were also miscalculated which manifested as boot failures under UEFI where __efistub__end was not taking the correct _end value (they should be the same): $ ld.lld -EL -maarch64elf --no-undefined -X -shared \ -Bsymbolic -z notext -z norelro --no-apply-dynamic-relocs \ -o vmlinux.lld -T poc.lds --whole-archive vmlinux.o && \ readelf -Ws vmlinux.lld | egrep '\b(__efistub_|)_end\b' 368272: ffff000002218000 0 NOTYPE LOCAL HIDDEN 38 __efistub__end 368322: ffff000012318000 0 NOTYPE GLOBAL DEFAULT 38 _end $ aarch64-linux-gnu-ld.bfd -EL -maarch64elf --no-undefined -X -shared \ -Bsymbolic -z notext -z norelro --no-apply-dynamic-relocs \ -o vmlinux.bfd -T poc.lds --whole-archive vmlinux.o && \ readelf -Ws vmlinux.bfd | egrep '\b(__efistub_|)_end\b' 338124: ffff000012318000 0 NOTYPE LOCAL DEFAULT ABS __efistub__end 383812: ffff000012318000 0 NOTYPE GLOBAL DEFAULT 15325 _end To work around this, all of the __efistub_-prefixed variable assignments need to be moved after the linker script's SECTIONS entry. As it turns out, this also solves the problem fixed in commit aa69fb62bea1, so those changes are reverted here. Link: https://github.com/ClangBuiltLinux/linux/issues/634 Link: https://bugs.llvm.org/show_bug.cgi?id=42990 Acked-by: Ard Biesheuvel Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- arch/arm64/kernel/image-vars.h | 51 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/image.h | 42 --------------------------------- arch/arm64/kernel/vmlinux.lds.S | 2 ++ 3 files changed, 53 insertions(+), 42 deletions(-) create mode 100644 arch/arm64/kernel/image-vars.h (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h new file mode 100644 index 000000000000..25a2a9b479c2 --- /dev/null +++ b/arch/arm64/kernel/image-vars.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Linker script variables to be set after section resolution, as + * ld.lld does not like variables assigned before SECTIONS is processed. + */ +#ifndef __ARM64_KERNEL_IMAGE_VARS_H +#define __ARM64_KERNEL_IMAGE_VARS_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +#ifdef CONFIG_EFI + +__efistub_stext_offset = stext - _text; + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strnlen = __pi_strnlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub_strrchr = __pi_strrchr; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; +__efistub_screen_info = screen_info; + +#endif + +#endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 2b85c0d6fa3d..c7d38c660372 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -65,46 +65,4 @@ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); -#ifdef CONFIG_EFI - -/* - * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol: - * https://github.com/ClangBuiltLinux/linux/issues/561 - */ -__efistub_stext_offset = ABSOLUTE(stext - _text); - -/* - * The EFI stub has its own symbol namespace prefixed by __efistub_, to - * isolate it from the kernel proper. The following symbols are legally - * accessed by the stub, so provide some aliases to make them accessible. - * Only include data symbols here, or text symbols of functions that are - * guaranteed to be safe when executed at another offset than they were - * linked at. The routines below are all implemented in assembler in a - * position independent manner - */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strnlen = __pi_strnlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub_strrchr = __pi_strrchr; -__efistub___flush_dcache_area = __pi___flush_dcache_area; - -#ifdef CONFIG_KASAN -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; -#endif - -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; -__efistub_screen_info = screen_info; - -#endif - #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 31716afa30f6..aa76f7259668 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -254,6 +254,8 @@ SECTIONS HEAD_SYMBOLS } +#include "image-vars.h" + /* * The HYP init code and ID map text can't be longer than a page each, * and should not cross a page boundary. -- cgit v1.2.3 From d55c5f28afafb6b1f0a6978916b23338b383faab Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 12 Jun 2019 13:51:37 +0100 Subject: arm64: smp: disable hotplug on trusted OS resident CPU The trusted OS may reject CPU_OFF calls to its resident CPU, so we must avoid issuing those. We never migrate a Trusted OS and we already take care to prevent CPU_OFF PSCI call. However, this is not reflected explicitly to the userspace. Any user can attempt to hotplug trusted OS resident CPU. The entire motion of going through the various state transitions in the CPU hotplug state machine gets executed and the PSCI layer finally refuses to make CPU_OFF call. This results is unnecessary unwinding of CPU hotplug state machine in the kernel. Instead we can mark the trusted OS resident CPU as not available for hotplug, so that the user attempt or request to do the same will get immediately rejected. Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu_ops.h | 3 +++ arch/arm64/kernel/psci.c | 6 ++++++ arch/arm64/kernel/setup.c | 11 ++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index c09d633c3109..86aabf1e0199 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -23,6 +23,8 @@ * @cpu_boot: Boots a cpu into the kernel. * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary * synchronisation. Called from the cpu being booted. + * @cpu_can_disable: Determines whether a CPU can be disabled based on + * mechanism-specific information. * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific * reason, which will cause the hot unplug to be aborted. Called * from the cpu to be killed. @@ -42,6 +44,7 @@ struct cpu_operations { int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); #ifdef CONFIG_HOTPLUG_CPU + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index a543ab7e007c..c9f72b2665f1 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -46,6 +46,11 @@ static int cpu_psci_cpu_boot(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU +static bool cpu_psci_cpu_can_disable(unsigned int cpu) +{ + return !psci_tos_resident_on(cpu); +} + static int cpu_psci_cpu_disable(unsigned int cpu) { /* Fail early if we don't have CPU_OFF support */ @@ -109,6 +114,7 @@ const struct cpu_operations cpu_psci_ops = { .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, #ifdef CONFIG_HOTPLUG_CPU + .cpu_can_disable = cpu_psci_cpu_can_disable, .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9c4bad7d7131..57ff38600828 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -357,6 +357,15 @@ void __init setup_arch(char **cmdline_p) } } +static inline bool cpu_can_disable(unsigned int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable) + return cpu_ops[cpu]->cpu_can_disable(cpu); +#endif + return false; +} + static int __init topology_init(void) { int i; @@ -366,7 +375,7 @@ static int __init topology_init(void) for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_data.cpu, i); - cpu->hotpluggable = 1; + cpu->hotpluggable = cpu_can_disable(i); register_cpu(cpu, i); } -- cgit v1.2.3 From d91cc2f46ad5bd8020902093385fedf7e9e9d755 Mon Sep 17 00:00:00 2001 From: Raphael Gault Date: Tue, 20 Aug 2019 16:57:45 +0100 Subject: arm64: perf_event: Add missing header needed for smp_processor_id() In perf_event.c we use smp_processor_id(), but we haven't included where it is defined, and rely on this being pulled in via a transitive include. Let's make this more robust by including explicitly. Acked-by: Mark Rutland Signed-off-by: Raphael Gault Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2d3bdebdf6df..a0b4f1bca491 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -19,6 +19,7 @@ #include #include #include +#include /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 -- cgit v1.2.3 From 413235fcedc7f61e925fe9818bc3f5eff8ad2494 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 15 Aug 2019 16:44:01 +0100 Subject: arm64: Change the tagged_addr sysctl control semantics to only prevent the opt-in First rename the sysctl control to abi.tagged_addr_disabled and make it default off (zero). When abi.tagged_addr_disabled == 1, only block the enabling of the TBI ABI via prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE). Getting the status of the ABI or disabling it is still allowed. Acked-by: Andrey Konovalov Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 76b7c55026aa..03689c0beb34 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -579,17 +579,22 @@ void arch_setup_new_exec(void) /* * Control the relaxed ABI allowing tagged user addresses into the kernel. */ -static unsigned int tagged_addr_prctl_allowed = 1; +static unsigned int tagged_addr_disabled; long set_tagged_addr_ctrl(unsigned long arg) { - if (!tagged_addr_prctl_allowed) - return -EINVAL; if (is_compat_task()) return -EINVAL; if (arg & ~PR_TAGGED_ADDR_ENABLE) return -EINVAL; + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled. + */ + if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) + return -EINVAL; + update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); return 0; @@ -597,8 +602,6 @@ long set_tagged_addr_ctrl(unsigned long arg) long get_tagged_addr_ctrl(void) { - if (!tagged_addr_prctl_allowed) - return -EINVAL; if (is_compat_task()) return -EINVAL; @@ -618,9 +621,9 @@ static int one = 1; static struct ctl_table tagged_addr_sysctl_table[] = { { - .procname = "tagged_addr", + .procname = "tagged_addr_disabled", .mode = 0644, - .data = &tagged_addr_prctl_allowed, + .data = &tagged_addr_disabled, .maxlen = sizeof(int), .proc_handler = proc_dointvec_minmax, .extra1 = &zero, -- cgit v1.2.3 From 2671828c3ff4ffadf777f793a1f3232d6e51394a Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 20 Aug 2019 18:45:57 +0100 Subject: arm64: entry: Move ct_user_exit before any other exception When taking an SError or Debug exception from EL0, we run the C handler for these exceptions before updating the context tracking code and unmasking lower priority interrupts. When booting with nohz_full lockdep tells us we got this wrong: | ============================= | WARNING: suspicious RCU usage | 5.3.0-rc2-00010-gb4b5e9dcb11b-dirty #11271 Not tainted | ----------------------------- | include/linux/rcupdate.h:643 rcu_read_unlock() used illegally wh! | | other info that might help us debug this: | | | RCU used illegally from idle CPU! | rcu_scheduler_active = 2, debug_locks = 1 | RCU used illegally from extended quiescent state! | 1 lock held by a.out/432: | #0: 00000000c7a79515 (rcu_read_lock){....}, at: brk_handler+0x00 | | stack backtrace: | CPU: 1 PID: 432 Comm: a.out Not tainted 5.3.0-rc2-00010-gb4b5e9d1 | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno De8 | Call trace: | dump_backtrace+0x0/0x140 | show_stack+0x14/0x20 | dump_stack+0xbc/0x104 | lockdep_rcu_suspicious+0xf8/0x108 | brk_handler+0x164/0x1b0 | do_debug_exception+0x11c/0x278 | el0_dbg+0x14/0x20 Moving the ct_user_exit calls to be before do_debug_exception() means they are also before trace_hardirqs_off() has been updated. Add a new ct_user_exit_irqoff macro to avoid the context-tracking code using irqsave/restore before we've updated trace_hardirqs_off(). To be consistent, do this everywhere. The C helper is called enter_from_user_mode() to match x86 in the hope we can merge them into kernel/context_tracking.c later. Cc: Masami Hiramatsu Fixes: 6c81fe7925cc4c42 ("arm64: enable context tracking") Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry.S | 36 +++++++++++++++++++----------------- arch/arm64/kernel/traps.c | 9 +++++++++ 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index ed57b760f38c..a17393ff6677 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -30,4 +30,6 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void enter_from_user_mode(void); + #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 320a30dbe35e..84a822748c84 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,9 +30,9 @@ * Context tracking subsystem. Used to instrument transitions * between user and kernel mode. */ - .macro ct_user_exit + .macro ct_user_exit_irqoff #ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_exit + bl enter_from_user_mode #endif .endm @@ -792,8 +792,8 @@ el0_cp15: /* * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_cp15instr @@ -805,8 +805,8 @@ el0_da: * Data abort handling */ mrs x26, far_el1 + ct_user_exit_irqoff enable_daif - ct_user_exit clear_address_tag x0, x26 mov x1, x25 mov x2, sp @@ -818,11 +818,11 @@ el0_ia: */ mrs x26, far_el1 gic_prio_kentry_setup tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp @@ -832,8 +832,8 @@ el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_fpsimd_acc @@ -842,8 +842,8 @@ el0_sve_acc: /* * Scalable Vector Extension access */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_sve_acc @@ -852,8 +852,8 @@ el0_fpsimd_exc: /* * Floating Point, Advanced SIMD or SVE exception */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_fpsimd_exc @@ -868,11 +868,11 @@ el0_sp_pc: * Stack or PC alignment exception handling */ gic_prio_kentry_setup tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp @@ -882,8 +882,8 @@ el0_undef: /* * Undefined instruction */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, sp bl do_undefinstr b ret_to_user @@ -891,8 +891,8 @@ el0_sys: /* * System instructions, for trapped cache maintenance instructions */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_sysinstr @@ -902,17 +902,18 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + mrs x24, far_el1 gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 + ct_user_exit_irqoff + mov x0, x24 mov x1, x25 mov x2, sp bl do_debug_exception enable_da_f - ct_user_exit b ret_to_user el0_inv: + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, sp mov x1, #BAD_SYNC mov x2, x25 @@ -925,13 +926,13 @@ el0_irq: kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR tbz x22, #55, 1f bl do_el0_irq_bp_hardening @@ -958,13 +959,14 @@ ENDPROC(el1_error) el0_error: kernel_entry 0 el0_error_naked: - mrs x1, esr_el1 + mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 + ct_user_exit_irqoff enable_dbg mov x0, sp + mov x1, x25 bl do_serror enable_da_f - ct_user_exit b ret_to_user ENDPROC(el0_error) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index a5d7ce4297b0..6e950908eb97 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -7,9 +7,11 @@ */ #include +#include #include #include #include +#include #include #include #include @@ -900,6 +902,13 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) nmi_exit(); } +asmlinkage void enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); +} +NOKPROBE_SYMBOL(enter_from_user_mode); + void __pte_error(const char *file, int line, unsigned long val) { pr_err("%s:%d: bad pte %016lx.\n", file, line, val); -- cgit v1.2.3 From e112b032a72c78f15d0c803c5dc6be444c2e6c66 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 23 Aug 2019 14:24:50 +0800 Subject: arm64: map FDT as RW for early_init_dt_scan() Currently in arm64, FDT is mapped to RO before it's passed to early_init_dt_scan(). However, there might be some codes (eg. commit "fdt: add support for rng-seed") that need to modify FDT during init. Map FDT to RO after early fixups are done. Signed-off-by: Hsin-Yi Wang Reviewed-by: Stephen Boyd Reviewed-by: Mike Rapoport Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 2 +- arch/arm64/kernel/kaslr.c | 5 +---- arch/arm64/kernel/setup.c | 9 ++++++++- arch/arm64/mm/mmu.c | 15 +-------------- 4 files changed, 11 insertions(+), 20 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index fd6161336653..f217e3292919 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -126,7 +126,7 @@ extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); -extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); #define INIT_MM_CONTEXT(name) \ diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 708051655ad9..d94a3e41cef9 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -62,9 +62,6 @@ out: return default_cmdline; } -extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, - pgprot_t prot); - /* * This routine will be executed with the kernel mapped at its default virtual * address, and if it returns successfully, the kernel will be remapped, and @@ -93,7 +90,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * attempt at mapping the FDT in setup_machine() */ early_fixmap_init(); - fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); if (!fdt) return 0; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9c4bad7d7131..25f5127210f8 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -170,9 +170,13 @@ static void __init smp_build_mpidr_hash(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { - void *dt_virt = fixmap_remap_fdt(dt_phys); + int size; + void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); const char *name; + if (dt_virt) + memblock_reserve(dt_phys, size); + if (!dt_virt || !early_init_dt_scan(dt_virt)) { pr_crit("\n" "Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n" @@ -184,6 +188,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } + /* Early fixups are done, map the FDT as read-only now */ + fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + name = of_flat_dt_get_machine_name(); if (!name) return; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 750a69dde39b..54e93583085c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -876,7 +876,7 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) +void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); int offset; @@ -929,19 +929,6 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) -{ - void *dt_virt; - int size; - - dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); - if (!dt_virt) - return NULL; - - memblock_reserve(dt_phys, size); - return dt_virt; -} - int __init arch_ioremap_p4d_supported(void) { return 0; -- cgit v1.2.3 From 7f591fa7a62d3a3f585fd4ba5c3e7b05f4b931be Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Fri, 23 Aug 2019 14:24:52 +0800 Subject: arm64: kexec_file: add rng-seed support Adding "rng-seed" to dtb. It's fine to add this property if original fdt doesn't contain it. Since original seed will be wiped after read, so use a default size 128 bytes here. Signed-off-by: Hsin-Yi Wang Reviewed-by: Stephen Boyd Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 58871333737a..81b5baad97aa 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -27,6 +27,8 @@ #define FDT_PROP_INITRD_END "linux,initrd-end" #define FDT_PROP_BOOTARGS "bootargs" #define FDT_PROP_KASLR_SEED "kaslr-seed" +#define FDT_PROP_RNG_SEED "rng-seed" +#define RNG_SEED_SIZE 128 const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_image_ops, @@ -102,6 +104,19 @@ static int setup_dtb(struct kimage *image, FDT_PROP_KASLR_SEED); } + /* add rng-seed */ + if (rng_is_initialized()) { + u8 rng_seed[RNG_SEED_SIZE]; + get_random_bytes(rng_seed, RNG_SEED_SIZE); + ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed, + RNG_SEED_SIZE); + if (ret) + goto out; + } else { + pr_notice("RNG is not initialised: omitting \"%s\" property\n", + FDT_PROP_RNG_SEED); + } + out: if (ret) return (ret == -FDT_ERR_NOSPACE) ? -ENOMEM : -EINVAL; @@ -110,7 +125,8 @@ out: } /* - * More space needed so that we can add initrd, bootargs and kaslr-seed. + * More space needed so that we can add initrd, bootargs, kaslr-seed, and + * rng-seed. */ #define DTB_EXTRA_SPACE 0x1000 -- cgit v1.2.3 From 0e1645557d19fc6d88d3c40431f63a3c3a4c417b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Aug 2019 14:25:44 +0100 Subject: arm64: smp: Increase secondary CPU boot timeout value When many debug options are enabled simultaneously (e.g. PROVE_LOCKING, KMEMLEAK, DEBUG_PAGE_ALLOC, KASAN etc), it is possible for us to timeout when attempting to boot a secondary CPU and give up. Unfortunately, the CPU will /eventually/ appear, and sit in the background happily stuck in a recursive exception due to a NULL stack pointer. Increase the timeout to 5s, which will of course be enough for anybody. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 018a33e01b0e..63c7a7682e93 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -123,7 +123,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * time out. */ wait_for_completion_timeout(&cpu_running, - msecs_to_jiffies(1000)); + msecs_to_jiffies(5000)); if (!cpu_online(cpu)) { pr_crit("CPU%u: failed to come online\n", cpu); -- cgit v1.2.3 From 5b1cfe3a0ba74c1f2b83b607712a217b9f9463a2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Aug 2019 14:36:38 +0100 Subject: arm64: smp: Don't enter kernel with NULL stack pointer or task struct Although SMP bringup is inherently racy, we can significantly reduce the window during which secondary CPUs can unexpectedly enter the kernel by sanity checking the 'stack' and 'task' fields of the 'secondary_data' structure. If the booting CPU gave up waiting for us, then they will have been cleared to NULL and we should spin in a WFE; WFI loop instead. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 8 ++++++++ arch/arm64/kernel/smp.c | 1 + 2 files changed, 9 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2cdacd1c141b..0baadf335172 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -724,14 +724,22 @@ __secondary_switched: adr_l x0, secondary_data ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + cbz x1, __secondary_too_slow mov sp, x1 ldr x2, [x0, #CPU_BOOT_TASK] + cbz x2, __secondary_too_slow msr sp_el0, x2 mov x29, #0 mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) +__secondary_too_slow: + wfe + wfi + b __secondary_too_slow +ENDPROC(__secondary_too_slow) + /* * The booting CPU updates the failed status @__early_cpu_boot_status, * with MMU turned off. diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 63c7a7682e93..1f8aeb77cba5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -136,6 +136,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = NULL; secondary_data.stack = NULL; + __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (ret && status) { -- cgit v1.2.3 From ebef746543fd1aa162216b0e484eb9062b65741d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Aug 2019 15:54:56 +0100 Subject: arm64: smp: Treat unknown boot failures as being 'stuck in kernel' When we fail to bring a secondary CPU online and it fails in an unknown state, we should assume the worst and increment 'cpus_stuck_in_kernel' so that things like kexec() are disabled. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 1f8aeb77cba5..dc9fe879c279 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -147,6 +147,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) default: pr_err("CPU%u: failed in unknown state : 0x%lx\n", cpu, status); + cpus_stuck_in_kernel++; break; case CPU_KILL_ME: if (!op_cpu_kill(cpu)) { -- cgit v1.2.3