From 7202bde8b7aea7c914e33c7bc2dfb145587f1bdc Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 17 Oct 2016 16:46:45 +0100 Subject: arm64: parse cpu capacity-dmips-mhz from DT With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities can now be calculated from values extracted from DT and information coming from cpufreq. Add parsing of DT information at boot time, and complement it with cpufreq information. Also, store such information using per CPU variables, as we do for arm. Caveat: the information provided by this patch will start to be used in the future. We need to #define arch_scale_cpu_capacity to something provided in arch, so that scheduler's default implementation (which gets used if arch_scale_cpu_capacity is not defined) is overwritten. Cc: Will Deacon Cc: Mark Brown Cc: Sudeep Holla Signed-off-by: Juri Lelli Acked-by: Vincent Guittot Signed-off-by: Catalin Marinas --- arch/arm64/kernel/topology.c | 159 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 694f6deedbab..b75b0ba2e113 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -19,10 +19,162 @@ #include #include #include +#include +#include #include #include +static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; + +unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + +static u32 capacity_scale; +static u32 *raw_capacity; +static bool cap_parsing_failed; + +static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu) +{ + int ret; + u32 cpu_capacity; + + if (cap_parsing_failed) + return; + + ret = of_property_read_u32(cpu_node, + "capacity-dmips-mhz", + &cpu_capacity); + if (!ret) { + if (!raw_capacity) { + raw_capacity = kcalloc(num_possible_cpus(), + sizeof(*raw_capacity), + GFP_KERNEL); + if (!raw_capacity) { + pr_err("cpu_capacity: failed to allocate memory for raw capacities\n"); + cap_parsing_failed = true; + return; + } + } + capacity_scale = max(cpu_capacity, capacity_scale); + raw_capacity[cpu] = cpu_capacity; + pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n", + cpu_node->full_name, raw_capacity[cpu]); + } else { + if (raw_capacity) { + pr_err("cpu_capacity: missing %s raw capacity\n", + cpu_node->full_name); + pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n"); + } + cap_parsing_failed = true; + kfree(raw_capacity); + } +} + +static void normalize_cpu_capacity(void) +{ + u64 capacity; + int cpu; + + if (!raw_capacity || cap_parsing_failed) + return; + + pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); + for_each_possible_cpu(cpu) { + pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", + cpu, raw_capacity[cpu]); + capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT) + / capacity_scale; + set_capacity_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, arch_scale_cpu_capacity(NULL, cpu)); + } +} + +#ifdef CONFIG_CPU_FREQ +static cpumask_var_t cpus_to_visit; +static bool cap_parsing_done; +static void parsing_done_workfn(struct work_struct *work); +static DECLARE_WORK(parsing_done_work, parsing_done_workfn); + +static int +init_cpu_capacity_callback(struct notifier_block *nb, + unsigned long val, + void *data) +{ + struct cpufreq_policy 
*policy = data; + int cpu; + + if (cap_parsing_failed || cap_parsing_done) + return 0; + + switch (val) { + case CPUFREQ_NOTIFY: + pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n", + cpumask_pr_args(policy->related_cpus), + cpumask_pr_args(cpus_to_visit)); + cpumask_andnot(cpus_to_visit, + cpus_to_visit, + policy->related_cpus); + for_each_cpu(cpu, policy->related_cpus) { + raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) * + policy->cpuinfo.max_freq / 1000UL; + capacity_scale = max(raw_capacity[cpu], capacity_scale); + } + if (cpumask_empty(cpus_to_visit)) { + normalize_cpu_capacity(); + kfree(raw_capacity); + pr_debug("cpu_capacity: parsing done\n"); + cap_parsing_done = true; + schedule_work(&parsing_done_work); + } + } + return 0; +} + +static struct notifier_block init_cpu_capacity_notifier = { + .notifier_call = init_cpu_capacity_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + if (cap_parsing_failed) + return -EINVAL; + + if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) { + pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n"); + return -ENOMEM; + } + cpumask_copy(cpus_to_visit, cpu_possible_mask); + + return cpufreq_register_notifier(&init_cpu_capacity_notifier, + CPUFREQ_POLICY_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +static void parsing_done_workfn(struct work_struct *work) +{ + cpufreq_unregister_notifier(&init_cpu_capacity_notifier, + CPUFREQ_POLICY_NOTIFIER); +} + +#else +static int __init free_raw_capacity(void) +{ + kfree(raw_capacity); + + return 0; +} +core_initcall(free_raw_capacity); +#endif + static int __init get_cpu_for_node(struct device_node *node) { struct device_node *cpu_node; @@ -34,6 +186,7 @@ static int __init get_cpu_for_node(struct device_node *node) for_each_possible_cpu(cpu) { if (of_get_cpu_node(cpu, NULL) == cpu_node) { + parse_cpu_capacity(cpu_node, cpu); of_node_put(cpu_node); return cpu; } @@ -178,13 +331,17 @@ static int __init parse_dt_topology(void) * cluster with restricted subnodes. */ map = of_get_child_by_name(cn, "cpu-map"); - if (!map) + if (!map) { + cap_parsing_failed = true; goto out; + } ret = parse_cluster(map, 0); if (ret != 0) goto out_map; + normalize_cpu_capacity(); + /* * Check that all cores are in the topology; the SMP code will * only mark cores described in the DT as possible. -- cgit v1.2.3 From be8f185d8af4dbd450023a42a48c6faa8cbcdfe6 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Thu, 3 Nov 2016 05:40:18 +0000 Subject: arm64: add sysfs cpu_capacity attribute Add a sysfs cpu_capacity attribute with which it is possible to read and write (thus over-writing default values) CPUs capacity. This might be useful in situations where values needs changing after boot. 
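(Stepping back to the previous patch for a moment: the end-to-end arithmetic it performs can be illustrated with a small stand-alone user-space program. The DT and cpufreq numbers below are made up, and the program collapses the two normalization passes into one, but the idea is the same: raw capacity-dmips-mhz values are weighted by each CPU's maximum frequency and then scaled so that the fastest CPU ends up at SCHED_CAPACITY_SCALE.)

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10	/* SCHED_CAPACITY_SCALE == 1024 */

int main(void)
{
	/* hypothetical big.LITTLE system */
	unsigned long dmips_mhz[2] = { 1024, 446 };		/* capacity-dmips-mhz from DT */
	unsigned long max_freq[2]  = { 1800000, 1400000 };	/* kHz, from cpufreq */
	unsigned long raw[2], scale = 0;
	int cpu;

	for (cpu = 0; cpu < 2; cpu++) {
		raw[cpu] = dmips_mhz[cpu] * (max_freq[cpu] / 1000UL);
		if (raw[cpu] > scale)
			scale = raw[cpu];
	}

	for (cpu = 0; cpu < 2; cpu++)
		printf("cpu%d capacity = %lu\n", cpu,
		       (raw[cpu] << SCHED_CAPACITY_SHIFT) / scale);

	return 0;	/* prints 1024 for the big core and 346 for the LITTLE one */
}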
The new attribute shows up as: /sys/devices/system/cpu/cpu*/cpu_capacity Cc: Will Deacon Cc: Mark Brown Cc: Sudeep Holla Signed-off-by: Juri Lelli Signed-off-by: Catalin Marinas --- arch/arm64/kernel/topology.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index b75b0ba2e113..23e9e13bd2aa 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,12 +20,15 @@ #include #include #include +#include #include +#include #include #include static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; +static DEFINE_MUTEX(cpu_scale_mutex); unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) { @@ -37,6 +40,65 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) per_cpu(cpu_scale, cpu) = capacity; } +#ifdef CONFIG_PROC_SYSCTL +static ssize_t cpu_capacity_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + + return sprintf(buf, "%lu\n", + arch_scale_cpu_capacity(NULL, cpu->dev.id)); +} + +static ssize_t cpu_capacity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + int this_cpu = cpu->dev.id, i; + unsigned long new_capacity; + ssize_t ret; + + if (count) { + ret = kstrtoul(buf, 0, &new_capacity); + if (ret) + return ret; + if (new_capacity > SCHED_CAPACITY_SCALE) + return -EINVAL; + + mutex_lock(&cpu_scale_mutex); + for_each_cpu(i, &cpu_topology[this_cpu].core_sibling) + set_capacity_scale(i, new_capacity); + mutex_unlock(&cpu_scale_mutex); + } + + return count; +} + +static DEVICE_ATTR_RW(cpu_capacity); + +static int register_cpu_capacity_sysctl(void) +{ + int i; + struct device *cpu; + + for_each_possible_cpu(i) { + cpu = get_cpu_device(i); + if (!cpu) { + pr_err("%s: too early to get CPU%d device!\n", + __func__, i); + continue; + } + device_create_file(cpu, &dev_attr_cpu_capacity); + } + + return 0; +} +subsys_initcall(register_cpu_capacity_sysctl); +#endif + static u32 capacity_scale; static u32 *raw_capacity; static bool cap_parsing_failed; @@ -87,6 +149,7 @@ static void normalize_cpu_capacity(void) return; pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale); + mutex_lock(&cpu_scale_mutex); for_each_possible_cpu(cpu) { pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n", cpu, raw_capacity[cpu]); @@ -96,6 +159,7 @@ static void normalize_cpu_capacity(void) pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", cpu, arch_scale_cpu_capacity(NULL, cpu)); } + mutex_unlock(&cpu_scale_mutex); } #ifdef CONFIG_CPU_FREQ -- cgit v1.2.3 From 4890ae46911c99d61fb622605c2d81bf8c86b209 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 18 Oct 2016 13:46:27 +0100 Subject: arm64/kprobes: Tidy up sign-extension usage Kprobes does not need its own homebrewed (and frankly inscrutable) sign extension macro; just use the standard kernel functions instead. Since the compiler actually recognises the sign-extension idiom of the latter, we also get the small bonus of some nicer codegen, as each displacement calculation helper then compiles to a single optimal SBFX instruction. 
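As a stand-alone illustration (not part of the patch), the snippet below shows the old macro and the generic helper agreeing on a branch displacement; sign_extend32() is open-coded here with the same semantics as the <linux/bitops.h> version, and the instruction word is a made-up B that jumps back one instruction (imm26 == -1):

#include <stdio.h>
#include <stdint.h>

/* same semantics as the kernel helper: bit 'index' is the sign bit */
static int32_t sign_extend32(uint32_t value, int index)
{
	uint8_t shift = 31 - index;
	return (int32_t)(value << shift) >> shift;
}

/* the old homebrewed macro, kept only for comparison */
#define old_sign_extend(x, signbit) \
	((x) | (0 - ((x) & (1 << (signbit)))))

int main(void)
{
	uint32_t insn  = 0x17ffffff;		/* AArch64 B, imm26 = -1 */
	uint32_t field = (insn & 0x3ffffff) << 2;

	printf("new: %d\n", sign_extend32(field, 27));			/* -4 */
	printf("old: %d\n", (int32_t)old_sign_extend(field, 27));	/* -4 */
	return 0;
}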
Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/simulate-insn.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c index 8977ce9d009d..357d3efe1366 100644 --- a/arch/arm64/kernel/probes/simulate-insn.c +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -13,28 +13,26 @@ * General Public License for more details. */ +#include #include #include #include "simulate-insn.h" -#define sign_extend(x, signbit) \ - ((x) | (0 - ((x) & (1 << (signbit))))) - #define bbl_displacement(insn) \ - sign_extend(((insn) & 0x3ffffff) << 2, 27) + sign_extend32(((insn) & 0x3ffffff) << 2, 27) #define bcond_displacement(insn) \ - sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20) #define cbz_displacement(insn) \ - sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20) #define tbz_displacement(insn) \ - sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + sign_extend32(((insn >> 5) & 0x3fff) << 2, 15) #define ldr_displacement(insn) \ - sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20) static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) { @@ -106,7 +104,7 @@ simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) xn = opcode & 0x1f; imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); - imm = sign_extend(imm, 20); + imm = sign_extend64(imm, 20); if (opcode & 0x80000000) val = (imm<<12) + (addr & 0xfffffffffffff000); else -- cgit v1.2.3 From b7b941afe557638b29aa6bab7070cc706f3231c1 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 26 Oct 2016 19:19:31 +0100 Subject: arm64: Remove pointless WARN_ON in DMA teardown We expect arch_teardown_dma_ops() to be called very late in a device's life, after it has been removed from its bus, and thus after the IOMMU bus notifier has run. As such, even if this funny little check did make sense, it's unlikely to achieve what it thinks it's trying to do anyway. It's a residual trace of an earlier implementation which didn't belong here from the start; belatedly snuff it out. Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3f74d0d98de6..aa6c8f834d9e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -938,11 +938,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_teardown_dma_ops(struct device *dev) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - - if (WARN_ON(domain)) - iommu_detach_device(domain, dev); - dev->archdata.dma_ops = NULL; } -- cgit v1.2.3 From e98216b52176ba2bfa4bdb02f178f4d08832d465 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Oct 2016 12:22:56 +0100 Subject: arm64: mm: BUG on unsupported manipulations of live kernel mappings Now that we take care not manipulate the live kernel page tables in a way that may lead to TLB conflicts, the case where a table mapping is replaced by a block mapping can no longer occur. So remove the handling of this at the PUD and PMD levels, and instead, BUG() on any occurrence of live kernel page table manipulations that modify anything other than the permission bits. 
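The rule the new BUG_ON()s enforce can be stated as a small predicate; the stand-alone program below mirrors the pgattr_change_is_safe() helper from the diff, with the relevant arm64 descriptor bit positions quoted here only for illustration:

#include <stdio.h>
#include <stdint.h>

#define PTE_RDONLY	(1ULL << 7)
#define PTE_WRITE	(1ULL << 51)	/* a.k.a. PTE_DBM */
#define PTE_PXN		(1ULL << 53)

/* mirrors pgattr_change_is_safe() below */
static int change_is_safe(uint64_t old, uint64_t new)
{
	const uint64_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;

	return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0;
}

int main(void)
{
	uint64_t pte = 0x0040000080000711ULL;	/* made-up live mapping */

	/* flipping only RDONLY (what mark_rodata_ro() does) is fine ... */
	printf("%d\n", change_is_safe(pte, pte | PTE_RDONLY));	/* 1 */
	/* ... changing the output address would need break-before-make */
	printf("%d\n", change_is_safe(pte, pte + 0x1000));	/* 0 */
	return 0;
}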
Since mark_rodata_ro() is the only caller where the kernel mappings that are being manipulated are actually live, drop the various conditional flush_tlb_all() invocations, and add a single call to mark_rodata_ro() instead. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 70 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 27 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 05615a3fdc6f..27dc0e5012a8 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include #include @@ -95,6 +93,17 @@ static phys_addr_t __init early_pgtable_alloc(void) return phys; } +static bool pgattr_change_is_safe(u64 old, u64 new) +{ + /* + * The following mapping attributes may be updated in live + * kernel mappings without the need for break-before-make. + */ + static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE; + + return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0; +} + static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, @@ -115,8 +124,17 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_set_fixmap_offset(pmd, addr); do { + pte_t old_pte = *pte; + set_pte(pte, pfn_pte(pfn, prot)); pfn++; + + /* + * After the PTE entry has been populated once, we + * only allow updates to the permission attributes. + */ + BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte))); + } while (pte++, addr += PAGE_SIZE, addr != end); pte_clear_fixmap(); @@ -146,27 +164,27 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd = pmd_set_fixmap_offset(pud, addr); do { + pmd_t old_pmd = *pmd; + next = pmd_addr_end(addr, end); + /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && allow_block_mappings) { - pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); + /* - * Check for previous table entries created during - * boot (__create_page_tables) and flush them. + * After the PMD entry has been populated once, we + * only allow updates to the permission attributes. */ - if (!pmd_none(old_pmd)) { - flush_tlb_all(); - if (pmd_table(old_pmd)) { - phys_addr_t table = pmd_page_paddr(old_pmd); - if (!WARN_ON_ONCE(slab_is_available())) - memblock_free(table, PAGE_SIZE); - } - } + BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), + pmd_val(*pmd))); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), prot, pgtable_alloc); + + BUG_ON(pmd_val(old_pmd) != 0 && + pmd_val(old_pmd) != pmd_val(*pmd)); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -204,33 +222,28 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud = pud_set_fixmap_offset(pgd, addr); do { + pud_t old_pud = *pud; + next = pud_addr_end(addr, end); /* * For 4K granule only, attempt to put down a 1GB block */ if (use_1G_block(addr, next, phys) && allow_block_mappings) { - pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); /* - * If we have an old value for a pud, it will - * be pointing to a pmd table that we no longer - * need (from swapper_pg_dir). - * - * Look up the old pmd table and free it. + * After the PUD entry has been populated once, we + * only allow updates to the permission attributes. 
*/ - if (!pud_none(old_pud)) { - flush_tlb_all(); - if (pud_table(old_pud)) { - phys_addr_t table = pud_page_paddr(old_pud); - if (!WARN_ON_ONCE(slab_is_available())) - memblock_free(table, PAGE_SIZE); - } - } + BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), + pud_val(*pud))); } else { alloc_init_pmd(pud, addr, next, phys, prot, pgtable_alloc, allow_block_mappings); + + BUG_ON(pud_val(old_pud) != 0 && + pud_val(old_pud) != pud_val(*pud)); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -396,6 +409,9 @@ void mark_rodata_ro(void) section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); + + /* flush the TLBs after updating live kernel mappings */ + flush_tlb_all(); } static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, -- cgit v1.2.3 From f14c66ce81b5595a483bd83df151539dbe1058fa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Oct 2016 12:22:57 +0100 Subject: arm64: mm: replace 'block_mappings_allowed' with 'page_mappings_only' In preparation of adding support for contiguous PTE and PMD mappings, let's replace 'block_mappings_allowed' with 'page_mappings_only', which will be a more accurate description of the nature of the setting once we add such contiguous mappings into the mix. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mmu.h | 2 +- arch/arm64/kernel/efi.c | 8 ++++---- arch/arm64/mm/mmu.c | 32 ++++++++++++++++---------------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 8d9fce037b2f..a81454ad5455 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -34,7 +34,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot, bool allow_block_mappings); + pgprot_t prot, bool page_mappings_only); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ba9bee389fd5..5d17f377d905 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -62,8 +62,8 @@ struct screen_info screen_info __section(.data); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); - bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && - md->type != EFI_RUNTIME_SERVICES_DATA); + bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_RUNTIME_SERVICES_DATA); if (!PAGE_ALIGNED(md->phys_addr) || !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { @@ -76,12 +76,12 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) * from the MMU routines. So avoid block mappings altogether in * that case. 
*/ - allow_block_mappings = false; + page_mappings_only = true; } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG), allow_block_mappings); + __pgprot(prot_val | PTE_NG), page_mappings_only); return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 27dc0e5012a8..7b0dd07212ae 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -143,7 +143,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool allow_block_mappings) + bool page_mappings_only) { pmd_t *pmd; unsigned long next; @@ -170,7 +170,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - allow_block_mappings) { + !page_mappings_only) { pmd_set_huge(pmd, phys, prot); /* @@ -207,7 +207,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool allow_block_mappings) + bool page_mappings_only) { pud_t *pud; unsigned long next; @@ -229,7 +229,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && allow_block_mappings) { + if (use_1G_block(addr, next, phys) && !page_mappings_only) { pud_set_huge(pud, phys, prot); /* @@ -240,7 +240,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud_val(*pud))); } else { alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc, allow_block_mappings); + pgtable_alloc, page_mappings_only); BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != pud_val(*pud)); @@ -255,7 +255,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(void), - bool allow_block_mappings) + bool page_mappings_only) { unsigned long addr, length, end, next; pgd_t *pgd = pgd_offset_raw(pgdir, virt); @@ -275,7 +275,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, - allow_block_mappings); + page_mappings_only); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -304,17 +304,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot, bool allow_block_mappings) + pgprot_t prot, bool page_mappings_only) { BUG_ON(mm == &init_mm); __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - pgd_pgtable_alloc, allow_block_mappings); + pgd_pgtable_alloc, page_mappings_only); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -327,7 +327,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL, !debug_pagealloc_enabled()); + NULL, debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, 
phys_addr_t start, phys_addr_t end) @@ -345,7 +345,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, early_pgtable_alloc, - !debug_pagealloc_enabled()); + debug_pagealloc_enabled()); return; } @@ -358,13 +358,13 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, early_pgtable_alloc, - !debug_pagealloc_enabled()); + debug_pagealloc_enabled()); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, early_pgtable_alloc, - !debug_pagealloc_enabled()); + debug_pagealloc_enabled()); /* * Map the linear alias of the [_text, __init_begin) interval as @@ -374,7 +374,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc, !debug_pagealloc_enabled()); + early_pgtable_alloc, debug_pagealloc_enabled()); } static void __init map_mem(pgd_t *pgd) @@ -424,7 +424,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc, !debug_pagealloc_enabled()); + early_pgtable_alloc, debug_pagealloc_enabled()); vma->addr = va_start; vma->phys_addr = pa_start; -- cgit v1.2.3 From 0bfc445dec9dd8130d22c9f4476eed7598524129 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 21 Oct 2016 12:22:58 +0100 Subject: arm64: mm: set the contiguous bit for kernel mappings where appropriate Now that we no longer allow live kernel PMDs to be split, it is safe to start using the contiguous bit for kernel mappings. So set the contiguous bit in the kernel page mappings for regions whose size and alignment are suitable for this. This enables the following contiguous range sizes for the virtual mapping of the kernel image, and for the linear mapping: granule size | cont PTE | cont PMD | -------------+------------+------------+ 4 KB | 64 KB | 32 MB | 16 KB | 2 MB | 1 GB* | 64 KB | 2 MB | 16 GB* | * Only when built for 3 or more levels of translation. 
This is due to the fact that a 2 level configuration only consists of PGDs and PTEs, and the added complexity of dealing with folded PMDs is not justified considering that 16 GB contiguous ranges are likely to be ignored by the hardware (and 16k/2 levels is a niche configuration) Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7b0dd07212ae..dd5f12d0959e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -107,8 +107,10 @@ static bool pgattr_change_is_safe(u64 old, u64 new) static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool page_mappings_only) { + pgprot_t __prot = prot; pte_t *pte; BUG_ON(pmd_sect(*pmd)); @@ -126,7 +128,18 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, do { pte_t old_pte = *pte; - set_pte(pte, pfn_pte(pfn, prot)); + /* + * Set the contiguous bit for the subsequent group of PTEs if + * its size and alignment are appropriate. + */ + if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) { + if (end - addr >= CONT_PTE_SIZE && !page_mappings_only) + __prot = __pgprot(pgprot_val(prot) | PTE_CONT); + else + __prot = prot; + } + + set_pte(pte, pfn_pte(pfn, __prot)); pfn++; /* @@ -145,6 +158,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t (*pgtable_alloc)(void), bool page_mappings_only) { + pgprot_t __prot = prot; pmd_t *pmd; unsigned long next; @@ -171,7 +185,18 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && !page_mappings_only) { - pmd_set_huge(pmd, phys, prot); + /* + * Set the contiguous bit for the subsequent group of + * PMDs if its size and alignment are appropriate. + */ + if (((addr | phys) & ~CONT_PMD_MASK) == 0) { + if (end - addr >= CONT_PMD_SIZE) + __prot = __pgprot(pgprot_val(prot) | + PTE_CONT); + else + __prot = prot; + } + pmd_set_huge(pmd, phys, __prot); /* * After the PMD entry has been populated once, we @@ -181,7 +206,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, pmd_val(*pmd))); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, pgtable_alloc); + prot, pgtable_alloc, + page_mappings_only); BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != pmd_val(*pmd)); -- cgit v1.2.3 From 4ddb9bf83349b4f4f8178e58c3654ac7ec7edbc6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 27 Oct 2016 09:27:31 -0700 Subject: arm64: dump: Make ptdump debugfs a separate option ptdump_register currently initializes a set of page table information and registers debugfs. There are uses for the ptdump option without wanting the debugfs options. Split this out to make it a separate option. 
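A sketch of the kind of consumer the split is meant to enable (placeholder names, not part of the patch): with ARM64_PTDUMP_CORE selected, code can drive the walker from its own seq_file show routine without ever creating a debugfs entry, and the debugfs file becomes a thin optional wrapper on top.

static struct ptdump_info my_ptdump_info = {
	.mm		= &init_mm,
	.markers	= my_markers,	/* placeholder addr_marker table */
	.base_addr	= VA_START,
};

static int my_show(struct seq_file *m, void *v)
{
	ptdump_walk_pgd(m, &my_ptdump_info);	/* core walker, no debugfs */
	return 0;
}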
Reviewed-by: Ard Biesheuvel Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 6 +++++- arch/arm64/include/asm/ptdump.h | 15 +++++++++------ arch/arm64/mm/Makefile | 3 ++- arch/arm64/mm/dump.c | 26 +++++--------------------- arch/arm64/mm/ptdump_debugfs.c | 31 +++++++++++++++++++++++++++++++ drivers/firmware/efi/arm-runtime.c | 4 ++-- 6 files changed, 54 insertions(+), 31 deletions(-) create mode 100644 arch/arm64/mm/ptdump_debugfs.c (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index b661fe742615..21a5b7409641 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -2,9 +2,13 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config ARM64_PTDUMP +config ARM64_PTDUMP_CORE + def_bool n + +config ARM64_PTDUMP_DEBUGFS bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL + select ARM64_PTDUMP_CORE select DEBUG_FS help Say Y here if you want to show the kernel pagetable layout in a diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 07b8ed037dee..16335dac3c1f 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -16,9 +16,10 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H -#ifdef CONFIG_ARM64_PTDUMP +#ifdef CONFIG_ARM64_PTDUMP_CORE #include +#include struct addr_marker { unsigned long start_address; @@ -32,13 +33,15 @@ struct ptdump_info { unsigned long max_addr; }; -int ptdump_register(struct ptdump_info *info, const char *name); - +void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS +int ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else -static inline int ptdump_register(struct ptdump_info *info, const char *name) +static inline int ptdump_debugfs_register(struct ptdump_info *info, + const char *name) { return 0; } -#endif /* CONFIG_ARM64_PTDUMP */ - +#endif +#endif /* CONFIG_ARM64_PTDUMP_CORE */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 54bb209cae8e..e703fb9defad 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -3,7 +3,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_ARM64_PTDUMP) += dump.o +obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o +obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 9c3e75df2180..f0f0be74d8e7 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -304,9 +304,8 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, } } -static int ptdump_show(struct seq_file *m, void *v) +void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) { - struct ptdump_info *info = m->private; struct pg_state st = { .seq = m, .marker = info->markers, @@ -315,33 +314,16 @@ static int ptdump_show(struct seq_file *m, void *v) walk_pgd(&st, info->mm, info->base_addr); note_page(&st, 0, 0, 0); - return 0; } -static int ptdump_open(struct inode *inode, struct file *file) +static void ptdump_initialize(void) { - return single_open(file, ptdump_show, inode->i_private); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = 
single_release, -}; - -int ptdump_register(struct ptdump_info *info, const char *name) -{ - struct dentry *pe; unsigned i, j; for (i = 0; i < ARRAY_SIZE(pg_level); i++) if (pg_level[i].bits) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; - - pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); - return pe ? 0 : -ENOMEM; } static struct ptdump_info kernel_ptdump_info = { @@ -352,6 +334,8 @@ static struct ptdump_info kernel_ptdump_info = { static int ptdump_init(void) { - return ptdump_register(&kernel_ptdump_info, "kernel_page_tables"); + ptdump_initialize(); + return ptdump_debugfs_register(&kernel_ptdump_info, + "kernel_page_tables"); } device_initcall(ptdump_init); diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c new file mode 100644 index 000000000000..eee4d864350c --- /dev/null +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -0,0 +1,31 @@ +#include +#include + +#include + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct ptdump_info *info = m->private; + ptdump_walk_pgd(m, info); + return 0; +} + +static int ptdump_open(struct inode *inode, struct file *file) +{ + return single_open(file, ptdump_show, inode->i_private); +} + +static const struct file_operations ptdump_fops = { + .open = ptdump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int ptdump_debugfs_register(struct ptdump_info *info, const char *name) +{ + struct dentry *pe; + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); + return pe ? 0 : -ENOMEM; + +} diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 7c75a8d9091a..349dc3e1e52e 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -39,7 +39,7 @@ static struct mm_struct efi_mm = { .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), }; -#ifdef CONFIG_ARM64_PTDUMP +#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS #include static struct ptdump_info efi_ptdump_info = { @@ -53,7 +53,7 @@ static struct ptdump_info efi_ptdump_info = { static int __init ptdump_init(void) { - return ptdump_register(&efi_ptdump_info, "efi_page_tables"); + return ptdump_debugfs_register(&efi_ptdump_info, "efi_page_tables"); } device_initcall(ptdump_init); -- cgit v1.2.3 From ae5d1cf358a5bf0020cd25618fca10b5c5c4630e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 27 Oct 2016 09:27:32 -0700 Subject: arm64: dump: Make the page table dumping seq_file optional The page table dumping code always assumes it will be dumping to a seq_file to userspace. Future code will be taking advantage of the page table dumping code but will not need the seq_file. Make the seq_file optional for these cases. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f0f0be74d8e7..bb366490c5f7 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -50,6 +50,18 @@ static const struct addr_marker address_markers[] = { { -1, NULL }, }; +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + /* * The page dumper groups page table entries of the same type into a single * description. 
It uses pg_state to track the range information while @@ -186,7 +198,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, s = bits->clear; if (s) - seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } @@ -200,14 +212,14 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, st->level = level; st->current_prot = prot; st->start_address = addr; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; if (st->current_prot) { - seq_printf(st->seq, "0x%016lx-0x%016lx ", + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); delta = (addr - st->start_address) >> 10; @@ -215,17 +227,17 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, delta >>= 10; unit++; } - seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, pg_level[st->level].name); if (pg_level[st->level].bits) dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); - seq_puts(st->seq, "\n"); + pt_dump_seq_puts(st->seq, "\n"); } if (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; @@ -235,7 +247,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, if (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } } -- cgit v1.2.3 From cfd69e9551a1b1d20a7ba030a0990aa370817f54 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 27 Oct 2016 09:27:33 -0700 Subject: arm64: dump: Remove max_addr max_addr was added as part of struct ptdump_info but has never actually been used. Remove it. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptdump.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 16335dac3c1f..f72ee69ee602 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -30,7 +30,6 @@ struct ptdump_info { struct mm_struct *mm; const struct addr_marker *markers; unsigned long base_addr; - unsigned long max_addr; }; void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); -- cgit v1.2.3 From 1404d6f13e47c42f155e3c6a611b1bf4dd35dde9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 27 Oct 2016 09:27:34 -0700 Subject: arm64: dump: Add checking for writable and exectuable pages Page mappings with full RWX permissions are a security risk. x86 has an option to walk the page tables and dump any bad pages. (See e1a58320a38d ("x86/mm: Warn on W^X mappings")). Add a similar implementation for arm64. 
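What the new checker flags boils down to two tests per mapping: writable together with kernel-executable (W+X), and, specific to arm64, a kernel mapping that is missing UXN and is therefore executable from user space. The stand-alone program below applies the same logic as note_prot_wx()/note_prot_uxn() in the diff, with descriptor bit values quoted for illustration:

#include <stdio.h>
#include <stdint.h>

#define PTE_RDONLY	(1ULL << 7)
#define PTE_PXN		(1ULL << 53)
#define PTE_UXN		(1ULL << 54)

static void classify(uint64_t prot)
{
	int wx  = !(prot & PTE_RDONLY) && !(prot & PTE_PXN);	/* writable + executable */
	int uxn = !(prot & PTE_UXN);				/* executable from EL0 */

	printf("W+X: %d, missing UXN: %d\n", wx, uxn);
}

int main(void)
{
	classify(PTE_UXN | PTE_PXN);		/* writable kernel data: OK */
	classify(PTE_UXN | PTE_RDONLY);		/* kernel text: OK */
	classify(PTE_UXN);			/* writable and executable: flagged */
	classify(PTE_RDONLY | PTE_PXN);		/* missing UXN: flagged */
	return 0;
}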
Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Reviewed-by: Ard Biesheuvel [catalin.marinas@arm.com: folded fix for KASan out of bounds from Mark Rutland] Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 29 ++++++++++++++++++++++ arch/arm64/include/asm/ptdump.h | 8 ++++++ arch/arm64/mm/dump.c | 54 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/mmu.c | 3 +++ 4 files changed, 94 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 21a5b7409641..d1ebd46872fd 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -42,6 +42,35 @@ config ARM64_RANDOMIZE_TEXT_OFFSET of TEXT_OFFSET and platforms must not require a specific value. +config DEBUG_WX + bool "Warn on W+X mappings at boot" + select ARM64_PTDUMP_CORE + ---help--- + Generate a warning if any W+X mappings are found at boot. + + This is useful for discovering cases where the kernel is leaving + W+X mappings after applying NX, as such mappings are a security risk. + This check also includes UXN, which should be set on all kernel + mappings. + + Look for a message in dmesg output like this: + + arm64/mm: Checked W+X mappings: passed, no W+X pages found. + + or like this, if the check failed: + + arm64/mm: Checked W+X mappings: FAILED, W+X pages found. + + Note that even if the check fails, your kernel is possibly + still fine, as W+X mappings are not a security hole in + themselves, what they do is that they make the exploitation + of other unfixed kernel bugs easier. + + There is no runtime or memory usage effect of this option + once the kernel has booted up - it's a one time check. + + If in doubt, say "Y". + config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" depends on MODULES diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index f72ee69ee602..6afd8476c60c 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -42,5 +42,13 @@ static inline int ptdump_debugfs_register(struct ptdump_info *info, return 0; } #endif +void ptdump_check_wx(void); #endif /* CONFIG_ARM64_PTDUMP_CORE */ + +#ifdef CONFIG_DEBUG_WX +#define debug_checkwx() ptdump_check_wx() +#else +#define debug_checkwx() do { } while (0) +#endif + #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index bb366490c5f7..ca74a2aace42 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -74,6 +74,9 @@ struct pg_state { unsigned long start_address; unsigned level; u64 current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; }; struct prot_bits { @@ -202,6 +205,35 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, } } +static void note_prot_uxn(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if ((st->current_prot & PTE_UXN) == PTE_UXN) + return; + + WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY) + return; + if ((st->current_prot & PTE_PXN) == PTE_PXN) + return; + + WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - 
st->start_address) / PAGE_SIZE; +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val) { @@ -219,6 +251,8 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, unsigned long delta; if (st->current_prot) { + note_prot_uxn(st, addr); + note_prot_wx(st, addr); pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); @@ -344,6 +378,26 @@ static struct ptdump_info kernel_ptdump_info = { .base_addr = VA_START, }; +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, + .check_wx = true, + }; + + walk_pgd(&st, &init_mm, 0); + note_page(&st, 0, 0, 0); + if (st.wx_pages || st.uxn_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", + st.wx_pages, st.uxn_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + static int ptdump_init(void) { ptdump_initialize(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index dd5f12d0959e..17243e43184e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include u64 idmap_t0sz = TCR_T0SZ(VA_BITS); @@ -438,6 +439,8 @@ void mark_rodata_ro(void) /* flush the TLBs after updating live kernel mappings */ flush_tlb_all(); + + debug_checkwx(); } static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, -- cgit v1.2.3 From c2249707ee53b5dd696f0fae8543a754684ea04a Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:41 +0530 Subject: arm64: kprobe: protect/rename few definitions to be reused by uprobe decode-insn code has to be reused by arm64 uprobe implementation as well. Therefore, this patch protects some portion of kprobe code and renames few other, so that decode-insn functionality can be reused by uprobe even when CONFIG_KPROBES is not defined. kprobe_opcode_t and struct arch_specific_insn are also defined by linux/kprobes.h, when CONFIG_KPROBES is not defined. So, protect these definitions in asm/probes.h. linux/kprobes.h already includes asm/kprobes.h. Therefore, remove inclusion of asm/kprobes.h from decode-insn.c. There are some definitions like kprobe_insn and kprobes_handler_t etc can be re-used by uprobe. So, it would be better to remove 'k' from their names. struct arch_specific_insn is specific to kprobe. Therefore, introduce a new struct arch_probe_insn which will be common for both kprobe and uprobe, so that decode-insn code can be shared. Modify kprobe code accordingly. Function arm_probe_decode_insn() will be needed by uprobe as well. So make it global. 
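A sketch of the intended reuse (placeholder function name, not part of this patch): once the decoder is global, an arch uprobe analysis routine can hand it a struct arch_probe_insn and act on the three-way result, just as the kprobe path already does.

static int analyze_probed_insn(probe_opcode_t insn, struct arch_probe_insn *api)
{
	switch (arm_probe_decode_insn(insn, api)) {
	case INSN_REJECTED:		/* cannot be stepped or simulated */
		return -EINVAL;
	case INSN_GOOD_NO_SLOT:		/* simulated via api->handler */
	case INSN_GOOD:			/* single-stepped out of line */
		return 0;
	}
	return -EINVAL;
}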
Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/probes.h | 19 ++++++++++-------- arch/arm64/kernel/probes/decode-insn.c | 32 ++++++++++++++++-------------- arch/arm64/kernel/probes/decode-insn.h | 8 ++++++-- arch/arm64/kernel/probes/kprobes.c | 36 +++++++++++++++++----------------- 4 files changed, 52 insertions(+), 43 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 5af574d632fa..e175a825b187 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -17,19 +17,22 @@ #include -struct kprobe; -struct arch_specific_insn; - -typedef u32 kprobe_opcode_t; -typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); +typedef u32 probe_opcode_t; +typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); /* architecture specific copy of original instruction */ -struct arch_specific_insn { - kprobe_opcode_t *insn; +struct arch_probe_insn { + probe_opcode_t *insn; pstate_check_t *pstate_cc; - kprobes_handler_t *handler; + probes_handler_t *handler; /* restore address after step xol */ unsigned long restore; }; +#ifdef CONFIG_KPROBES +typedef u32 kprobe_opcode_t; +struct arch_specific_insn { + struct arch_probe_insn api; +}; +#endif #endif diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index d1731bf977ef..8a29d2982eec 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -78,8 +78,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) * INSN_GOOD If instruction is supported and uses instruction slot, * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. */ -static enum kprobe_insn __kprobes -arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +enum probe_insn __kprobes +arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api) { /* * Instructions reading or modifying the PC won't work from the XOL @@ -89,26 +89,26 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) return INSN_GOOD; if (aarch64_insn_is_bcond(insn)) { - asi->handler = simulate_b_cond; + api->handler = simulate_b_cond; } else if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn)) { - asi->handler = simulate_cbz_cbnz; + api->handler = simulate_cbz_cbnz; } else if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) { - asi->handler = simulate_tbz_tbnz; + api->handler = simulate_tbz_tbnz; } else if (aarch64_insn_is_adr_adrp(insn)) { - asi->handler = simulate_adr_adrp; + api->handler = simulate_adr_adrp; } else if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { - asi->handler = simulate_b_bl; + api->handler = simulate_b_bl; } else if (aarch64_insn_is_br(insn) || aarch64_insn_is_blr(insn) || aarch64_insn_is_ret(insn)) { - asi->handler = simulate_br_blr_ret; + api->handler = simulate_br_blr_ret; } else if (aarch64_insn_is_ldr_lit(insn)) { - asi->handler = simulate_ldr_literal; + api->handler = simulate_ldr_literal; } else if (aarch64_insn_is_ldrsw_lit(insn)) { - asi->handler = simulate_ldrsw_literal; + api->handler = simulate_ldrsw_literal; } else { /* * Instruction cannot be stepped out-of-line and we don't @@ -120,6 +120,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) return INSN_GOOD_NO_SLOT; } +#ifdef CONFIG_KPROBES static bool __kprobes is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) { @@ -138,12 
+139,12 @@ is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) return false; } -enum kprobe_insn __kprobes +enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) { - enum kprobe_insn decoded; - kprobe_opcode_t insn = le32_to_cpu(*addr); - kprobe_opcode_t *scan_end = NULL; + enum probe_insn decoded; + probe_opcode_t insn = le32_to_cpu(*addr); + probe_opcode_t *scan_end = NULL; unsigned long size = 0, offset = 0; /* @@ -162,7 +163,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) else scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; } - decoded = arm_probe_decode_insn(insn, asi); + decoded = arm_probe_decode_insn(insn, &asi->api); if (decoded != INSN_REJECTED && scan_end) if (is_probed_address_atomic(addr - 1, scan_end)) @@ -170,3 +171,4 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) return decoded; } +#endif diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h index d438289646a6..76d3f315407f 100644 --- a/arch/arm64/kernel/probes/decode-insn.h +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -23,13 +23,17 @@ */ #define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) -enum kprobe_insn { +enum probe_insn { INSN_REJECTED, INSN_GOOD_NO_SLOT, INSN_GOOD, }; -enum kprobe_insn __kprobes +#ifdef CONFIG_KPROBES +enum probe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); +#endif +enum probe_insn __kprobes +arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi); #endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f5077ea7af6d..1decd2b2c730 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -44,31 +44,31 @@ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { /* prepare insn slot */ - p->ainsn.insn[0] = cpu_to_le32(p->opcode); + p->ainsn.api.insn[0] = cpu_to_le32(p->opcode); - flush_icache_range((uintptr_t) (p->ainsn.insn), - (uintptr_t) (p->ainsn.insn) + + flush_icache_range((uintptr_t) (p->ainsn.api.insn), + (uintptr_t) (p->ainsn.api.insn) + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); /* * Needs restoring of return address after stepping xol. */ - p->ainsn.restore = (unsigned long) p->addr + + p->ainsn.api.restore = (unsigned long) p->addr + sizeof(kprobe_opcode_t); } static void __kprobes arch_prepare_simulate(struct kprobe *p) { /* This instructions is not executed xol. 
No need to adjust the PC */ - p->ainsn.restore = 0; + p->ainsn.api.restore = 0; } static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (p->ainsn.handler) - p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + if (p->ainsn.api.handler) + p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs); /* single step simulated, now go for post processing */ post_kprobe_handler(kcb, regs); @@ -98,18 +98,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; case INSN_GOOD_NO_SLOT: /* insn need simulation */ - p->ainsn.insn = NULL; + p->ainsn.api.insn = NULL; break; case INSN_GOOD: /* instruction uses slot */ - p->ainsn.insn = get_insn_slot(); - if (!p->ainsn.insn) + p->ainsn.api.insn = get_insn_slot(); + if (!p->ainsn.api.insn) return -ENOMEM; break; }; /* prepare the instruction */ - if (p->ainsn.insn) + if (p->ainsn.api.insn) arch_prepare_ss_slot(p); else arch_prepare_simulate(p); @@ -142,9 +142,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, 0); - p->ainsn.insn = NULL; + if (p->ainsn.api.insn) { + free_insn_slot(p->ainsn.api.insn, 0); + p->ainsn.api.insn = NULL; } } @@ -244,9 +244,9 @@ static void __kprobes setup_singlestep(struct kprobe *p, } - if (p->ainsn.insn) { + if (p->ainsn.api.insn) { /* prepare for single stepping */ - slot = (unsigned long)p->ainsn.insn; + slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ @@ -295,8 +295,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) return; /* return addr restore if non-branching insn */ - if (cur->ainsn.restore != 0) - instruction_pointer_set(regs, cur->ainsn.restore); + if (cur->ainsn.api.restore != 0) + instruction_pointer_set(regs, cur->ainsn.api.restore); /* restore back original saved kprobe variables and continue */ if (kcb->kprobe_status == KPROBE_REENTER) { -- cgit v1.2.3 From b66c9870e9746038a532f87632bee8c0dcbeee3a Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:42 +0530 Subject: arm64: kgdb_step_brk_fn: ignore other's exception ARM64 step exception does not have any syndrome information. So, it is responsibility of exception handler to take care that they handle it only if exception was raised for them. Since kgdb_step_brk_fn() always returns 0, therefore we might have problem when we will have other step handler registered as well. This patch fixes kgdb_step_brk_fn() to return error in case of step handler was not meant for kgdb. Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kgdb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index e017a9493b92..d217c9e95b06 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -247,6 +247,9 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { + if (!kgdb_single_step) + return DBG_HOOK_ERROR; + kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } -- cgit v1.2.3 From 3fb69640fed1dd4d8f5b81b2ee7ccaf82e369bf5 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:43 +0530 Subject: arm64: Handle TRAP_TRACE for user mode as well uprobe registers a handler at step_hook. So, single_step_handler now checks for user mode as well if there is a valid hook. 
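Since the step exception carries no syndrome, the rule the kgdb fix above applies now extends to every hook reachable from this path: a hook must positively identify its own single-step and otherwise decline, or it will swallow steps belonging to user space or to other debuggers. A sketch with placeholder names:

static bool my_expecting_step;		/* set when we requested the step */

static int my_step_fn(struct pt_regs *regs, unsigned int esr)
{
	if (!my_expecting_step)		/* hypothetical ownership test */
		return DBG_HOOK_ERROR;	/* let the next handler run */

	/* ... consume the single step ... */
	return DBG_HOOK_HANDLED;
}

static struct step_hook my_step_hook = {
	.fn = my_step_fn,
};
/* registered with register_step_hook(&my_step_hook) */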
Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/kernel/debug-monitors.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 73ae90ef434c..a8f8de012250 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -226,6 +226,8 @@ static void send_user_sigtrap(int si_code) static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { + bool handler_found = false; + /* * If we are stepping a pending breakpoint, call the hw_breakpoint * handler first. @@ -233,7 +235,14 @@ static int single_step_handler(unsigned long addr, unsigned int esr, if (!reinstall_suspended_bps(regs)) return 0; - if (user_mode(regs)) { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + handler_found = true; +#endif + if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + handler_found = true; + + if (!handler_found && user_mode(regs)) { send_user_sigtrap(TRAP_TRACE); /* @@ -243,15 +252,8 @@ static int single_step_handler(unsigned long addr, unsigned int esr, * to the active-not-pending state). */ user_rewind_single_step(current); - } else { -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) - return 0; -#endif - if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; - - pr_warning("Unexpected kernel single-step exception at EL1\n"); + } else if (!handler_found) { + pr_warn("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be * returning to regs. -- cgit v1.2.3 From 53d07e21854de064d9625853b8303357482b25b1 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:44 +0530 Subject: arm64: Handle TRAP_BRKPT for user mode as well uprobe is registered at break_hook with a unique ESR code. So, when a TRAP_BRKPT occurs, call_break_hook checks if it was for uprobe. If not, then send a SIGTRAP to user. 
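For the BRK side the ownership test is simpler, because the ESR carries the BRK immediate. A handler such as uprobes plants a BRK with its own immediate and registers a break_hook whose esr_val/esr_mask pair matches only that value, so call_break_hook() can claim the trap before the default SIGTRAP is delivered. A sketch with placeholder names (the 0x0005 immediate is the uprobe value added later in this series):

#define MY_BRK_ESR	0x0005

static int my_brk_fn(struct pt_regs *regs, unsigned int esr)
{
	/* ... handle the breakpoint ... */
	return DBG_HOOK_HANDLED;
}

static struct break_hook my_break_hook = {
	.esr_mask	= BRK64_ESR_MASK,
	.esr_val	= MY_BRK_ESR,
	.fn		= my_brk_fn,
};
/* registered with register_break_hook(&my_break_hook) */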
Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/kernel/debug-monitors.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a8f8de012250..605df76f0a06 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -306,16 +306,20 @@ NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (user_mode(regs)) { - send_user_sigtrap(TRAP_BRKPT); - } + bool handler_found = false; + #ifdef CONFIG_KPROBES - else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { - if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) - return -EFAULT; + if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED) + handler_found = true; } #endif - else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + handler_found = true; + + if (!handler_found && user_mode(regs)) { + send_user_sigtrap(TRAP_BRKPT); + } else if (!handler_found) { pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } -- cgit v1.2.3 From 06beb72fbe23e348cc423dd9310c6fc02cd7c7da Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:45 +0530 Subject: arm64: introduce mm context flag to keep 32 bit task information We need to decide in some cases like uprobe instruction analysis that whether the current mm context belongs to a 32 bit task or 64 bit. This patch has introduced an unsigned flag variable in mm_context_t. Currently, we set and clear TIF_32BIT depending on the condition that whether an elf binary load sets personality for 32 bit or 64 bit respectively. 
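A sketch of the intended use (not part of this patch): code that only has an mm rather than a task, uprobe instruction analysis for instance, can now ask whether that mm belongs to a compat task without touching thread flags.

static bool mm_is_compat(struct mm_struct *mm)
{
	return test_bit(TIF_32BIT, &mm->context.flags);
}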
Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 12 ++++++++++-- arch/arm64/include/asm/mmu.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index a55384f4a5d7..5d1700425efe 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -138,7 +138,11 @@ typedef struct user_fpsimd_state elf_fpregset_t; */ #define ELF_PLAT_INIT(_r, load_addr) (_r)->regs[0] = 0 -#define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT); +#define SET_PERSONALITY(ex) \ +({ \ + clear_bit(TIF_32BIT, ¤t->mm->context.flags); \ + clear_thread_flag(TIF_32BIT); \ +}) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ @@ -183,7 +187,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread -#define COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT); +#define COMPAT_SET_PERSONALITY(ex) \ +({ \ + set_bit(TIF_32BIT, ¤t->mm->context.flags); \ + set_thread_flag(TIF_32BIT); \ + }) #define COMPAT_ARCH_DLINFO extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp); diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a81454ad5455..47619411f0ff 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,7 @@ typedef struct { atomic64_t id; void *vdso; + unsigned long flags; } mm_context_t; /* -- cgit v1.2.3 From 9842ceae9fa8deae141533d52a6ead7666962c09 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Wed, 2 Nov 2016 14:40:46 +0530 Subject: arm64: Add uprobe support This patch adds support for uprobe on ARM64 architecture. Unit tests for following have been done so far and they have been found working 1. Step-able instructions, like sub, ldr, add etc. 2. Simulation-able like ret, cbnz, cbz etc. 3. uretprobe 4. Reject-able instructions like sev, wfe etc. 5. trapped and abort xol path 6. probe at unaligned user address. 7. longjump test cases Currently it does not support aarch32 instruction probing. 
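The software breakpoint used here is simply a BRK instruction with a dedicated immediate. As a worked example of the opcode arithmetic, BRK #imm16 keeps its immediate in bits [20:5] of the encoding, so the uprobe opcode is the monitor BRK base encoding OR-ed with the ESR value shifted into the immediate field. The 0xd4200000 base below is an assumption about AARCH64_BREAK_MON, not something this patch defines.

#include <linux/types.h>

/*
 * Worked example of the BRK64_OPCODE_UPROBES arithmetic. Assumes the usual
 * AArch64 "BRK #0" base encoding of 0xd4200000; only the shift of the ESR
 * value into bits [20:5] comes from this series.
 */
#define EXAMPLE_BRK_BASE	0xd4200000U	/* BRK #0 */
#define EXAMPLE_ESR_UPROBES	0x0005U		/* BRK64_ESR_UPROBES */

static inline u32 example_uprobe_swbp_insn(void)
{
	return EXAMPLE_BRK_BASE | (EXAMPLE_ESR_UPROBES << 5);	/* 0xd42000a0 */
}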
Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 + arch/arm64/include/asm/cacheflush.h | 1 + arch/arm64/include/asm/debug-monitors.h | 3 + arch/arm64/include/asm/ptrace.h | 8 ++ arch/arm64/include/asm/thread_info.h | 5 +- arch/arm64/include/asm/uprobes.h | 36 ++++++ arch/arm64/kernel/probes/Makefile | 2 + arch/arm64/kernel/probes/uprobes.c | 216 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/signal.c | 3 + arch/arm64/mm/flush.c | 2 +- 10 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/uprobes.h create mode 100644 arch/arm64/kernel/probes/uprobes.c (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 969ef880d234..77a807a844ac 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -238,6 +238,9 @@ config PGTABLE_LEVELS default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 +config ARCH_SUPPORTS_UPROBES + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 2e5fb976a572..e9f64ecb75ce 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -71,6 +71,7 @@ extern void __flush_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); +extern void sync_icache_aliases(void *kaddr, unsigned long len); static inline void flush_cache_mm(struct mm_struct *mm) { diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index b71420a12f26..a44cf5225429 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -68,6 +68,9 @@ #define BRK64_ESR_MASK 0xFFFF #define BRK64_ESR_KPROBES 0x0004 #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) +/* uprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_UPROBES 0x0005 +#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5)) /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index ada08b5b036d..513daf050e84 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -217,6 +217,14 @@ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); #include +#define procedure_link_pointer(regs) ((regs)->regs[30]) + +static inline void procedure_link_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + procedure_link_pointer(regs) = val; +} + #undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e9ea5a6bd449..f6859831462e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -112,6 +112,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -132,10 +133,12 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define 
_TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_UPROBE) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h new file mode 100644 index 000000000000..8d004073d0e8 --- /dev/null +++ b/arch/arm64/include/asm/uprobes.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2014-2016 Pratyush Anand + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ASM_UPROBES_H +#define _ASM_UPROBES_H + +#include +#include +#include + +#define MAX_UINSN_BYTES AARCH64_INSN_SIZE + +#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES +#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE +#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES + +typedef u32 uprobe_opcode_t; + +struct arch_uprobe_task { +}; + +struct arch_uprobe { + union { + u8 insn[MAX_UINSN_BYTES]; + u8 ixol[MAX_UINSN_BYTES]; + }; + struct arch_probe_insn api; + bool simulate; +}; + +#endif diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile index ce06312e3d34..89b6df613dde 100644 --- a/arch/arm64/kernel/probes/Makefile +++ b/arch/arm64/kernel/probes/Makefile @@ -1,3 +1,5 @@ obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ kprobes_trampoline.o \ simulate-insn.o +obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c new file mode 100644 index 000000000000..26c998534dca --- /dev/null +++ b/arch/arm64/kernel/probes/uprobes.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2014-2016 Pratyush Anand + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include +#include +#include +#include + +#include "decode-insn.h" + +#define UPROBE_INV_FAULT_CODE UINT_MAX + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + void *xol_page_kaddr = kmap_atomic(page); + void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK); + + /* Initialize the slot */ + memcpy(dst, src, len); + + /* flush caches (dcache/icache) */ + sync_icache_aliases(dst, len); + + kunmap_atomic(xol_page_kaddr); +} + +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + probe_opcode_t insn; + + /* TODO: Currently we do not support AARCH32 instruction probing */ + if (test_bit(TIF_32BIT, &mm->context.flags)) + return -ENOTSUPP; + else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) + return -EINVAL; + + insn = *(probe_opcode_t *)(&auprobe->insn[0]); + + switch (arm_probe_decode_insn(insn, &auprobe->api)) { + case INSN_REJECTED: + return -EINVAL; + + case INSN_GOOD_NO_SLOT: + auprobe->simulate = true; + break; + + default: + break; + } + + return 0; +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* Initialize with an invalid fault code to detect if ol insn trapped */ + current->thread.fault_code = UPROBE_INV_FAULT_CODE; + + /* Instruction points to execute ol */ + instruction_pointer_set(regs, utask->xol_vaddr); + + user_enable_single_step(current); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.fault_code != UPROBE_INV_FAULT_CODE); + + /* Instruction points to execute next to breakpoint address */ + instruction_pointer_set(regs, utask->vaddr + 4); + + user_disable_single_step(current); + + return 0; +} +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + /* + * Between arch_uprobe_pre_xol and arch_uprobe_post_xol, if an xol + * insn itself is trapped, then detect the case with the help of + * invalid fault code which is being set in arch_uprobe_pre_xol + */ + if (t->thread.fault_code != UPROBE_INV_FAULT_CODE) + return true; + + return false; +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + probe_opcode_t insn; + unsigned long addr; + + if (!auprobe->simulate) + return false; + + insn = *(probe_opcode_t *)(&auprobe->insn[0]); + addr = instruction_pointer(regs); + + if (auprobe->api.handler) + auprobe->api.handler(insn, addr, regs); + + return true; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + /* + * Task has received a fatal signal, so reset back to probbed + * address. + */ + instruction_pointer_set(regs, utask->vaddr); + + user_disable_single_step(current); +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + /* + * If a simple branch instruction (B) was called for retprobed + * assembly label then return true even when regs->sp and ret->stack + * are same. It will ensure that cleanup and reporting of return + * instances corresponding to callee label is done when + * handle_trampoline for called function is executed. 
+ */ + if (ctx == RP_CHECK_CHAIN_CALL) + return regs->sp <= ret->stack; + else + return regs->sp < ret->stack; +} + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, + struct pt_regs *regs) +{ + unsigned long orig_ret_vaddr; + + orig_ret_vaddr = procedure_link_pointer(regs); + /* Replace the return addr with trampoline addr */ + procedure_link_pointer_set(regs, trampoline_vaddr); + + return orig_ret_vaddr; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static int uprobe_breakpoint_handler(struct pt_regs *regs, + unsigned int esr) +{ + if (user_mode(regs) && uprobe_pre_sstep_notifier(regs)) + return DBG_HOOK_HANDLED; + + return DBG_HOOK_ERROR; +} + +static int uprobe_single_step_handler(struct pt_regs *regs, + unsigned int esr) +{ + struct uprobe_task *utask = current->utask; + + if (user_mode(regs)) { + WARN_ON(utask && + (instruction_pointer(regs) != utask->xol_vaddr + 4)); + + if (uprobe_post_sstep_notifier(regs)) + return DBG_HOOK_HANDLED; + } + + return DBG_HOOK_ERROR; +} + +/* uprobe breakpoint handler hook */ +static struct break_hook uprobes_break_hook = { + .esr_mask = BRK64_ESR_MASK, + .esr_val = BRK64_ESR_UPROBES, + .fn = uprobe_breakpoint_handler, +}; + +/* uprobe single step handler hook */ +static struct step_hook uprobes_step_hook = { + .fn = uprobe_single_step_handler, +}; + +static int __init arch_init_uprobes(void) +{ + register_break_hook(&uprobes_break_hook); + register_step_hook(&uprobes_step_hook); + + return 0; +} + +device_initcall(arch_init_uprobes); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 404dd67080b9..c7b6de62f9d3 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -414,6 +414,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, } else { local_irq_enable(); + if (thread_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 8377329d8c97..2d78d5a9b89f 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -32,7 +32,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, __flush_icache_all(); } -static void sync_icache_aliases(void *kaddr, unsigned long len) +void sync_icache_aliases(void *kaddr, unsigned long len) { unsigned long addr = (unsigned long)kaddr; -- cgit v1.2.3 From 7b03b6223105cc02672616211149ccda04184bfd Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 7 Nov 2016 23:07:22 +0530 Subject: arm64: fix error: conflicting types for 'kprobe_fault_handler' When CONFIG_KPROBE is disabled but CONFIG_UPROBE_EVENT is enabled, we get following compilation error: In file included from .../arch/arm64/kernel/probes/decode-insn.c:20:0: .../arch/arm64/include/asm/kprobes.h:52:5: error: conflicting types for 'kprobe_fault_handler' int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); ^~~~~~~~~~~~~~~~~~~~ In file included from .../arch/arm64/kernel/probes/decode-insn.c:17:0: .../include/linux/kprobes.h:398:90: note: previous definition of 'kprobe_fault_handler' was here static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) ^ .../scripts/Makefile.build:290: recipe for target 'arch/arm64/kernel/probes/decode-insn.o' failed is already included from under #ifdef CONFIG_KPROBE. So, this patch fixes the error by removing it from decode-insn.c. 
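In other words, the prototype that decode-insn.c needs is already selected by the CONFIG_KPROBES switch in the generic kprobes header, so including the arch header directly only re-declares the symbol with a second, conflicting prototype when CONFIG_KPROBES is disabled. A sketch of that mutual exclusion, paraphrasing the structure implied by the build error quoted above rather than quoting the headers:

/*
 * Sketch (paraphrased, not copied from the headers) of why the direct include
 * is redundant: only one of the two prototypes should ever be visible, and
 * the generic header already picks the right one.
 */
#ifdef CONFIG_KPROBES
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);	/* arch version */
#else
static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
	return 0;							/* generic stub */
}
#endif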
Reported-by: Catalin Marinas Signed-off-by: Pratyush Anand Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/decode-insn.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index 8a29d2982eec..6bf6657a5a52 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 6ed0038d5ecb307298bbb04e74eb96c4a500ad1f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 9 Nov 2016 16:52:21 +0000 Subject: arm64: Fix typo in add_default_hugepagesz() for 64K pages The default hugepage size when 64K pages are enabled is set to 2MB using the contiguous PTE bit. The add_default_hugepagesz(), however, uses CONT_PMD_SHIFT instead of CONT_PTE_SHIFT. There is no functional change since the values are the same. Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2e49bd252fe7..0a4c97b618ec 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -323,7 +323,7 @@ __setup("hugepagesz=", setup_hugepagesz); static __init int add_default_hugepagesz(void) { if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PMD_SHIFT); + hugetlb_add_hstate(CONT_PTE_SHIFT); return 0; } arch_initcall(add_default_hugepagesz); -- cgit v1.2.3 From 20156ce2365d61beaa6f5a78a7a789044e0e7acc Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 8 Nov 2016 13:44:38 +0800 Subject: arm64: hugetlb: remove the wrong pmd check in find_num_contig() The find_num_contig() will return 1 when the pmd is not present. It will cause a kernel dead loop in the following scenario: 1.) pmd entry is not present. 2.) the page fault occurs: ... hugetlb_fault() --> hugetlb_no_page() --> set_huge_pte_at() 3.) set_huge_pte_at() will only set the first PMD entry, since find_num_contig() just returns 1 in this case. So the PMD entries are all empty except the first one. 4.) when the kernel accesses the address mapped by the second PMD entry, a new page fault occurs: ... hugetlb_fault() --> huge_ptep_set_access_flags() The second PMD entry is still empty now. 5.) When the kernel returns, the access will cause a page fault again. The kernel will behave as in step 4) above, so we end up in a dead loop. The dead loop was observed with the 32M hugetlb page (2M PMD + Contiguous bit). This patch removes the wrong pmd check and fixes this dead loop. This patch also removes the redundant checks for PGD/PUD in the find_num_contig().
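For reference, the 32M figure comes from the contiguous-bit geometry: with a 4K granule a PMD maps 2M and the contiguous hint groups CONT_PMDS adjacent entries, all of which set_huge_pte_at() must populate, which is exactly what goes wrong when find_num_contig() wrongly returns 1. A small sketch of that arithmetic; the constants are assumed from the usual 4K-granule configuration rather than taken from this patch.

/*
 * Back-of-the-envelope check of the "32M hugetlb page" above. The values
 * assume a 4K granule (PMD_SIZE = 2M, CONT_PMDS = 16); they are not defined
 * by this patch.
 */
#define EXAMPLE_PMD_SIZE	(2UL << 20)	/* 2M with 4K pages */
#define EXAMPLE_CONT_PMDS	16

static inline unsigned long example_cont_pmd_hugepage_size(void)
{
	return EXAMPLE_CONT_PMDS * EXAMPLE_PMD_SIZE;	/* 32M */
}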
Acked-by: Steve Capper Signed-off-by: Huang Shijie Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0a4c97b618ec..9a1069dd889f 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,20 +51,8 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, *pgsize = PAGE_SIZE; if (!pte_cont(pte)) return 1; - if (!pgd_present(*pgd)) { - VM_BUG_ON(!pgd_present(*pgd)); - return 1; - } pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - VM_BUG_ON(!pud_present(*pud)); - return 1; - } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - VM_BUG_ON(!pmd_present(*pmd)); - return 1; - } if ((pte_t *)pmd == ptep) { *pgsize = PMD_SIZE; return CONT_PMDS; -- cgit v1.2.3 From 0c2f0afe3582c58efeef93bc57bc07d502132618 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 8 Nov 2016 13:44:39 +0800 Subject: arm64: hugetlb: fix the wrong address for several functions The libhugetlbfs meets several failures since the following functions do not use the correct address: huge_ptep_get_and_clear() huge_ptep_set_access_flags() huge_ptep_set_wrprotect() huge_ptep_clear_flush() This patch fixes the wrong address for them. Signed-off-by: Huang Shijie Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/hugetlbpage.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 9a1069dd889f..964b7549af5c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -200,7 +200,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); /* save the 1st pte to return */ pte = ptep_get_and_clear(mm, addr, cpte); - for (i = 1; i < ncontig; ++i) { + for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) { /* * If HW_AFDBM is enabled, then the HW could * turn on the dirty bit for any of the page @@ -238,7 +238,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pfn = pte_pfn(*cpte); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) { + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) { changed = ptep_set_access_flags(vma, addr, cpte, pfn_pte(pfn, hugeprot), @@ -261,7 +261,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, cpte = huge_pte_offset(mm, addr); ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_set_wrprotect(mm, addr, cpte); } else { ptep_set_wrprotect(mm, addr, ptep); @@ -279,7 +279,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, cpte = huge_pte_offset(vma->vm_mm, addr); ncontig = find_num_contig(vma->vm_mm, addr, cpte, *cpte, &pgsize); - for (i = 0; i < ncontig; ++i, ++cpte) + for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) ptep_clear_flush(vma, addr, cpte); } else { ptep_clear_flush(vma, addr, ptep); -- cgit v1.2.3 From 094339443e6e247254a275ec5e5d8418ad1b2d25 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 4 Nov 2016 18:17:06 +0000 Subject: arm64: percpu: kill off final ACCESS_ONCE() uses For several reasons it is preferable to use {READ,WRITE}_ONCE() rather than ACCESS_ONCE(). 
For example, these handle aggregate types, result in shorter source code, and better document the intended access (which may be useful for instrumentation features such as the upcoming KTSAN). Over a number of patches, most uses of ACCESS_ONCE() in arch/arm64 have been migrated to {READ,WRITE}_ONCE(). For consistency, and the above reasons, this patch migrates the final remaining uses. Signed-off-by: Mark Rutland Cc: Will Deacon Acked-by: Dmitry Vyukov Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/percpu.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 5394c8405e66..4afb6fcc85ce 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -101,16 +101,16 @@ static inline unsigned long __percpu_read(void *ptr, int size) switch (size) { case 1: - ret = ACCESS_ONCE(*(u8 *)ptr); + ret = READ_ONCE(*(u8 *)ptr); break; case 2: - ret = ACCESS_ONCE(*(u16 *)ptr); + ret = READ_ONCE(*(u16 *)ptr); break; case 4: - ret = ACCESS_ONCE(*(u32 *)ptr); + ret = READ_ONCE(*(u32 *)ptr); break; case 8: - ret = ACCESS_ONCE(*(u64 *)ptr); + ret = READ_ONCE(*(u64 *)ptr); break; default: BUILD_BUG(); @@ -123,16 +123,16 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size) { switch (size) { case 1: - ACCESS_ONCE(*(u8 *)ptr) = (u8)val; + WRITE_ONCE(*(u8 *)ptr, (u8)val); break; case 2: - ACCESS_ONCE(*(u16 *)ptr) = (u16)val; + WRITE_ONCE(*(u16 *)ptr, (u16)val); break; case 4: - ACCESS_ONCE(*(u32 *)ptr) = (u32)val; + WRITE_ONCE(*(u32 *)ptr, (u32)val); break; case 8: - ACCESS_ONCE(*(u64 *)ptr) = (u64)val; + WRITE_ONCE(*(u64 *)ptr, (u64)val); break; default: BUILD_BUG(); -- cgit v1.2.3 From dcbe02855f048fdf1e13ebc697e83c8d297f9f5a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:03 +0000 Subject: arm64: thread_info remove stale items We have a comment claiming __switch_to() cares about where cpu_context is located relative to cpu_domain in thread_info. However arm64 has never had a thread_info::cpu_domain field, and neither __switch_to nor cpu_switch_to care where the cpu_context field is relative to others. Additionally, the init_thread_info alias is never used anywhere in the kernel, and will shortly become problematic when thread_info is moved into task_struct. This patch removes both. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: James Morse Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/thread_info.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f6859831462e..6ad76efbb3aa 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -42,7 +42,6 @@ typedef unsigned long mm_segment_t; /* * low level task data that entry.S needs immediate access to. - * __switch_to() assumes cpu_context follows immediately after cpu_domain. 
*/ struct thread_info { unsigned long flags; /* low level flags */ @@ -60,7 +59,6 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -#define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) /* -- cgit v1.2.3 From 3fe12da4c7fa6491e0fb7c5371716ac7f8ea80a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:04 +0000 Subject: arm64: asm-offsets: remove unused definitions Subsequent patches will move the thread_info::{task,cpu} fields, and the current TI_{TASK,CPU} offset definitions are not used anywhere. This patch removes the redundant definitions. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: James Morse Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/asm-offsets.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 4a2f0f0fef32..d30b2321c0ee 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -39,8 +39,6 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); - DEFINE(TI_TASK, offsetof(struct thread_info, task)); - DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); -- cgit v1.2.3 From a9ea0017ebe8889dfa136cac2aa7ae0ee6915e1f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:05 +0000 Subject: arm64: factor out current_stack_pointer We define current_stack_pointer in the thread_info header, though other files and headers relying upon it do not have this necessary include, and are thus fragile to changes in the header soup. Subsequent patches will affect the header soup such that directly including the thread_info header may result in a circular header include in some of these cases, so we can't simply include it. Instead, factor current_stack_pointer out into its own header, and have all existing users include it explicitly.
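A minimal usage sketch of the new header (the helper below is invented for illustration): a file that needs the raw stack pointer now includes <asm/stack_pointer.h> directly instead of getting the named-register variable by way of the thread_info header.

#include <asm/stack_pointer.h>

/*
 * Illustrative helper, not part of the patch: current_stack_pointer is a
 * named-register variable bound to sp, so it reads like any other
 * unsigned long.
 */
static inline bool example_sp_in_range(unsigned long low, unsigned long high)
{
	return current_stack_pointer >= low && current_stack_pointer < high;
}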
Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/percpu.h | 2 ++ arch/arm64/include/asm/perf_event.h | 2 ++ arch/arm64/include/asm/stack_pointer.h | 9 +++++++++ arch/arm64/include/asm/thread_info.h | 6 +----- arch/arm64/kernel/return_address.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + 7 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/stack_pointer.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 4afb6fcc85ce..3bd498e4de4c 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,6 +16,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include + static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2065f46fa740..9eee2beb4dbc 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -17,6 +17,8 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H +#include + #define ARMV8_PMU_MAX_COUNTERS 32 #define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h new file mode 100644 index 000000000000..ffcdf742cddf --- /dev/null +++ b/arch/arm64/include/asm/stack_pointer.h @@ -0,0 +1,9 @@ +#ifndef __ASM_STACK_POINTER_H +#define __ASM_STACK_POINTER_H + +/* + * how to get the current stack pointer from C + */ +register unsigned long current_stack_pointer asm ("sp"); + +#endif /* __ASM_STACK_POINTER_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6ad76efbb3aa..bce0f07483c1 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -36,6 +36,7 @@ struct task_struct; +#include #include typedef unsigned long mm_segment_t; @@ -61,11 +62,6 @@ struct thread_info { #define init_stack (init_thread_union.stack) -/* - * how to get the current stack pointer from C - */ -register unsigned long current_stack_pointer asm ("sp"); - /* * how to get the thread information struct from C */ diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c index 1718706fde83..12a87f2600f2 100644 --- a/arch/arm64/kernel/return_address.c +++ b/arch/arm64/kernel/return_address.c @@ -12,6 +12,7 @@ #include #include +#include #include struct return_address_data { diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index c2efddfca18c..5b8006819cde 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -22,6 +22,7 @@ #include #include +#include #include /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c9986b3e0a96..95a545200a52 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 876e7a38e8788773aac768091aaa3b42e470c03b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:06 +0000 Subject: arm64: traps: simplify die() and __die() In arm64's die and __die routines we pass around a thread_info, and subsequently use this to determine the relevant task_struct, and the end of the thread's stack. 
Subsequent patches will decouple thread_info from the stack, and this approach will no longer work. To figure out the end of the stack, we can use the new generic end_of_stack() helper. As we only call __die() from die(), and die() always deals with the current task, we can remove the parameter and have both acquire current directly, which also makes it clear that __die can't be called for arbitrary tasks. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 95a545200a52..7ac30bf943e9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -228,10 +228,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #endif #define S_SMP " SMP" -static int __die(const char *str, int err, struct thread_info *thread, - struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -246,7 +245,8 @@ static int __die(const char *str, int err, struct thread_info *thread, print_modules(); __show_regs(regs); pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), + end_of_stack(tsk)); if (!user_mode(regs)) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, @@ -265,7 +265,6 @@ static DEFINE_RAW_SPINLOCK(die_lock); */ void die(const char *str, struct pt_regs *regs, int err) { - struct thread_info *thread = current_thread_info(); int ret; oops_enter(); @@ -273,9 +272,9 @@ void die(const char *str, struct pt_regs *regs, int err) raw_spin_lock_irq(&die_lock); console_verbose(); bust_spinlocks(1); - ret = __die(str, err, thread, regs); + ret = __die(str, err, regs); - if (regs && kexec_should_crash(thread->task)) + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); -- cgit v1.2.3 From 2020a5ae7c8c2c8504565004915017507b135c63 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:07 +0000 Subject: arm64: unexport walk_stackframe The walk_stackframe functions is architecture-specific, with a varying prototype, and common code should not use it directly. None of its current users can be built as modules. With THREAD_INFO_IN_TASK, users will also need to hold a stack reference before calling it. There's no reason for it to be exported, and it's very easy to misuse, so unexport it for now. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 5b8006819cde..d53f99d4c223 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -129,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame, break; } } -EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { -- cgit v1.2.3 From 9bbd4c56b0b642f04396da378296e68096d5afca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:08 +0000 Subject: arm64: prep stack walkers for THREAD_INFO_IN_TASK When CONFIG_THREAD_INFO_IN_TASK is selected, task stacks may be freed before a task is destroyed. 
To account for this, the stacks are refcounted, and when manipulating the stack of another task, it is necessary to get/put the stack to ensure it isn't freed and/or re-used while we do so. This patch reworks the arm64 stack walking code to account for this. When CONFIG_THREAD_INFO_IN_TASK is not selected these perform no refcounting, and this should only be a structural change that does not affect behaviour. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: AKASHI Takahiro Cc: Andy Lutomirski Cc: James Morse Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/process.c | 20 ++++++++++++++------ arch/arm64/kernel/stacktrace.c | 5 +++++ arch/arm64/kernel/traps.c | 5 +++++ 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 01753cd7d3f0..ec7b9c00effe 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -350,27 +350,35 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; - unsigned long stack_page; + unsigned long stack_page, ret = 0; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; + stack_page = (unsigned long)try_get_task_stack(p); + if (!stack_page) + return 0; + frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame.graph = p->curr_ret_stack; #endif - stack_page = (unsigned long)task_stack_page(p); do { if (frame.sp < stack_page || frame.sp >= stack_page + THREAD_SIZE || unwind_frame(p, &frame)) - return 0; - if (!in_sched_functions(frame.pc)) - return frame.pc; + goto out; + if (!in_sched_functions(frame.pc)) { + ret = frame.pc; + goto out; + } } while (count ++ < 16); - return 0; + +out: + put_task_stack(p); + return ret; } unsigned long arch_align_stack(unsigned long sp) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d53f99d4c223..8a552a33c6ef 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -181,6 +181,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) struct stack_trace_data data; struct stackframe frame; + if (!try_get_task_stack(tsk)) + return; + data.trace = trace; data.skip = trace->skip; @@ -202,6 +205,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) walk_stackframe(tsk, &frame, save_trace, &data); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; + + put_task_stack(tsk); } void save_stack_trace(struct stack_trace *trace) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7ac30bf943e9..4731133286b3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -148,6 +148,9 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!tsk) tsk = current; + if (!try_get_task_stack(tsk)) + return; + /* * Switching between stacks is valid when tracing current and in * non-preemptible context. 
@@ -213,6 +216,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack + sizeof(struct pt_regs)); } } + + put_task_stack(tsk); } void show_stack(struct task_struct *tsk, unsigned long *sp) -- cgit v1.2.3 From 623b476fc815464a0241ea7483da7b3580b7d8ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:09 +0000 Subject: arm64: move sp_el0 and tpidr_el1 into cpu_suspend_ctx When returning from idle, we rely on the fact that thread_info lives at the end of the kernel stack, and restore this by masking the saved stack pointer. Subsequent patches will sever the relationship between the stack and thread_info, and to cater for this we must save/restore sp_el0 explicitly, storing it in cpu_suspend_ctx. As cpu_suspend_ctx must be doubleword aligned, this leaves us with an extra slot in cpu_suspend_ctx. We can use this to save/restore tpidr_el1 in the same way, which simplifies the code, avoiding pointer chasing on the restore path (as we no longer need to load thread_info::cpu followed by the relevant slot in __per_cpu_offset based on this). This patch stashes both registers in cpu_suspend_ctx. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: James Morse Cc: Lorenzo Pieralisi Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/suspend.h | 2 +- arch/arm64/kernel/sleep.S | 3 --- arch/arm64/kernel/suspend.c | 6 ------ arch/arm64/mm/proc.S | 6 ++++++ 4 files changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index b8a313fd7a09..de5600f40adf 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -1,7 +1,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 10 +#define NR_CTX_REGS 12 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 1bec41b5fda3..df67652e46f0 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -125,9 +125,6 @@ ENTRY(_cpu_resume) /* load sp from context */ ldr x2, [x0, #CPU_CTX_SP] mov sp, x2 - /* save thread_info */ - and x2, x2, #~(THREAD_SIZE - 1) - msr sp_el0, x2 /* * cpu_do_resume expects x0 to contain context address pointer */ diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index bb0cd787a9d3..1e3be9064cfa 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -46,12 +46,6 @@ void notrace __cpu_suspend_exit(void) */ cpu_uninstall_idmap(); - /* - * Restore per-cpu offset before any kernel - * subsystem relying on it has a chance to run. - */ - set_my_cpu_offset(per_cpu_offset(cpu)); - /* * PSTATE was not saved over suspend/resume, re-enable any detected * features that might not have been set correctly. 
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 352c73b6a59e..6a853a867d9e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,11 +70,14 @@ ENTRY(cpu_do_suspend) mrs x8, mdscr_el1 mrs x9, oslsr_el1 mrs x10, sctlr_el1 + mrs x11, tpidr_el1 + mrs x12, sp_el0 stp x2, x3, [x0] stp x4, xzr, [x0, #16] stp x5, x6, [x0, #32] stp x7, x8, [x0, #48] stp x9, x10, [x0, #64] + stp x11, x12, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -90,6 +93,7 @@ ENTRY(cpu_do_resume) ldp x6, x8, [x0, #32] ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] + ldp x13, x14, [x0, #80] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 @@ -112,6 +116,8 @@ ENTRY(cpu_do_resume) msr mdscr_el1, x10 msr sctlr_el1, x12 + msr tpidr_el1, x13 + msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1 */ -- cgit v1.2.3 From 580efaa7ccfb8c0790dce4396434f0e5ac8d86ee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:10 +0000 Subject: arm64: smp: prepare for smp_processor_id() rework Subsequent patches will make smp_processor_id() use a percpu variable. This will make smp_processor_id() dependent on the percpu offset, and thus we cannot use smp_processor_id() to figure out what to initialise the offset to. Prepare for this by initialising the percpu offset based on current::cpu, which will work regardless of how smp_processor_id() is implemented. Also, make this relationship obvious by placing this code together at the start of secondary_start_kernel(). Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 8507703dabe4..f64401faa091 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -208,7 +208,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; + + cpu = task_cpu(current); + set_my_cpu_offset(per_cpu_offset(cpu)); /* * All kernel threads share the same mm context; grab a @@ -217,8 +220,6 @@ asmlinkage void secondary_start_kernel(void) atomic_inc(&mm->mm_count); current->active_mm = mm; - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. -- cgit v1.2.3 From 57c82954e77fa12c1023e87210d2ede77aaa0058 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:11 +0000 Subject: arm64: make cpu number a percpu variable In the absence of CONFIG_THREAD_INFO_IN_TASK, core code maintains thread_info::cpu, and low-level architecture code can access this to build raw_smp_processor_id(). With CONFIG_THREAD_INFO_IN_TASK, core code maintains task_struct::cpu, which for reasons of hte header soup is not accessible to low-level arch code. Instead, we can maintain a percpu variable containing the cpu number. For both the old and new implementation of raw_smp_processor_id(), we read a syreg into a GPR, add an offset, and load the result. As the offset is now larger, it may not be folded into the load, but otherwise the assembly shouldn't change much. 
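For context, the read-a-sysreg, add-an-offset, load sequence mentioned above corresponds to the following C-level shape once cpu_number exists. The helper name is invented, and the comment spells out the assumption that tpidr_el1 holds this CPU's percpu offset.

/*
 * Illustrative helper, not from the patch: what the new
 * raw_smp_processor_id() amounts to. this_cpu_ptr() resolves to the percpu
 * symbol's base address plus the offset stashed in tpidr_el1 by
 * set_my_cpu_offset(), i.e. an mrs, an add and a load. The value can be
 * stale if the caller is preemptible, which is why the preempt-count
 * manipulation that this_cpu_read() would add is deliberately avoided.
 */
static inline int example_raw_cpu_id(void)
{
	return *this_cpu_ptr(&cpu_number);
}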
Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: James Morse Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 11 ++++++++++- arch/arm64/kernel/smp.c | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 022644704a93..968b08de820d 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -29,11 +29,20 @@ #ifndef __ASSEMBLY__ +#include + #include #include #include -#define raw_smp_processor_id() (current_thread_info()->cpu) +DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); + +/* + * We don't use this_cpu_read(cpu_number) as that has implicit writes to + * preempt_count, and associated (compiler) barriers, that we'd like to avoid + * the expense of. If we're preemptible, the value can be stale at use anyway. + */ +#define raw_smp_processor_id() (*this_cpu_ptr(&cpu_number)) struct seq_file; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f64401faa091..6f42c68e457f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -58,6 +58,9 @@ #define CREATE_TRACE_POINTS #include +DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); + /* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core @@ -719,6 +722,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) */ for_each_possible_cpu(cpu) { + per_cpu(cpu_number, cpu) = cpu; + if (cpu == smp_processor_id()) continue; -- cgit v1.2.3 From 1b7e2296a822dfd2349960addc42a139360ce769 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:12 +0000 Subject: arm64: assembler: introduce ldr_this_cpu Shortly we will want to load a percpu variable in the return from userspace path. We can save an instruction by folding the addition of the percpu offset into the load instruction, and this patch adds a new helper to do so. At the same time, we clean up this_cpu_ptr for consistency. As with {adr,ldr,str}_l, we change the template to take the destination register first, and name this dst. Secondly, we rename the macro to adr_this_cpu, following the scheme of adr_l, and matching the newly added ldr_this_cpu. 
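Put in C terms, the difference between the two macros is address versus value: adr_this_cpu leaves the address of this CPU's copy of a percpu symbol in the destination register, while ldr_this_cpu also performs the final load, saving one instruction on paths that only need the value. A rough C analogue, with names invented and purely illustrative:

/* C analogue of adr_this_cpu: address of this CPU's copy of a percpu symbol */
static inline void *example_adr_this_cpu(void *sym, unsigned long tpidr_el1_val)
{
	return (char *)sym + tpidr_el1_val;
}

/* C analogue of ldr_this_cpu: the value itself, with the load folded in */
static inline unsigned long example_ldr_this_cpu(void *sym, unsigned long tpidr_el1_val)
{
	return *(unsigned long *)((char *)sym + tpidr_el1_val);
}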
Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: Ard Biesheuvel Cc: James Morse Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 19 +++++++++++++++---- arch/arm64/kernel/entry.S | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 28bfe6132eb6..128a9ca06c8a 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -202,14 +202,25 @@ lr .req x30 // link register .endm /* + * @dst: Result of per_cpu(sym, smp_processor_id()) * @sym: The name of the per-cpu variable - * @reg: Result of per_cpu(sym, smp_processor_id()) * @tmp: scratch register */ - .macro this_cpu_ptr, sym, reg, tmp - adr_l \reg, \sym + .macro adr_this_cpu, dst, sym, tmp + adr_l \dst, \sym mrs \tmp, tpidr_el1 - add \reg, \reg, \tmp + add \dst, \dst, \tmp + .endm + + /* + * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id())) + * @sym: The name of the per-cpu variable + * @tmp: scratch register + */ + .macro ldr_this_cpu dst, sym, tmp + adr_l \dst, \sym + mrs \tmp, tpidr_el1 + ldr \dst, [\dst, \tmp] .endm /* diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 223d54a4d66b..2d4c83bc1f81 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -200,7 +200,7 @@ alternative_else_nop_endif cmp x25, tsk b.ne 9998f - this_cpu_ptr irq_stack, x25, x26 + adr_this_cpu x25, irq_stack, x26 mov x26, #IRQ_STACK_START_SP add x26, x25, x26 -- cgit v1.2.3 From c02433dd6de32f042cf3ffe476746b1115b8c096 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 Nov 2016 20:23:13 +0000 Subject: arm64: split thread_info from task stack This patch moves arm64's struct thread_info from the task stack into task_struct. This protects thread_info from corruption in the case of stack overflows, and makes its address harder to determine if stack addresses are leaked, making a number of attacks more difficult. Precise detection and handling of overflow is left for subsequent patches. Largely, this involves changing code to store the task_struct in sp_el0, and acquire the thread_info from the task struct. Core code now implements current_thread_info(), and as noted in this relies on offsetof(task_struct, thread_info) == 0, enforced by core code. This change means that the 'tsk' register used in entry.S now points to a task_struct, rather than a thread_info as it used to. To make this clear, the TI_* field offsets are renamed to TSK_TI_*, with asm-offsets appropriately updated to account for the structural change. Userspace clobbers sp_el0, and we can no longer restore this from the stack. Instead, the current task is cached in a per-cpu variable that we can safely access from early assembly as interrupts are disabled (and we are thus not preemptible). Both secondary entry and idle are updated to stash the sp and task pointer separately. 
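The trick that makes this work is that, with THREAD_INFO_IN_TASK, thread_info is embedded at offset 0 of task_struct (enforced by core code), so the task pointer kept in sp_el0 doubles as the thread_info pointer. A sketch of that layout assumption; the helper and assertion below are illustrative, not part of the patch.

#include <linux/bug.h>
#include <linux/sched.h>

/* Illustrative only: relies on offsetof(struct task_struct, thread_info) == 0 */
static inline struct thread_info *example_task_thread_info(struct task_struct *tsk)
{
	BUILD_BUG_ON(offsetof(struct task_struct, thread_info) != 0);
	return (struct thread_info *)tsk;
}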
Signed-off-by: Mark Rutland Tested-by: Laura Abbott Cc: AKASHI Takahiro Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: James Morse Cc: Kees Cook Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/current.h | 22 +++++++++++++++++++ arch/arm64/include/asm/smp.h | 1 + arch/arm64/include/asm/thread_info.h | 24 --------------------- arch/arm64/kernel/asm-offsets.c | 8 ++++--- arch/arm64/kernel/entry.S | 41 ++++++++++++++++++------------------ arch/arm64/kernel/head.S | 11 +++++----- arch/arm64/kernel/process.c | 16 ++++++++++++++ arch/arm64/kernel/smp.c | 2 ++ 10 files changed, 73 insertions(+), 54 deletions(-) create mode 100644 arch/arm64/include/asm/current.h (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 77a807a844ac..0b8227f23eed 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -109,6 +109,7 @@ config ARM64 select POWER_SUPPLY select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select THREAD_INFO_IN_TASK help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 44e1d7f10add..28196b18e394 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h -generic-y += current.h generic-y += delay.h generic-y += div64.h generic-y += dma.h diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h new file mode 100644 index 000000000000..f2bcbe2d9889 --- /dev/null +++ b/arch/arm64/include/asm/current.h @@ -0,0 +1,22 @@ +#ifndef __ASM_CURRENT_H +#define __ASM_CURRENT_H + +#include + +#include + +#ifndef __ASSEMBLY__ + +struct task_struct; + +static __always_inline struct task_struct *get_current(void) +{ + return (struct task_struct *)read_sysreg(sp_el0); +} + +#define current get_current() + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_CURRENT_H */ + diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 968b08de820d..a62db952ffcb 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -82,6 +82,7 @@ asmlinkage void secondary_start_kernel(void); */ struct secondary_data { void *stack; + struct task_struct *task; long status; }; diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index bce0f07483c1..c17ad4d213d0 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -47,41 +47,17 @@ typedef unsigned long mm_segment_t; struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ - struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ - int cpu; /* cpu */ }; #define INIT_THREAD_INFO(tsk) \ { \ - .task = &tsk, \ - .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ } #define init_stack (init_thread_union.stack) -/* - * how to get the thread information struct from C - */ -static inline struct thread_info *current_thread_info(void) __attribute_const__; - -/* - * struct thread_info can be accessed directly via sp_el0. - * - * We don't use read_sysreg() as we want the compiler to cache the value where - * possible. 
- */ -static inline struct thread_info *current_thread_info(void) -{ - unsigned long sp_el0; - - asm ("mrs %0, sp_el0" : "=r" (sp_el0)); - - return (struct thread_info *)sp_el0; -} - #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index d30b2321c0ee..c2dc9fa4f09b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -36,9 +36,10 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); BLANK(); - DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); - DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); + DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); BLANK(); @@ -121,6 +122,7 @@ int main(void) DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); + DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2d4c83bc1f81..6349a8324b4f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -90,9 +90,8 @@ .if \el == 0 mrs x21, sp_el0 - mov tsk, sp - and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear, - ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug + ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. mov x29, xzr // fp pointed to user-space @@ -100,10 +99,10 @@ add x21, sp, #S_FRAME_SIZE get_thread_info tsk /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ - ldr x20, [tsk, #TI_ADDR_LIMIT] + ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] str x20, [sp, #S_ORIG_ADDR_LIMIT] mov x20, #TASK_SIZE_64 - str x20, [tsk, #TI_ADDR_LIMIT] + str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 @@ -139,7 +138,7 @@ .if \el != 0 /* Restore the task's original addr_limit. */ ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TI_ADDR_LIMIT] + str x20, [tsk, #TSK_TI_ADDR_LIMIT] /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif @@ -192,13 +191,14 @@ alternative_else_nop_endif mov x19, sp // preserve the original sp /* - * Compare sp with the current thread_info, if the top - * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and - * should switch to the irq stack. + * Compare sp with the base of the task stack. + * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack, + * and should switch to the irq stack. 
*/ - and x25, x19, #~(THREAD_SIZE - 1) - cmp x25, tsk - b.ne 9998f + ldr x25, [tsk, TSK_STACK] + eor x25, x25, x19 + and x25, x25, #~(THREAD_SIZE - 1) + cbnz x25, 9998f adr_this_cpu x25, irq_stack, x26 mov x26, #IRQ_STACK_START_SP @@ -427,9 +427,9 @@ el1_irq: irq_handler #ifdef CONFIG_PREEMPT - ldr w24, [tsk, #TI_PREEMPT] // get preempt count + ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count cbnz w24, 1f // preempt count != 0 - ldr x0, [tsk, #TI_FLAGS] // get flags + ldr x0, [tsk, #TSK_TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt 1: @@ -444,7 +444,7 @@ ENDPROC(el1_irq) el1_preempt: mov x24, lr 1: bl preempt_schedule_irq // irq en/disable is done inside - ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS + ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 #endif @@ -674,8 +674,7 @@ ENTRY(cpu_switch_to) ldp x29, x9, [x8], #16 ldr lr, [x8] mov sp, x9 - and x9, x9, #~(THREAD_SIZE - 1) - msr sp_el0, x9 + msr sp_el0, x1 ret ENDPROC(cpu_switch_to) @@ -686,7 +685,7 @@ ENDPROC(cpu_switch_to) ret_fast_syscall: disable_irq // disable interrupts str x0, [sp, #S_X0] // returned x0 - ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK @@ -706,14 +705,14 @@ work_pending: #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on // enabled while in userspace #endif - ldr x1, [tsk, #TI_FLAGS] // re-check for single-step + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user /* * "slow" syscall return path. */ ret_to_user: disable_irq // disable interrupts - ldr x1, [tsk, #TI_FLAGS] + ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: @@ -746,7 +745,7 @@ el0_svc_naked: // compat entry point enable_dbg_and_irq ct_user_exit 1 - ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace cmp scno, sc_nr // check upper syscall limit diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 332e33193ccf..eaafb253bbfa 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -428,7 +428,8 @@ ENDPROC(__create_page_tables) __primary_switched: adrp x4, init_thread_union add sp, x4, #THREAD_SIZE - msr sp_el0, x4 // Save thread_info + adr_l x5, init_task + msr sp_el0, x5 // Save thread_info adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address @@ -699,10 +700,10 @@ __secondary_switched: isb adr_l x0, secondary_data - ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack - mov sp, x0 - and x0, x0, #~(THREAD_SIZE - 1) - msr sp_el0, x0 // save thread_info + ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + mov sp, x1 + ldr x2, [x0, #CPU_BOOT_TASK] + msr sp_el0, x2 mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ec7b9c00effe..a98b743631c5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,20 @@ void uao_thread_switch(struct task_struct *next) } } +/* + * We store our current task in sp_el0, which is clobbered by userspace. Keep a + * shadow copy so that we can restore this upon entry from userspace. 
+ * + * This is *only* for exception entry from EL0, and is not valid until we + * __switch_to() a user task. + */ +DEFINE_PER_CPU(struct task_struct *, __entry_task); + +static void entry_task_switch(struct task_struct *next) +{ + __this_cpu_write(__entry_task, next); +} + /* * Thread switching. */ @@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev, tls_thread_switch(next); hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); + entry_task_switch(next); uao_thread_switch(next); /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6f42c68e457f..cb87234cfcf2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * We need to tell the secondary core where to find its stack and the * page tables. */ + secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } + secondary_data.task = NULL; secondary_data.stack = NULL; status = READ_ONCE(secondary_data.status); if (ret && status) { -- cgit v1.2.3 From a4023f682739439b434165b54af7cb3676a4766e Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 8 Nov 2016 13:56:20 +0000 Subject: arm64: Add hypervisor safe helper for checking constant capabilities The hypervisor may not have full access to the kernel data structures and hence cannot safely use cpus_have_cap() helper for checking the system capability. Add a safe helper for hypervisors to check a constant system capability, which *doesn't* fall back to checking the bitmap maintained by the kernel. With this, make the cpus_have_cap() only check the bitmask and force constant cap checks to use the new API for quicker checks. 
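As an illustration of the distinction (not part of the patch), the sketch below shows the intended split between the two helpers; check_from_hyp() and check_from_kernel() are made-up callers. The constant-cap helper compiles down to a runtime-patched branch and touches no kernel data at run time, which is what makes it usable from hypervisor context, while the plain helper now only reads the cpu_hwcaps bitmap.

/* Hypothetical callers, for illustration only. */
static bool check_from_hyp(void)
{
	/* May run at EL2: resolved via a static branch, no bitmap access. */
	return cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154);
}

static bool check_from_kernel(unsigned int cap)
{
	/* Kernel context only: reads the cpu_hwcaps bitmap. */
	return cpus_have_cap(cap);
}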
Cc: Robert Ritcher Cc: Tirumalesh Chalamarla Signed-off-by: Suzuki K Poulose Reviewed-by: Will Deacon Reviewed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 19 ++++++++++++------- arch/arm64/kernel/cpufeature.c | 2 +- arch/arm64/kernel/process.c | 2 +- drivers/irqchip/irq-gic-v3.c | 13 +------------ 4 files changed, 15 insertions(+), 21 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a27c3245ba21..8b63adb148e7 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -9,8 +9,6 @@ #ifndef __ASM_CPUFEATURE_H #define __ASM_CPUFEATURE_H -#include - #include #include @@ -45,6 +43,8 @@ #ifndef __ASSEMBLY__ +#include +#include #include /* CPU feature register tracking */ @@ -122,14 +122,19 @@ static inline bool cpu_have_feature(unsigned int num) return elf_hwcap & (1UL << num); } +/* System capability check for constant caps */ +static inline bool cpus_have_const_cap(int num) +{ + if (num >= ARM64_NCAPS) + return false; + return static_branch_unlikely(&cpu_hwcap_keys[num]); +} + static inline bool cpus_have_cap(unsigned int num) { if (num >= ARM64_NCAPS) return false; - if (__builtin_constant_p(num)) - return static_branch_unlikely(&cpu_hwcap_keys[num]); - else - return test_bit(num, cpu_hwcaps); + return test_bit(num, cpu_hwcaps); } static inline void cpus_set_cap(unsigned int num) @@ -218,7 +223,7 @@ static inline bool cpu_supports_mixed_endian_el0(void) static inline bool system_supports_32bit_el0(void) { - return cpus_have_cap(ARM64_HAS_32BIT_EL0); + return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); } static inline bool system_supports_mixed_endian_el0(void) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c02504ea304b..fc2bd1926607 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1102,5 +1102,5 @@ void __init setup_cpu_features(void) static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) { - return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO)); + return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a98b743631c5..a3a2816ba73a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -283,7 +283,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; if (IS_ENABLED(CONFIG_ARM64_UAO) && - cpus_have_cap(ARM64_HAS_UAO)) + cpus_have_const_cap(ARM64_HAS_UAO)) childregs->pstate |= PSR_UAO_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 19d642eae096..26e1d7fafb1e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -120,11 +120,10 @@ static void gic_redist_wait_for_rwp(void) } #ifdef CONFIG_ARM64 -static DEFINE_STATIC_KEY_FALSE(is_cavium_thunderx); static u64 __maybe_unused gic_read_iar(void) { - if (static_branch_unlikely(&is_cavium_thunderx)) + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) return gic_read_iar_cavium_thunderx(); else return gic_read_iar_common(); @@ -905,14 +904,6 @@ static const struct irq_domain_ops partition_domain_ops = { .select = gic_irq_domain_select, }; -static void gicv3_enable_quirks(void) -{ -#ifdef 
CONFIG_ARM64 - if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_23154)) - static_branch_enable(&is_cavium_thunderx); -#endif -} - static int __init gic_init_bases(void __iomem *dist_base, struct redist_region *rdist_regs, u32 nr_redist_regions, @@ -935,8 +926,6 @@ static int __init gic_init_bases(void __iomem *dist_base, gic_data.nr_redist_regions = nr_redist_regions; gic_data.redist_stride = redist_stride; - gicv3_enable_quirks(); - /* * Find out how many interrupts are supported. * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) -- cgit v1.2.3 From 82e0191a1aa11abfddb22c8944989b7735560efc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 8 Nov 2016 13:56:21 +0000 Subject: arm64: Support systems without FP/ASIMD The arm64 kernel assumes that FP/ASIMD units are always present and accesses the FP/ASIMD specific registers unconditionally. This could cause problems when they are absent. This patch adds the support for kernel handling systems without FP/ASIMD by skipping the register access within the kernel. For kvm, we trap the accesses to FP/ASIMD and inject an undefined instruction exception to the VM. The callers of the exported kernel_neon_begin_partial() should make sure that the FP/ASIMD is supported. Cc: Will Deacon Cc: Christoffer Dall Cc: Ard Biesheuvel Signed-off-by: Suzuki K Poulose Reviewed-by: Marc Zyngier [catalin.marinas@arm.com: add comment on the ARM64_HAS_NO_FPSIMD conflict and the new location] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 12 +++++++++++- arch/arm64/include/asm/neon.h | 3 ++- arch/arm64/kernel/cpufeature.c | 15 +++++++++++++++ arch/arm64/kernel/fpsimd.c | 14 ++++++++++++++ arch/arm64/kvm/handle_exit.c | 11 +++++++++++ arch/arm64/kvm/hyp/hyp-entry.S | 9 ++++++++- arch/arm64/kvm/hyp/switch.c | 5 ++++- 7 files changed, 65 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8b63adb148e7..0ef718b67c54 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -38,8 +38,13 @@ #define ARM64_HAS_32BIT_EL0 13 #define ARM64_HYP_OFFSET_LOW 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 +/* + * The macro below will be moved to asm/cpucaps.h together with the + * ARM64_NCAPS update. 
+ */ +#define ARM64_HAS_NO_FPSIMD 16 -#define ARM64_NCAPS 16 +#define ARM64_NCAPS 17 #ifndef __ASSEMBLY__ @@ -231,6 +236,11 @@ static inline bool system_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); } +static inline bool system_supports_fpsimd(void) +{ + return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h index 13ce4cc18e26..ad4cdc966c0f 100644 --- a/arch/arm64/include/asm/neon.h +++ b/arch/arm64/include/asm/neon.h @@ -9,8 +9,9 @@ */ #include +#include -#define cpu_has_neon() (1) +#define cpu_has_neon() system_supports_fpsimd() #define kernel_neon_begin() kernel_neon_begin_partial(32) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index fc2bd1926607..f89385d794f6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -746,6 +746,14 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); } +static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused) +{ + u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); + + return cpuid_feature_extract_signed_field(pfr0, + ID_AA64PFR0_FP_SHIFT) < 0; +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -829,6 +837,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .def_scope = SCOPE_SYSTEM, .matches = hyp_offset_low, }, + { + /* FP/SIMD is not implemented */ + .capability = ARM64_HAS_NO_FPSIMD, + .def_scope = SCOPE_SYSTEM, + .min_field_value = 0, + .matches = has_no_fpsimd, + }, {}, }; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 394c61db5566..b883f1f75216 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -127,6 +127,8 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { + if (!system_supports_fpsimd()) + return; /* * Save the current FPSIMD state to memory, but only if whatever is in * the registers is in fact the most recent userland FPSIMD state of @@ -157,6 +159,8 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + if (!system_supports_fpsimd()) + return; memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); @@ -168,6 +172,8 @@ void fpsimd_flush_thread(void) */ void fpsimd_preserve_current_state(void) { + if (!system_supports_fpsimd()) + return; preempt_disable(); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); @@ -181,6 +187,8 @@ void fpsimd_preserve_current_state(void) */ void fpsimd_restore_current_state(void) { + if (!system_supports_fpsimd()) + return; preempt_disable(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -199,6 +207,8 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct fpsimd_state *state) { + if (!system_supports_fpsimd()) + return; preempt_disable(); fpsimd_load_state(state); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -228,6 +238,8 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); */ void kernel_neon_begin_partial(u32 num_regs) { + if (WARN_ON(!system_supports_fpsimd())) + return; if (in_interrupt()) { 
struct fpsimd_partial_state *s = this_cpu_ptr( in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); @@ -252,6 +264,8 @@ EXPORT_SYMBOL(kernel_neon_begin_partial); void kernel_neon_end(void) { + if (!system_supports_fpsimd()) + return; if (in_interrupt()) { struct fpsimd_partial_state *s = this_cpu_ptr( in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a204adf29f0a..1bfe30dfbfe7 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -57,6 +57,16 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +/* + * Guest access to FP/ASIMD registers are routed to this handler only + * when the system doesn't support FP/ASIMD. + */ +static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + kvm_inject_undefined(vcpu); + return 1; +} + /** * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event * instruction executed by a guest @@ -144,6 +154,7 @@ static exit_handle_fn arm_exit_handlers[] = { [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug, [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug, [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug, + [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd, }; static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 4e92399f7105..5e9052f087f2 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -106,9 +106,16 @@ el1_trap: * x0: ESR_EC */ - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + */ +alternative_if_not ARM64_HAS_NO_FPSIMD cmp x0, #ESR_ELx_EC_FP_ASIMD b.eq __fpsimd_guest_restore +alternative_else_nop_endif mrs x1, tpidr_el2 mov x0, #ARM_EXCEPTION_TRAP diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 83037cd62d01..8bcae7b14704 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -21,6 +21,7 @@ #include #include #include +#include static bool __hyp_text __fpsimd_enabled_nvhe(void) { @@ -76,9 +77,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) * traps are only taken to EL2 if the operation would not otherwise * trap to EL1. Therefore, always make sure that for 32-bit guests, * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. */ val = vcpu->arch.hcr_el2; - if (!(val & HCR_RW)) { + if (!(val & HCR_RW) && system_supports_fpsimd()) { write_sysreg(1 << 30, fpexc32_el2); isb(); } -- cgit v1.2.3 From b08fb180bb8802d1c599beb1acd6a3b26163b4f6 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 14 Nov 2016 19:32:43 +0530 Subject: arm64: Allow hw watchpoint at varied offset from base address ARM64 hardware supports watchpoint at any double word aligned address. However, it can select any consecutive bytes from offset 0 to 7 from that base address. For example, if base address is programmed as 0x420030 and byte select is 0x1C, then access of 0x420032,0x420033 and 0x420034 will generate a watchpoint exception. Currently, we do not have such modularity. We can only program byte, halfword, word and double word access exception from any base address. 
This patch adds support to overcome above limitations. Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/include/asm/hw_breakpoint.h | 2 +- arch/arm64/kernel/hw_breakpoint.c | 47 +++++++++++++++++----------------- arch/arm64/kernel/ptrace.c | 7 ++--- 3 files changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 9510ace570e2..d1c3b06ad307 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -119,7 +119,7 @@ struct perf_event; struct pmu; extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type); + int *gen_len, int *gen_type, int *offset); extern int arch_check_bp_in_kernelspace(struct perf_event *bp); extern int arch_validate_hwbkpt_settings(struct perf_event *bp); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 948b73148d56..3f7bc65e7ef6 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -349,7 +349,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) * to generic breakpoint descriptions. */ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type) + int *gen_len, int *gen_type, int *offset) { /* Type */ switch (ctrl.type) { @@ -369,8 +369,12 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, return -EINVAL; } + if (!ctrl.len) + return -EINVAL; + *offset = __ffs(ctrl.len); + /* Len */ - switch (ctrl.len) { + switch (ctrl.len >> *offset) { case ARM_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -517,18 +521,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) default: return -EINVAL; } - - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; } else { if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) alignment_mask = 0x3; else alignment_mask = 0x7; - if (info->address & alignment_mask) - return -EINVAL; + offset = info->address & alignment_mask; } + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + /* * Disallow per-task kernel breakpoints since these would * complicate the stepping code. @@ -665,8 +668,8 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { int i, step = 0, *kernel_step, access; - u32 ctrl_reg; - u64 val, alignment_mask; + u32 ctrl_reg, lens, lene; + u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; struct arch_hw_breakpoint *info; @@ -684,25 +687,21 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, goto unlock; info = counter_arch_bp(wp); - /* AArch32 watchpoints are either 4 or 8 bytes aligned. */ - if (is_compat_task()) { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - } else { - alignment_mask = 0x7; - } - /* Check if the watchpoint value matches. */ + /* Check if the watchpoint value and byte select match. */ val = read_wb_reg(AARCH64_DBG_REG_WVR, i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select to confirm. 
*/ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) + lens = ffs(ctrl.len) - 1; + lene = fls(ctrl.len) - 1; + /* + * FIXME: reported address can be anywhere between "the + * lowest address accessed by the memory access that + * triggered the watchpoint" and "the highest watchpointed + * address accessed by the memory access". So, it may not + * lie in the interval of watchpoint address range. + */ + if (addr < val + lens || addr > val + lene) goto unlock; /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e0c81da60f76..fc35e06ccaac 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -327,13 +327,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, struct arch_hw_breakpoint_ctrl ctrl, struct perf_event_attr *attr) { - int err, len, type, disabled = !ctrl.enabled; + int err, len, type, offset, disabled = !ctrl.enabled; attr->disabled = disabled; if (disabled) return 0; - err = arch_bp_generic_fields(ctrl, &len, &type); + err = arch_bp_generic_fields(ctrl, &len, &type, &offset); if (err) return err; @@ -352,6 +352,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, attr->bp_len = len; attr->bp_type = type; + attr->bp_addr += offset; return 0; } @@ -404,7 +405,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type, if (IS_ERR(bp)) return PTR_ERR(bp); - *addr = bp ? bp->attr.bp_addr : 0; + *addr = bp ? counter_arch_bp(bp)->address : 0; return 0; } -- cgit v1.2.3 From fdfeff0f9e3d9be2b68fa02566017ffc581ae17b Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 14 Nov 2016 19:32:44 +0530 Subject: arm64: hw_breakpoint: Handle inexact watchpoint addresses Arm64 hardware does not always report a watchpoint hit address that matches one of the watchpoints set. It can also report an address "near" the watchpoint if a single instruction access both watched and unwatched addresses. There is no straight-forward way, short of disassembling the offending instruction, to map that address back to the watchpoint. Previously, when the hardware reported a watchpoint hit on an address that did not match our watchpoint (this happens in case of instructions which access large chunks of memory such as "stp") the process would enter a loop where we would be continually resuming it (because we did not recognise that watchpoint hit) and it would keep hitting the watchpoint again and again. The tracing process would never get notified of the watchpoint hit. This commit fixes the problem by looking at the watchpoints near the address reported by the hardware. If the address does not exactly match one of the watchpoints we have set, it attributes the hit to the nearest watchpoint we have. This heuristic is a bit dodgy, but I don't think we can do much more, given the hardware limitations. 
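To make the heuristic concrete, below is a hedged user-space sketch (not part of the patch) that reproduces the distance calculation added as get_distance_from_watchpoint() in the hunk that follows, using the example watchpoint from the previous patch (WVR = 0x420030, byte select 0x1C, i.e. watched bytes 0x420032..0x420034):

#include <stdio.h>

/* Bit indices of the lowest/highest set bit, mirroring the kernel's __ffs()/__fls(). */
static unsigned int lowest_bit(unsigned int x)  { return __builtin_ctz(x); }
static unsigned int highest_bit(unsigned int x) { return 31 - __builtin_clz(x); }

static unsigned long distance(unsigned long addr, unsigned long wvr,
			      unsigned int byte_select)
{
	unsigned long wp_low  = wvr + lowest_bit(byte_select);
	unsigned long wp_high = wvr + highest_bit(byte_select);

	if (addr < wp_low)
		return wp_low - addr;
	if (addr > wp_high)
		return addr - wp_high;
	return 0;				/* exact match */
}

int main(void)
{
	printf("%lu\n", distance(0x420036, 0x420030, 0x1c));	/* 2: near miss */
	printf("%lu\n", distance(0x420033, 0x420030, 0x1c));	/* 0: exact hit */
	return 0;
}

A reported address of 0x420036 is two bytes past the watched range, so it is attributed to this watchpoint only if no closer one exists; 0x420033 is an exact match.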
Signed-off-by: Pavel Labath [panand: reworked to rebase on his patches] Signed-off-by: Pratyush Anand [will: use __ffs instead of ffs - 1] Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 96 ++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 27 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 3f7bc65e7ef6..13035d06b498 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -664,11 +664,46 @@ unlock: } NOKPROBE_SYMBOL(breakpoint_handler); +/* + * Arm64 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See Section D2.10.5 "Determining the memory location that caused a Watchpoint + * exception" of ARMv8 Architecture Reference Manual for details. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u64 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - int i, step = 0, *kernel_step, access; - u32 ctrl_reg, lens, lene; + int i, step = 0, *kernel_step, access, closest_match = 0; + u64 min_dist = -1, dist; + u32 ctrl_reg; u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; @@ -678,31 +713,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, slots = this_cpu_ptr(wp_on_reg); debug_info = ¤t->thread.debug; + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; - - info = counter_arch_bp(wp); - - /* Check if the watchpoint value and byte select match. */ - val = read_wb_reg(AARCH64_DBG_REG_WVR, i); - ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); - decode_ctrl_reg(ctrl_reg, &ctrl); - lens = ffs(ctrl.len) - 1; - lene = fls(ctrl.len) - 1; - /* - * FIXME: reported address can be anywhere between "the - * lowest address accessed by the memory access that - * triggered the watchpoint" and "the highest watchpointed - * address accessed by the memory access". So, it may not - * lie in the interval of watchpoint address range. - */ - if (addr < val + lens || addr > val + lene) - goto unlock; + continue; /* * Check that the access type matches. @@ -711,18 +730,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, access = (esr & AARCH64_ESR_ACCESS_MASK) ? 
HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; + /* Check if the watchpoint value and byte select match. */ + val = read_wb_reg(AARCH64_DBG_REG_WVR, i); + ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + + info = counter_arch_bp(wp); info->trigger = addr; perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ if (is_default_overflow_handler(wp)) step = 1; + } + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + perf_bp_event(wp, regs); -unlock: - rcu_read_unlock(); + /* Do we need to handle the stepping? */ + if (is_default_overflow_handler(wp)) + step = 1; } + rcu_read_unlock(); if (!step) return 0; -- cgit v1.2.3 From 0ddb8e0b784ba034f3096d5a54684d0d73155e2a Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 14 Nov 2016 19:32:45 +0530 Subject: arm64: Allow hw watchpoint of length 3,5,6 and 7 Since, arm64 can support all offset within a double word limit. Therefore, now support other lengths within that range as well. Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/include/asm/hw_breakpoint.h | 4 ++++ arch/arm64/kernel/hw_breakpoint.c | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index d1c3b06ad307..b6b167ac082b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -77,7 +77,11 @@ static inline void decode_ctrl_reg(u32 reg, /* Lengths */ #define ARM_BREAKPOINT_LEN_1 0x1 #define ARM_BREAKPOINT_LEN_2 0x3 +#define ARM_BREAKPOINT_LEN_3 0x7 #define ARM_BREAKPOINT_LEN_4 0xf +#define ARM_BREAKPOINT_LEN_5 0x1f +#define ARM_BREAKPOINT_LEN_6 0x3f +#define ARM_BREAKPOINT_LEN_7 0x7f #define ARM_BREAKPOINT_LEN_8 0xff /* Kernel stepping */ diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 13035d06b498..1b3c747fedda 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -317,9 +317,21 @@ static int get_hbp_len(u8 hbp_len) case ARM_BREAKPOINT_LEN_2: len_in_bytes = 2; break; + case ARM_BREAKPOINT_LEN_3: + len_in_bytes = 3; + break; case ARM_BREAKPOINT_LEN_4: len_in_bytes = 4; break; + case ARM_BREAKPOINT_LEN_5: + len_in_bytes = 5; + break; + case ARM_BREAKPOINT_LEN_6: + len_in_bytes = 6; + break; + case ARM_BREAKPOINT_LEN_7: + len_in_bytes = 7; + break; case ARM_BREAKPOINT_LEN_8: len_in_bytes = 8; break; @@ -381,9 +393,21 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, case ARM_BREAKPOINT_LEN_2: *gen_len = HW_BREAKPOINT_LEN_2; break; + case ARM_BREAKPOINT_LEN_3: + *gen_len = HW_BREAKPOINT_LEN_3; + break; case ARM_BREAKPOINT_LEN_4: *gen_len = HW_BREAKPOINT_LEN_4; break; + case ARM_BREAKPOINT_LEN_5: + *gen_len = HW_BREAKPOINT_LEN_5; + break; + case ARM_BREAKPOINT_LEN_6: + *gen_len = HW_BREAKPOINT_LEN_6; + break; + case ARM_BREAKPOINT_LEN_7: + *gen_len = HW_BREAKPOINT_LEN_7; + break; case ARM_BREAKPOINT_LEN_8: *gen_len = HW_BREAKPOINT_LEN_8; break; @@ -427,9 +451,21 @@ static int arch_build_bp_info(struct perf_event *bp) case HW_BREAKPOINT_LEN_2: info->ctrl.len = ARM_BREAKPOINT_LEN_2; break; + case 
HW_BREAKPOINT_LEN_3: + info->ctrl.len = ARM_BREAKPOINT_LEN_3; + break; case HW_BREAKPOINT_LEN_4: info->ctrl.len = ARM_BREAKPOINT_LEN_4; break; + case HW_BREAKPOINT_LEN_5: + info->ctrl.len = ARM_BREAKPOINT_LEN_5; + break; + case HW_BREAKPOINT_LEN_6: + info->ctrl.len = ARM_BREAKPOINT_LEN_6; + break; + case HW_BREAKPOINT_LEN_7: + info->ctrl.len = ARM_BREAKPOINT_LEN_7; + break; case HW_BREAKPOINT_LEN_8: info->ctrl.len = ARM_BREAKPOINT_LEN_8; break; -- cgit v1.2.3 From a8ada146f5179793bbaff3c936d73147ccd47ba2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 28 Oct 2016 18:07:37 +0100 Subject: arm64: Update the synchronous external abort fault description This patch updates the description of the synchronous external aborts on translation table walks. Cc: Will Deacon Cc: James Morse Cc: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 0f8788374815..d035cc594445 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -507,10 +507,10 @@ static const struct fault_info { { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, { do_bad, SIGBUS, 0, "synchronous parity error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, -- cgit v1.2.3 From bd38967d406fb4f9fca67d612db71b5d74cfb0f5 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Jul 2016 14:58:21 +0100 Subject: arm64: Factor out PAN enabling/disabling into separate uaccess_* macros This patch moves the directly coded alternatives for turning PAN on/off into separate uaccess_{enable,disable} macros or functions. The asm macros take a few arguments which will be used in subsequent patches. Note that any (unlikely) access that the compiler might generate between uaccess_enable() and uaccess_disable(), other than those explicitly specified by the user access code, will not be protected by PAN. 
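To illustrate the contract the new helpers impose on callers, here is a hedged sketch (not part of the patch). __do_unprivileged_access() is a made-up stand-in for the explicit ldtr/sttr user accesses a real caller (get_user/put_user, the futex code below) issues between the two calls:

/* Hypothetical caller, for illustration only. */
extern int __do_unprivileged_access(u32 __user *uaddr);

static int example_user_access(u32 __user *uaddr)
{
	int ret;

	uaccess_enable();	/* nop, or SET_PSTATE_PAN(0) when ARM64_HAS_PAN is set */
	ret = __do_unprivileged_access(uaddr);
	uaccess_disable();	/* nop, or SET_PSTATE_PAN(1) */

	return ret;
}

As the commit message notes, any other access the compiler happens to emit inside that window also runs with PAN cleared, so only the intended user accesses should sit between the two calls.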
Cc: Will Deacon Cc: James Morse Cc: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/futex.h | 17 ++++---- arch/arm64/include/asm/uaccess.h | 79 +++++++++++++++++++++++++++++++----- arch/arm64/kernel/armv8_deprecated.c | 11 +++-- arch/arm64/lib/clear_user.S | 11 ++--- arch/arm64/lib/copy_from_user.S | 11 ++--- arch/arm64/lib/copy_in_user.S | 11 ++--- arch/arm64/lib/copy_to_user.S | 11 ++--- 7 files changed, 93 insertions(+), 58 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index f2585cdd32c2..85c4a8981d47 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -21,15 +21,12 @@ #include #include -#include -#include #include -#include #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ +do { \ + uaccess_enable(); \ asm volatile( \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ @@ -44,11 +41,11 @@ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -118,8 +115,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + uaccess_enable(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" -ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -134,10 +131,10 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " .popsection\n" _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) -ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); + uaccess_disable(); *uval = val; return ret; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 55d0adbf6509..154659509afb 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,11 @@ #ifndef __ASM_UACCESS_H #define __ASM_UACCESS_H +#include +#include + +#ifndef __ASSEMBLY__ + /* * User space memory access functions */ @@ -26,10 +31,8 @@ #include #include -#include #include #include -#include #include #include #include @@ -119,6 +122,44 @@ static inline void set_fs(mm_segment_t fs) " .long (" #from " - .), (" #to " - .)\n" \ " .popsection\n" +/* + * User access enabling/disabling. + */ +#define __uaccess_disable(alt) \ +do { \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +#define __uaccess_enable(alt) \ +do { \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ + CONFIG_ARM64_PAN)); \ +} while (0) + +static inline void uaccess_disable(void) +{ + __uaccess_disable(ARM64_HAS_PAN); +} + +static inline void uaccess_enable(void) +{ + __uaccess_enable(ARM64_HAS_PAN); +} + +/* + * These functions are no-ops when UAO is present. 
+ */ +static inline void uaccess_disable_not_uao(void) +{ + __uaccess_disable(ARM64_ALT_PAN_NOT_UAO); +} + +static inline void uaccess_enable_not_uao(void) +{ + __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); +} + /* * The "__xxx" versions of the user access functions do not verify the address * space - it must have been done previously with a separate "access_ok()" @@ -146,8 +187,7 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ @@ -168,9 +208,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -215,8 +254,7 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ @@ -237,8 +275,7 @@ do { \ default: \ BUILD_BUG(); \ } \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\ - CONFIG_ARM64_PAN)); \ + uaccess_disable_not_uao(); \ } while (0) #define __put_user(x, ptr) \ @@ -331,4 +368,26 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +#else /* __ASSEMBLY__ */ + +#include + +/* + * User access enabling/disabling macros. These are no-ops when UAO is + * present. 
+ */ + .macro uaccess_disable_not_uao, tmp1 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(1) +alternative_else_nop_endif + .endm + + .macro uaccess_enable_not_uao, tmp1, tmp2 +alternative_if ARM64_ALT_PAN_NOT_UAO + SET_PSTATE_PAN(0) +alternative_else_nop_endif + .endm + +#endif /* __ASSEMBLY__ */ + #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index b0988bb1bf64..bdb35b92003e 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -285,10 +284,10 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) #define __SWP_LL_SC_LOOPS 4 #define __user_swpX_asm(data, addr, res, temp, temp2, B) \ +do { \ + uaccess_enable(); \ __asm__ __volatile__( \ " mov %w3, %w7\n" \ - ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ "0: ldxr"B" %w2, [%4]\n" \ "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ @@ -306,12 +305,12 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) " .popsection" \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ - ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ - CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ - : "memory") + : "memory"); \ + uaccess_disable(); \ +} while (0) #define __user_swp_asm(data, addr, res, temp, temp2) \ __user_swpX_asm(data, addr, res, temp, temp2, "") diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 5d1cad3ce6d6..d7150e30438a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -17,10 +17,7 @@ */ #include -#include -#include -#include -#include +#include .text @@ -33,8 +30,7 @@ * Alignment fixed up by hardware. 
*/ ENTRY(__clear_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x2, x3 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -54,8 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x2 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4fd67ea03bb0..cfe13396085b 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -16,11 +16,8 @@ #include -#include -#include #include -#include -#include +#include /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -67,12 +64,10 @@ end .req x5 ENTRY(__arch_copy_from_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index f7292dd08c84..718b1c4e2f85 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -18,11 +18,8 @@ #include -#include -#include #include -#include -#include +#include /* * Copy from user space to user space (alignment handled by the hardware) @@ -68,12 +65,10 @@ end .req x5 ENTRY(__copy_in_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 7a7efe255034..e99e31c9acac 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -16,11 +16,8 @@ #include -#include -#include #include -#include -#include +#include /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -66,12 +63,10 @@ end .req x5 ENTRY(__arch_copy_to_user) -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_enable_not_uao x3, x4 add end, x0, x2 #include "copy_template.S" -ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ - CONFIG_ARM64_PAN) + uaccess_disable_not_uao x3 mov x0, #0 ret ENDPROC(__arch_copy_to_user) -- cgit v1.2.3 From f33bcf03e6079668da6bf4eec4a7dcf9289131d0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Jul 2016 15:48:55 +0100 Subject: arm64: Factor out TTBR0_EL1 post-update workaround into a specific asm macro This patch takes the errata workaround code out of cpu_do_switch_mm into a dedicated post_ttbr0_update_workaround macro which will be reused in a subsequent patch. 
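As a rough C-level analogue (not part of the patch) of what the new asm macro does after a TTBR0_EL1 write, the sketch below uses the cpus_have_const_cap() helper introduced earlier in this series in place of the runtime-patched alternative; the real macro is assembly and is compiled out when CONFIG_CAVIUM_ERRATUM_27456 is disabled:

static inline void post_ttbr0_update_workaround_sketch(void)
{
	if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456) &&
	    cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
		/* Invalidate the I-cache and synchronise, as the asm macro does. */
		asm volatile("ic	iallu\n\t"
			     "dsb	nsh\n\t"
			     "isb" : : : "memory");
	}
}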
Cc: Will Deacon Cc: James Morse Cc: Kees Cook Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 13 +++++++++++++ arch/arm64/mm/proc.S | 6 +----- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 128a9ca06c8a..55752231055c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -406,4 +406,17 @@ alternative_endif movk \reg, :abs_g0_nc:\val .endm +/* + * Errata workaround post TTBR0_EL1 update. + */ + .macro post_ttbr0_update_workaround +#ifdef CONFIG_CAVIUM_ERRATUM_27456 +alternative_if ARM64_WORKAROUND_CAVIUM_27456 + ic iallu + dsb nsh + isb +alternative_else_nop_endif +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 6a853a867d9e..32682be978e0 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -142,11 +142,7 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -alternative_if ARM64_WORKAROUND_CAVIUM_27456 - ic iallu - dsb nsh - isb -alternative_else_nop_endif + post_ttbr0_update_workaround ret ENDPROC(cpu_do_switch_mm) -- cgit v1.2.3 From 4b65a5db362783ab4b04ca1c1d2ad70ed9b0ba2a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Jul 2016 16:53:00 +0100 Subject: arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1 This patch adds the uaccess macros/functions to disable access to user space by setting TTBR0_EL1 to a reserved zeroed page. Since the value written to TTBR0_EL1 must be a physical address, for simplicity this patch introduces a reserved_ttbr0 page at a constant offset from swapper_pg_dir. The uaccess_disable code uses the ttbr1_el1 value adjusted by the reserved_ttbr0 offset. Enabling access to user is done by restoring TTBR0_EL1 with the value from the struct thread_info ttbr0 variable. Interrupts must be disabled during the uaccess_ttbr0_enable code to ensure the atomicity of the thread_info.ttbr0 read and TTBR0_EL1 write. This patch also moves the get_thread_info asm macro from entry.S to assembler.h for reuse in the uaccess_ttbr0_* macros. Cc: Will Deacon Cc: James Morse Cc: Kees Cook Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 16 +++++ arch/arm64/include/asm/cpufeature.h | 6 ++ arch/arm64/include/asm/kernel-pgtable.h | 7 +++ arch/arm64/include/asm/thread_info.h | 3 + arch/arm64/include/asm/uaccess.h | 108 ++++++++++++++++++++++++++++++-- arch/arm64/kernel/asm-offsets.c | 3 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/entry.S | 4 -- arch/arm64/kernel/head.S | 6 +- arch/arm64/kernel/vmlinux.lds.S | 5 ++ 10 files changed, 146 insertions(+), 13 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 55752231055c..446f6c46d4b1 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -41,6 +41,15 @@ msr daifclr, #2 .endm + .macro save_and_disable_irq, flags + mrs \flags, daif + msr daifset, #2 + .endm + + .macro restore_irq, flags + msr daif, \flags + .endm + /* * Enable and disable debug exceptions. */ @@ -406,6 +415,13 @@ alternative_endif movk \reg, :abs_g0_nc:\val .endm +/* + * Return the current thread_info. + */ + .macro get_thread_info, rd + mrs \rd, sp_el0 + .endm + /* * Errata workaround post TTBR0_EL1 update. 
*/ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0ef718b67c54..a081531f9ff4 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -241,6 +241,12 @@ static inline bool system_supports_fpsimd(void) return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } +static inline bool system_uses_ttbr0_pan(void) +{ + return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && + !cpus_have_cap(ARM64_HAS_PAN); +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 7e51d1b57c0c..7803343e5881 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -19,6 +19,7 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H +#include #include /* @@ -54,6 +55,12 @@ #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +#define RESERVED_TTBR0_SIZE (PAGE_SIZE) +#else +#define RESERVED_TTBR0_SIZE (0) +#endif + /* Initial memory map size */ #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index c17ad4d213d0..46c3b93cf865 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -47,6 +47,9 @@ typedef unsigned long mm_segment_t; struct thread_info { unsigned long flags; /* low level flags */ mm_segment_t addr_limit; /* address limit */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ +#endif int preempt_count; /* 0 => preemptable, <0 => bug */ }; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 154659509afb..6986f56cfa88 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -19,6 +19,7 @@ #define __ASM_UACCESS_H #include +#include #include #ifndef __ASSEMBLY__ @@ -125,16 +126,71 @@ static inline void set_fs(mm_segment_t fs) /* * User access enabling/disabling. */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void __uaccess_ttbr0_disable(void) +{ + unsigned long ttbr; + + /* reserved_ttbr0 placed at the end of swapper_pg_dir */ + ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; + write_sysreg(ttbr, ttbr0_el1); + isb(); +} + +static inline void __uaccess_ttbr0_enable(void) +{ + unsigned long flags; + + /* + * Disable interrupts to avoid preemption between reading the 'ttbr0' + * variable and the MSR. A context switch could trigger an ASID + * roll-over and an update of 'ttbr0'. 
+ */ + local_irq_save(flags); + write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + isb(); + local_irq_restore(flags); +} + +static inline bool uaccess_ttbr0_disable(void) +{ + if (!system_uses_ttbr0_pan()) + return false; + __uaccess_ttbr0_disable(); + return true; +} + +static inline bool uaccess_ttbr0_enable(void) +{ + if (!system_uses_ttbr0_pan()) + return false; + __uaccess_ttbr0_enable(); + return true; +} +#else +static inline bool uaccess_ttbr0_disable(void) +{ + return false; +} + +static inline bool uaccess_ttbr0_enable(void) +{ + return false; +} +#endif + #define __uaccess_disable(alt) \ do { \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ - CONFIG_ARM64_PAN)); \ + if (!uaccess_ttbr0_disable()) \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __uaccess_enable(alt) \ do { \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ - CONFIG_ARM64_PAN)); \ + if (uaccess_ttbr0_enable()) \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ + CONFIG_ARM64_PAN)); \ } while (0) static inline void uaccess_disable(void) @@ -373,16 +429,56 @@ extern __must_check long strnlen_user(const char __user *str, long n); #include /* - * User access enabling/disabling macros. These are no-ops when UAO is - * present. + * User access enabling/disabling macros. + */ +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + .macro __uaccess_ttbr0_disable, tmp1 + mrs \tmp1, ttbr1_el1 // swapper_pg_dir + add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir + msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 + isb + .endm + + .macro __uaccess_ttbr0_enable, tmp1 + get_thread_info \tmp1 + ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 + isb + .endm + + .macro uaccess_ttbr0_disable, tmp1 +alternative_if_not ARM64_HAS_PAN + __uaccess_ttbr0_disable \tmp1 +alternative_else_nop_endif + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 +alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption + __uaccess_ttbr0_enable \tmp1 + restore_irq \tmp2 +alternative_else_nop_endif + .endm +#else + .macro uaccess_ttbr0_disable, tmp1 + .endm + + .macro uaccess_ttbr0_enable, tmp1, tmp2 + .endm +#endif + +/* + * These macros are no-ops when UAO is present. 
*/ .macro uaccess_disable_not_uao, tmp1 + uaccess_ttbr0_disable \tmp1 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(1) alternative_else_nop_endif .endm .macro uaccess_enable_not_uao, tmp1, tmp2 + uaccess_ttbr0_enable \tmp1, \tmp2 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index c2dc9fa4f09b..bc049afc73a7 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -39,6 +39,9 @@ int main(void) DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); +#endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); BLANK(); DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context)); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f89385d794f6..fdf8f045929f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -47,6 +47,7 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +EXPORT_SYMBOL(cpu_hwcaps); DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6349a8324b4f..b7db3766a312 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -183,10 +183,6 @@ alternative_else_nop_endif eret // return to kernel .endm - .macro get_thread_info, rd - mrs \rd, sp_el0 - .endm - .macro irq_stack_entry mov x19, sp // preserve the original sp diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index eaafb253bbfa..7ee6d74101cf 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -326,14 +326,14 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE bl __inval_cache_range /* * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -412,7 +412,7 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE dmb sy bl __inval_cache_range diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1105aab1e6d6..b8deffa9e1bf 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -216,6 +216,11 @@ SECTIONS swapper_pg_dir = .; . += SWAPPER_DIR_SIZE; +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + reserved_ttbr0 = .; + . += RESERVED_TTBR0_SIZE; +#endif + _end = .; STABS_DEBUG -- cgit v1.2.3 From 39bc88e5e38e9b213bd7d833ce0df6ec029761ad Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 2 Sep 2016 14:54:03 +0100 Subject: arm64: Disable TTBR0_EL1 during normal kernel execution When the TTBR0 PAN feature is enabled, the kernel entry points need to disable access to TTBR0_EL1. 
The PAN status of the interrupted context is stored as part of the saved pstate, reusing the PSR_PAN_BIT (22). Restoring access to TTBR0_EL1 is done on exception return if returning to user or returning to a context where PAN was disabled. Context switching via switch_mm() must defer the update of TTBR0_EL1 until a return to user or an explicit uaccess_enable() call. Special care needs to be taken for two cases where TTBR0_EL1 is set outside the normal kernel context switch operation: EFI run-time services (via efi_set_pgd) and CPU suspend (via cpu_(un)install_idmap). Code has been added to avoid deferred TTBR0_EL1 switching as in switch_mm() and restore the reserved TTBR0_EL1 when uninstalling the special TTBR0_EL1. User cache maintenance (user_cache_maint_handler and __flush_cache_user_range) needs the TTBR0_EL1 re-instated since the operations are performed by user virtual address. This patch also removes a stale comment on the switch_mm() function. Cc: Will Deacon Cc: James Morse Cc: Kees Cook Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 26 ++++++++++++++- arch/arm64/include/asm/mmu_context.h | 53 ++++++++++++++++++++++-------- arch/arm64/kernel/entry.S | 63 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 9 ++++++ arch/arm64/kernel/traps.c | 9 ++++-- arch/arm64/mm/cache.S | 6 +++- arch/arm64/mm/context.c | 7 +++- 7 files changed, 153 insertions(+), 20 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a9e54aad15ef..3a405dccb6cf 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -1,6 +1,7 @@ #ifndef _ASM_EFI_H #define _ASM_EFI_H +#include #include #include #include @@ -75,7 +76,30 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt) static inline void efi_set_pgd(struct mm_struct *mm) { - switch_mm(NULL, mm, NULL); + __switch_mm(mm); + + if (system_uses_ttbr0_pan()) { + if (mm != current->active_mm) { + /* + * Update the current thread's saved ttbr0 since it is + * restored as part of a return from exception. Set + * the hardware TTBR0_EL1 using cpu_switch_mm() + * directly to enable potential errata workarounds. + */ + update_saved_ttbr0(current, mm); + cpu_switch_mm(mm->pgd, mm); + } else { + /* + * Defer the switch to the current thread's TTBR0_EL1 + * until uaccess_enable(). Restore the current + * thread's saved ttbr0 corresponding to its active_mm + * (if different from init_mm). + */ + cpu_set_reserved_ttbr0(); + if (current->active_mm != &init_mm) + update_saved_ttbr0(current, current->active_mm); + } + } } void efi_virtmap_load(void); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a50185375f09..0363fe80455c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -103,7 +104,7 @@ static inline void cpu_uninstall_idmap(void) local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); - if (mm != &init_mm) + if (mm != &init_mm && !system_uses_ttbr0_pan()) cpu_switch_mm(mm->pgd, mm); } @@ -163,20 +164,26 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -/* - * This is the actual mm switch as far as the scheduler - * is concerned. No registers are touched. We avoid - * calling the CPU specific function when the mm hasn't - * actually changed. 
- */ -static inline void -switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) +#ifdef CONFIG_ARM64_SW_TTBR0_PAN +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) { - unsigned int cpu = smp_processor_id(); + if (system_uses_ttbr0_pan()) { + BUG_ON(mm->pgd == swapper_pg_dir); + task_thread_info(tsk)->ttbr0 = + virt_to_phys(mm->pgd) | ASID(mm) << 48; + } +} +#else +static inline void update_saved_ttbr0(struct task_struct *tsk, + struct mm_struct *mm) +{ +} +#endif - if (prev == next) - return; +static inline void __switch_mm(struct mm_struct *next) +{ + unsigned int cpu = smp_processor_id(); /* * init_mm.pgd does not contain any user mappings and it is always @@ -190,8 +197,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, check_and_switch_context(next, cpu); } +static inline void +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + if (prev != next) + __switch_mm(next); + + /* + * Update the saved TTBR0_EL1 of the scheduled-in task as the previous + * value may have not been initialised yet (activate_mm caller) or the + * ASID has changed since the last run (following the context switch + * of another thread of the same process). Avoid setting the reserved + * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit). + */ + if (next != &init_mm) + update_saved_ttbr0(tsk, next); +} + #define deactivate_mm(tsk,mm) do { } while (0) -#define activate_mm(prev,next) switch_mm(prev, next, NULL) +#define activate_mm(prev,next) switch_mm(prev, next, current) void verify_cpu_asid_bits(void); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b7db3766a312..4f0d76339414 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,7 +29,9 @@ #include #include #include +#include #include +#include #include /* @@ -108,6 +110,32 @@ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Set the TTBR0 PAN bit in SPSR. When the exception is taken from + * EL0, there is no need to check the state of TTBR0_EL1 since + * accesses are always enabled. + * Note that the meaning of this bit differs from the ARMv8.1 PAN + * feature as all TTBR0_EL1 accesses are disabled, not just those to + * user mappings. + */ +alternative_if ARM64_HAS_PAN + b 1f // skip TTBR0 PAN +alternative_else_nop_endif + + .if \el != 0 + mrs x21, ttbr0_el1 + tst x21, #0xffff << 48 // Check for the reserved ASID + orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR + b.eq 1f // TTBR0 access already disabled + and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR + .endif + + __uaccess_ttbr0_disable x21 +1: +#endif + stp x22, x23, [sp, #S_PC] /* @@ -146,6 +174,40 @@ ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter + .endif + +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR + * PAN bit checking. + */ +alternative_if ARM64_HAS_PAN + b 2f // skip TTBR0 PAN +alternative_else_nop_endif + + .if \el != 0 + tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set + .endif + + __uaccess_ttbr0_enable x0 + + .if \el == 0 + /* + * Enable errata workarounds only if returning to user. The only + * workaround currently required for TTBR0_EL1 changes are for the + * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache + * corruption). 
+ */ + post_ttbr0_update_workaround + .endif +1: + .if \el != 0 + and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit + .endif +2: +#endif + + .if \el == 0 ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -161,6 +223,7 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif .endif + msr elr_el1, x21 // set up the return data msr spsr_el1, x22 ldp x0, x1, [sp, #16 * 0] diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f534f492a268..a53f52ac81c6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -291,6 +291,15 @@ void __init setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + /* + * Make sure init_thread_info.ttbr0 always generates translation + * faults in case uaccess_enable() is inadvertently called by the init + * thread. + */ + init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page); +#endif + #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4731133286b3..5b830be79c01 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -440,9 +440,10 @@ int cpu_enable_cache_maint_trap(void *__unused) } #define __user_cache_maint(insn, address, res) \ - if (untagged_addr(address) >= user_addr_max()) \ + if (untagged_addr(address) >= user_addr_max()) { \ res = -EFAULT; \ - else \ + } else { \ + uaccess_ttbr0_enable(); \ asm volatile ( \ "1: " insn ", %1\n" \ " mov %w0, #0\n" \ @@ -454,7 +455,9 @@ int cpu_enable_cache_maint_trap(void *__unused) " .popsection\n" \ _ASM_EXTABLE(1b, 3b) \ : "=r" (res) \ - : "r" (address), "i" (-EFAULT) ) + : "r" (address), "i" (-EFAULT)); \ + uaccess_ttbr0_disable(); \ + } static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 58b5a906ff78..da9576932322 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -23,6 +23,7 @@ #include #include #include +#include /* * flush_icache_range(start,end) @@ -48,6 +49,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) + uaccess_ttbr0_enable x2, x3 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 @@ -69,10 +71,12 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU dsb ish isb mov x0, #0 +1: + uaccess_ttbr0_disable x1 ret 9: mov x0, #-EFAULT - ret + b 1b ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index efcf1f7ef1e4..4c63cb154859 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -221,7 +221,12 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: - cpu_switch_mm(mm->pgd, mm); + /* + * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when + * emulating PAN. + */ + if (!system_uses_ttbr0_pan()) + cpu_switch_mm(mm->pgd, mm); } static int asids_init(void) -- cgit v1.2.3 From 786889636ad75296c213547d1ca656af4c59f390 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Jul 2016 18:22:39 +0100 Subject: arm64: Handle faults caused by inadvertent user access with PAN enabled When TTBR0_EL1 is set to the reserved page, an erroneous kernel access to user space would generate a translation fault. 
This patch adds the checks for the software-set PSR_PAN_BIT to emulate a permission fault and report it accordingly. Cc: Will Deacon Cc: James Morse Cc: Kees Cook Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d035cc594445..a78a5c401806 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -269,13 +269,19 @@ out: return fault; } -static inline bool is_permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; - return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) || - (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM); + if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR) + return false; + + if (system_uses_ttbr0_pan()) + return fsc_type == ESR_ELx_FSC_FAULT && + (regs->pstate & PSR_PAN_BIT); + else + return fsc_type == ESR_ELx_FSC_PERM; } static bool is_el0_instruction_abort(unsigned int esr) @@ -315,7 +321,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (is_permission_fault(esr) && (addr < USER_DS)) { + if (addr < USER_DS && is_permission_fault(esr, regs)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); -- cgit v1.2.3 From 9cf09d68b89ae5fe0261dcc69464bcc676900af6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 5 Jul 2016 12:25:15 +0100 Subject: arm64: xen: Enable user access before a privcmd hvc call Privcmd calls are issued by the userspace. The kernel needs to enable access to TTBR0_EL1 as the hypervisor would issue stage 1 translations to user memory via AT instructions. Since AT instructions are not affected by the PAN bit (ARMv8.1), we only need the explicit uaccess_enable/disable if the TTBR0 PAN option is enabled. Reviewed-by: Julien Grall Acked-by: Stefano Stabellini Cc: Will Deacon Cc: James Morse Cc: Kees Cook Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/xen/hypercall.S | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 329c8027b0a9..b41aff25426d 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -49,6 +49,7 @@ #include #include +#include #include @@ -91,6 +92,20 @@ ENTRY(privcmd_call) mov x2, x3 mov x3, x4 mov x4, x5 + /* + * Privcmd calls are issued by the userspace. The kernel needs to + * enable access to TTBR0_EL1 as the hypervisor would issue stage 1 + * translations to user memory via AT instructions. Since AT + * instructions are not affected by the PAN bit (ARMv8.1), we only + * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation + * is enabled (it implies that hardware UAO and PAN disabled). + */ + uaccess_ttbr0_enable x6, x7 hvc XEN_IMM + + /* + * Disable userspace access from kernel once the hyp call completed. + */ + uaccess_ttbr0_disable x6 ret ENDPROC(privcmd_call); -- cgit v1.2.3 From ba42822af1c287f038aa550f3578c61c212a892e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 1 Jul 2016 18:25:31 +0100 Subject: arm64: Enable CONFIG_ARM64_SW_TTBR0_PAN This patch adds the Kconfig option to enable support for TTBR0 PAN emulation. 
The option is default off because of a slight performance hit when enabled, caused by the additional TTBR0_EL1 switching during user access operations or exception entry/exit code. Cc: Will Deacon Cc: James Morse Cc: Kees Cook Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0b8227f23eed..e8dd55e7db38 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -794,6 +794,14 @@ config SETEND_EMULATION If unsure, say Y endif +config ARM64_SW_TTBR0_PAN + bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved + zeroed area and reserved ASID. The user access routines + restore the valid TTBR0_EL1 temporarily. + menu "ARMv8.1 architectural features" config ARM64_HW_AFDBM -- cgit v1.2.3 From 833a9f4b5c6103f7e9f24f626bee6265e333cff8 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 17 Aug 2016 17:24:04 +0100 Subject: arm64: Enable HIBERNATION in defconfig This patch adds CONFIG_HIBERNATION to the arm64 defconfig. Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64') diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index dab2cb0c1f1c..4fc5ae07b035 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -82,6 +82,7 @@ CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y +CONFIG_HIBERNATION=y CONFIG_ARM_CPUIDLE=y CONFIG_CPU_FREQ=y CONFIG_CPUFREQ_DT=y -- cgit v1.2.3 From ee6a7fce8e5ecd90794ad7c9f62518c753fb3cb6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 23 Nov 2016 18:05:52 +0000 Subject: arm64: Remove I-cache invalidation from flush_cache_range() The flush_cache_range() function (similarly for flush_cache_page()) is called when the kernel is changing an existing VA->PA mapping range to either a new PA or to different attributes. Since ARMv8 has PIPT-like D-caches, this function does not need to perform any D-cache maintenance. The I-cache maintenance is already handled via set_pte_at() and flush_cache_range() cannot anyway guarantee that there are no cache lines left after invalidation due to the speculative loads. This patch makes flush_cache_range() a no-op. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 6 +++++- arch/arm64/mm/flush.c | 7 ------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index e9f64ecb75ce..5a2a6ee65f65 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -65,7 +65,6 @@ * - kaddr - page address * - size - region size */ -extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); @@ -82,6 +81,11 @@ static inline void flush_cache_page(struct vm_area_struct *vma, { } +static inline void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + /* * Cache maintenance functions used by the DMA API. No to be used directly. 
*/ diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 2d78d5a9b89f..554a2558c12e 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -25,13 +25,6 @@ #include #include -void flush_cache_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - if (vma->vm_flags & VM_EXEC) - __flush_icache_all(); -} - void sync_icache_aliases(void *kaddr, unsigned long len) { unsigned long addr = (unsigned long)kaddr; -- cgit v1.2.3 From 1650ac49c2a0fb8188a2c8eac36537640ed57892 Mon Sep 17 00:00:00 2001 From: Jintack Date: Mon, 28 Nov 2016 21:13:02 -0500 Subject: arm64: head.S: Fix CNTHCTL_EL2 access on VHE system Bit positions of CNTHCTL_EL2 are changing depending on HCR_EL2.E2H bit. EL1PCEN and EL1PCTEN are 1st and 0th bits when E2H is not set, but they are 11th and 10th bits respectively when E2H is set. Current code is unintentionally setting wrong bits to CNTHCTL_EL2 with E2H set. In fact, we don't need to set those two bits, which allow EL1 and EL0 to access physical timer and counter respectively, if E2H and TGE are set for the host kernel. They will be configured later as necessary. First, we don't need to configure those bits for EL1, since the host kernel runs in EL2. It is a hypervisor's responsibility to configure them before entering a VM, which runs in EL0 and EL1. Second, EL0 accesses are configured in the later stage of boot process. Signed-off-by: Jintack Lim Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7ee6d74101cf..4b1abac3485a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -525,10 +525,21 @@ set_hcr: msr hcr_el2, x0 isb - /* Generic timers. */ + /* + * Allow Non-secure EL1 and EL0 to access physical timer and counter. + * This is not necessary for VHE, since the host kernel runs in EL2, + * and EL0 accesses are configured in the later stage of boot process. + * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout + * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined + * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 + * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in + * EL2. + */ + cbnz x2, 1f mrs x0, cnthctl_el2 orr x0, x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 +1: msr cntvoff_el2, xzr // Clear virtual offset #ifdef CONFIG_ARM_GIC_V3 -- cgit v1.2.3 From 34a6980c82fb1342e7064844c95aa4cf933e5ecc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Dec 2016 15:55:13 +0000 Subject: arm64: smp: Prevent raw_smp_processor_id() recursion Under CONFIG_DEBUG_PREEMPT=y, this_cpu_ptr() ends up calling back into raw_smp_processor_id(), resulting in some hilariously catastrophic infinite recursion. In the normal case, we have: #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) and everything is dandy. However for CONFIG_DEBUG_PREEMPT, this_cpu_ptr() is defined in terms of my_cpu_offset, wherein the fun begins: #define my_cpu_offset per_cpu_offset(smp_processor_id()) ... #define smp_processor_id() debug_smp_processor_id() ... notrace unsigned int debug_smp_processor_id(void) { return check_preemption_disabled("smp_processor_id", ""); ... notrace static unsigned int check_preemption_disabled(const char *what1, const char *what2) { int this_cpu = raw_smp_processor_id(); and bang. Use raw_cpu_ptr() directly to avoid that. 
Fixes: 57c82954e77f ("arm64: make cpu number a percpu variable") Reported-by: Marek Szyprowski Acked-by: Will Deacon Signed-off-by: Robin Murphy Tested-by: Marek Szyprowski Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/smp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index a62db952ffcb..d050d720a1b4 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -41,8 +41,10 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * We don't use this_cpu_read(cpu_number) as that has implicit writes to * preempt_count, and associated (compiler) barriers, that we'd like to avoid * the expense of. If we're preemptible, the value can be stale at use anyway. + * And we can't use this_cpu_ptr() either, as that winds up recursing back + * here under CONFIG_DEBUG_PREEMPT=y. */ -#define raw_smp_processor_id() (*this_cpu_ptr(&cpu_number)) +#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number)) struct seq_file; -- cgit v1.2.3 From bca8f17f57bd76ddf2bbd2527eb890d6f588853e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 1 Dec 2016 10:44:33 +0000 Subject: arm64: Get rid of asm/opcodes.h The opcodes.h drags in a lot of definition from the 32bit port, most of which is not required at all. Clean things up a bit by moving the bare minimum of what is required next to the actual users, and drop the include file. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/opcodes.h | 5 ----- arch/arm64/include/asm/sysreg.h | 16 ++++++++++------ arch/arm64/kernel/armv8_deprecated.c | 5 ++++- arch/arm64/kernel/insn.c | 1 - 4 files changed, 14 insertions(+), 13 deletions(-) delete mode 100644 arch/arm64/include/asm/opcodes.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h deleted file mode 100644 index 123f45d92cd1..000000000000 --- a/arch/arm64/include/asm/opcodes.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_CPU_BIG_ENDIAN -#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN -#endif - -#include <../../arm/include/asm/opcodes.h> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6c80b3699cb8..9e16a185badb 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -22,8 +22,6 @@ #include -#include - /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -37,6 +35,12 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) +#ifdef __ASSEMBLY__ +#define __emit_inst(x) .inst (x) +#else +#define __emit_inst(x) ".inst " __stringify((x)) "\n\t" +#endif + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -81,10 +85,10 @@ #define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) -#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\ - (!!x)<<8 | 0x1f) +#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ + (!!x)<<8 | 0x1f) +#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ + (!!x)<<8 | 0x1f) /* Common SCTLR_ELx flags. 
*/ #define SCTLR_ELx_EE (1 << 25) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index bdb35b92003e..04de188a36c9 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -351,6 +350,10 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDTEST_FAIL 0 +#define ARM_OPCODE_CONDTEST_PASS 1 +#define ARM_OPCODE_CONDTEST_UNCOND 2 + #define ARM_OPCODE_CONDITION_UNCOND 0xf static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 6f2ac4fc66ca..94b62c1fa4df 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #define AARCH64_INSN_SF_BIT BIT(31) -- cgit v1.2.3 From 49f5522e495aba58feb1ca58123805929710bddd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 3 Dec 2016 14:05:37 +0000 Subject: arm64: Remove reference to asm/opcodes.h The asm/opcodes.h file is now gone, but probes.h still references it for not obvious reason. Removing the #include directive fixes the compilation. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/probes.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index e175a825b187..6a5b28904c33 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -15,8 +15,6 @@ #ifndef _ARM_PROBES_H #define _ARM_PROBES_H -#include - typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); -- cgit v1.2.3 From bbb56c27228d4ad64aca858c5af49d0f2f11c645 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Dec 2016 15:27:43 +0000 Subject: arm64: Add detection code for broken .inst support in binutils Binutils version up to (and including) 2.25 have a pathological behaviour when it comes to mixing .inst directive and arithmetic involving labels. The assembler complains about non-constant expressions and compilation stops pretty quickly. In order to detect this and work around it, let's add a bit of detection code that will set the CONFIG_BROKEN_GAS_INST option should a broken gas be detected. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 3635b8662724..b9a4a934ca05 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -37,10 +37,16 @@ $(warning LSE atomics not supported by binutils) endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) +brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . 
- 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1) + +ifneq ($(brokengasinst),) +$(warning Detected assembler with broken .inst; disassembly will be unreliable) +endif + +KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) -KBUILD_AFLAGS += $(lseinstr) +KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian -- cgit v1.2.3 From cd9e1927a525f6ce7c0d99c6e038f0a0b9e85176 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Dec 2016 15:27:45 +0000 Subject: arm64: Work around broken .inst when defective gas is detected .inst being largely broken with older binutils, it'd be better not to emit it altogether when detecting such configuration (as it leads to all kind of horrors when using alternatives). Generalize the __emit_inst macro and use it extensively in asm/sysreg.h, and make it generate a .long when a broken gas is detected. The disassembly will be crap, but at least we can write semi-sane code. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e16a185badb..98ae03f8eedd 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -35,12 +35,33 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) +#ifndef CONFIG_BROKEN_GAS_INST + #ifdef __ASSEMBLY__ #define __emit_inst(x) .inst (x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif +#else /* CONFIG_BROKEN_GAS_INST */ + +#ifndef CONFIG_CPU_BIG_ENDIAN +#define __INSTR_BSWAP(x) (x) +#else /* CONFIG_CPU_BIG_ENDIAN */ +#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \ + (((x) << 8) & 0x00ff0000) | \ + (((x) >> 8) & 0x0000ff00) | \ + (((x) >> 24) & 0x000000ff)) +#endif /* CONFIG_CPU_BIG_ENDIAN */ + +#ifdef __ASSEMBLY__ +#define __emit_inst(x) .long __INSTR_BSWAP(x) +#else /* __ASSEMBLY__ */ +#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t" +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_BROKEN_GAS_INST */ + #define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) #define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) #define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) @@ -232,11 +253,11 @@ .equ .L__reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt) + __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) .endm .macro msr_s, sreg, rt - .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt) + __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) .endm #else @@ -250,11 +271,11 @@ asm( " .equ .L__reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n" + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n" + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) " .endm\n" ); -- cgit v1.2.3 From 75037120e62b58c536999eb23d70cfcb6d6c0bcc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 12 Dec 2016 13:50:26 +0000 Subject: arm64: Disable PAN on uaccess_enable() Commit 4b65a5db3627 ("arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1") added conditional user access enable/disable. 
Unfortunately, a typo prevents the PAN bit from being cleared for user access functions. Restore the PAN functionality by adding the missing '!'. Fixes: 4b65a5db3627 ("arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1") Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6986f56cfa88..d26750ca6e06 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ do { \ #define __uaccess_enable(alt) \ do { \ - if (uaccess_ttbr0_enable()) \ + if (!uaccess_ttbr0_enable()) \ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \ CONFIG_ARM64_PAN)); \ } while (0) -- cgit v1.2.3
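The effect of this one-character fix, and of the TTBR0 PAN gating it belongs to, can be summarised outside the kernel. The C sketch below is a minimal stand-alone model under stated assumptions: the boolean flags and the pstate/ttbr0 helpers are hypothetical stand-ins for the ALTERNATIVE()-patched kernel primitives, and printf merely marks where SET_PSTATE_PAN() would be emitted; it is not the kernel implementation. It illustrates that the PSTATE.PAN write is skipped only when the TTBR0_EL1 switch itself handles user access, so on hardware-PAN systems without the software emulation the missing '!' left PAN set and kernel accesses to user memory would fault.

/*
 * Minimal model of the corrected __uaccess_enable()/__uaccess_disable()
 * gating.  Illustrative only: these flags and helpers are assumed
 * stand-ins, not real kernel interfaces.
 */
#include <stdbool.h>
#include <stdio.h>

static bool uses_ttbr0_pan;	/* CONFIG_ARM64_SW_TTBR0_PAN emulation in use */
static bool has_hw_pan;		/* ARMv8.1 hardware PAN capability            */

static void set_pstate_pan(int on)
{
	printf("PSTATE.PAN <- %d\n", on);	/* stands in for SET_PSTATE_PAN(on) */
}

/* Return true only when the TTBR0_EL1 switch itself changed user access. */
static bool uaccess_ttbr0_enable(void)  { return uses_ttbr0_pan; }
static bool uaccess_ttbr0_disable(void) { return uses_ttbr0_pan; }

static void uaccess_enable(void)
{
	/* Without the '!', this branch never ran on hardware-PAN systems. */
	if (!uaccess_ttbr0_enable() && has_hw_pan)
		set_pstate_pan(0);		/* open the user access window */
}

static void uaccess_disable(void)
{
	if (!uaccess_ttbr0_disable() && has_hw_pan)
		set_pstate_pan(1);		/* close it again */
}

int main(void)
{
	has_hw_pan = true;	/* e.g. an ARMv8.1 CPU with the emulation off */
	uaccess_enable();	/* must clear PAN before copy_{to,from}_user() */
	uaccess_disable();
	return 0;
}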