From 7655abb953860485940d4de74fb45a8192149bb6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:19:09 +0100 Subject: arm64: mm: Move ASID from TTBR0 to TTBR1 In preparation for mapping kernelspace and userspace with different ASIDs, move the ASID to TTBR1 and update switch_mm to context-switch TTBR0 via an invalid mapping (the zero page). Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 95233dfc4c39..a8a64898a2aa 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -139,9 +139,12 @@ ENDPROC(cpu_do_resume) */ ENTRY(cpu_do_switch_mm) pre_ttbr0_update_workaround x0, x2, x3 + mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id - bfi x0, x1, #48, #16 // set the ASID - msr ttbr0_el1, x0 // set TTBR0 + bfi x2, x1, #48, #16 // set the ASID + msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) + isb + msr ttbr0_el1, x0 // now update TTBR0 isb post_ttbr0_update_workaround ret @@ -224,7 +227,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 + TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1 tcr_set_idmap_t0sz x10, x9 /* -- cgit v1.2.3 From 85d13c001497b481b4a8cc5d7db243cc44ac2bd8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:29:06 +0100 Subject: arm64: mm: Remove pre_ttbr0_update_workaround for Falkor erratum #E1003 The pre_ttbr0_update_workaround hook is called prior to context-switching TTBR0 because Falkor erratum E1003 can cause TLB allocation with the wrong ASID if both the ASID and the base address of the TTBR are updated at the same time. With the ASID sitting safely in TTBR1, we no longer update things atomically, so we can remove the pre_ttbr0_update_workaround macro as it's no longer required. The erratum infrastructure and documentation is left around for #E1003, as it will be required by the entry trampoline code in a future patch. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 22 ---------------------- arch/arm64/include/asm/mmu_context.h | 2 -- arch/arm64/mm/context.c | 11 ----------- arch/arm64/mm/proc.S | 1 - 4 files changed, 36 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aef72d886677..e1fa5db858b7 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -478,27 +477,6 @@ alternative_endif .endm /* - * Errata workaround prior to TTBR0_EL1 update - * - * val: TTBR value with new BADDR, preserved - * tmp0: temporary register, clobbered - * tmp1: other temporary register, clobbered - */ - .macro pre_ttbr0_update_workaround, val, tmp0, tmp1 -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 - mrs \tmp0, ttbr0_el1 - mov \tmp1, #FALKOR_RESERVED_ASID - bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR - msr ttbr0_el1, \tmp0 - isb - bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR - msr ttbr0_el1, \tmp0 - isb -alternative_else_nop_endif -#endif - .endm - /* * Errata workaround post TTBR0_EL1 update. 
*/ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index aa39c126c0d0..da29766a181c 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -19,8 +19,6 @@ #ifndef __ASM_MMU_CONTEXT_H #define __ASM_MMU_CONTEXT_H -#define FALKOR_RESERVED_ASID 1 - #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 6f4017046323..78a2dc596fee 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -79,13 +79,6 @@ void verify_cpu_asid_bits(void) } } -static void set_reserved_asid_bits(void) -{ - if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) && - cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003)) - __set_bit(FALKOR_RESERVED_ASID, asid_map); -} - static void flush_context(unsigned int cpu) { int i; @@ -94,8 +87,6 @@ static void flush_context(unsigned int cpu) /* Update the list of reserved ASIDs and the ASID bitmap. */ bitmap_clear(asid_map, 0, NUM_USER_ASIDS); - set_reserved_asid_bits(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -254,8 +245,6 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); - set_reserved_asid_bits(); - pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a8a64898a2aa..f2ff0837577c 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -138,7 +138,6 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - pre_ttbr0_update_workaround x0, x2, x3 mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id bfi x2, x1, #48, #16 // set the ASID -- cgit v1.2.3 From 158d495899ce55db453f682a8ac8390d5a426578 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:34:30 +0100 Subject: arm64: mm: Rename post_ttbr0_update_workaround The post_ttbr0_update_workaround hook applies to any change to TTBRx_EL1. Since we're using TTBR1 for the ASID, rename the hook to make it clearer as to what it's doing. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 5 ++--- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/proc.S | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e1fa5db858b7..c45bc94f15d0 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -477,10 +477,9 @@ alternative_endif .endm /* -/* - * Errata workaround post TTBR0_EL1 update. + * Errata workaround post TTBRx_EL1 update. */ - .macro post_ttbr0_update_workaround + .macro post_ttbr_update_workaround #ifdef CONFIG_CAVIUM_ERRATUM_27456 alternative_if ARM64_WORKAROUND_CAVIUM_27456 ic iallu diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6d14b8f29b5f..804e43c9cb0b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -257,7 +257,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). 
*/ - post_ttbr0_update_workaround + post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f2ff0837577c..3146dc96f05b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -145,7 +145,7 @@ ENTRY(cpu_do_switch_mm) isb msr ttbr0_el1, x0 // now update TTBR0 isb - post_ttbr0_update_workaround + post_ttbr_update_workaround ret ENDPROC(cpu_do_switch_mm) -- cgit v1.2.3 From 27a921e75711d924617269e0ba4adb8bae9fd0d1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 13:58:16 +0100 Subject: arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN With the ASID now installed in TTBR1, we can re-enable ARM64_SW_TTBR0_PAN by ensuring that we switch to a reserved ASID of zero when disabling user access and restore the active user ASID on the uaccess enable path. Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/asm-uaccess.h | 25 +++++++++++++++++-------- arch/arm64/include/asm/uaccess.h | 21 +++++++++++++++++---- arch/arm64/kernel/entry.S | 4 ++-- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/xen/hypercall.S | 2 +- 10 files changed, 42 insertions(+), 21 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e7d7fd152c4..a93339f5178f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -910,7 +910,6 @@ endif config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" - depends on BROKEN # Temporary while switch_mm is reworked help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index b3da6c886835..21b8cf304028 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -16,11 +16,20 @@ add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb + sub \tmp1, \tmp1, #SWAPPER_DIR_SIZE + bic \tmp1, \tmp1, #(0xffff << 48) + msr ttbr1_el1, \tmp1 // set reserved ASID + isb .endm - .macro __uaccess_ttbr0_enable, tmp1 + .macro __uaccess_ttbr0_enable, tmp1, tmp2 get_thread_info \tmp1 ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1 + mrs \tmp2, ttbr1_el1 + extr \tmp2, \tmp2, \tmp1, #48 + ror \tmp2, \tmp2, #16 + msr ttbr1_el1, \tmp2 // set the active ASID + isb msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1 isb .endm @@ -31,18 +40,18 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 alternative_if_not ARM64_HAS_PAN - save_and_disable_irq \tmp2 // avoid preemption - __uaccess_ttbr0_enable \tmp1 - restore_irq \tmp2 + save_and_disable_irq \tmp3 // avoid preemption + __uaccess_ttbr0_enable \tmp1, \tmp2 + restore_irq \tmp3 alternative_else_nop_endif .endm #else .macro uaccess_ttbr0_disable, tmp1 .endm - .macro uaccess_ttbr0_enable, tmp1, tmp2 + .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 .endm #endif @@ -56,8 +65,8 @@ alternative_if ARM64_ALT_PAN_NOT_UAO alternative_else_nop_endif .endm - .macro uaccess_enable_not_uao, tmp1, tmp2 - uaccess_ttbr0_enable \tmp1, \tmp2 + .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3 + 
uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index fc0f9eb66039..750a3b76a01c 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -107,15 +107,19 @@ static inline void __uaccess_ttbr0_disable(void) { unsigned long ttbr; + ttbr = read_sysreg(ttbr1_el1); /* reserved_ttbr0 placed at the end of swapper_pg_dir */ - ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE; - write_sysreg(ttbr, ttbr0_el1); + write_sysreg(ttbr + SWAPPER_DIR_SIZE, ttbr0_el1); + isb(); + /* Set reserved ASID */ + ttbr &= ~(0xffffUL << 48); + write_sysreg(ttbr, ttbr1_el1); isb(); } static inline void __uaccess_ttbr0_enable(void) { - unsigned long flags; + unsigned long flags, ttbr0, ttbr1; /* * Disable interrupts to avoid preemption between reading the 'ttbr0' @@ -123,7 +127,16 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. */ local_irq_save(flags); - write_sysreg(current_thread_info()->ttbr0, ttbr0_el1); + ttbr0 = current_thread_info()->ttbr0; + + /* Restore active ASID */ + ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 |= ttbr0 & (0xffffUL << 48); + write_sysreg(ttbr1, ttbr1_el1); + isb(); + + /* Restore user page table */ + write_sysreg(ttbr0, ttbr0_el1); isb(); local_irq_restore(flags); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 804e43c9cb0b..d454d8ed45e4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -184,7 +184,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr0_el1 + mrs x21, ttbr1_el1 tst x21, #0xffff << 48 // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled @@ -248,7 +248,7 @@ alternative_else_nop_endif tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set .endif - __uaccess_ttbr0_enable x0 + __uaccess_ttbr0_enable x0, x1 .if \el == 0 /* diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index e88fb99c1561..8f9c4641e706 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -30,7 +30,7 @@ * Alignment fixed up by hardware. 
*/ ENTRY(__clear_user) - uaccess_enable_not_uao x2, x3 + uaccess_enable_not_uao x2, x3, x4 mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 4b5d826895ff..69d86a80f3e2 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -64,7 +64,7 @@ end .req x5 ENTRY(__arch_copy_from_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index b24a830419ad..e442b531252a 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -65,7 +65,7 @@ end .req x5 ENTRY(raw_copy_in_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 351f0766f7a6..318f15d5c336 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -63,7 +63,7 @@ end .req x5 ENTRY(__arch_copy_to_user) - uaccess_enable_not_uao x3, x4 + uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" uaccess_disable_not_uao x3 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7f1dbe962cf5..6cd20a8c0952 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -49,7 +49,7 @@ ENTRY(flush_icache_range) * - end - virtual end address of region */ ENTRY(__flush_cache_user_range) - uaccess_ttbr0_enable x2, x3 + uaccess_ttbr0_enable x2, x3, x4 dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x0, x3 diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 401ceb71540c..acdbd2c9e899 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -101,7 +101,7 @@ ENTRY(privcmd_call) * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation * is enabled (it implies that hardware UAO and PAN disabled). */ - uaccess_ttbr0_enable x6, x7 + uaccess_ttbr0_enable x6, x7, x8 hvc XEN_IMM /* -- cgit v1.2.3 From 0c8ea531b7740754cf374ca8b7510655f569c5e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Aug 2017 14:10:28 +0100 Subject: arm64: mm: Allocate ASIDs in pairs In preparation for separate kernel/user ASIDs, allocate them in pairs for each mm_struct. The bottom bit distinguishes the two: if it is set, then the ASID will map only userspace. 
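For illustration, the pairing scheme works out as follows (a sketch only, not part of the patch; the generation bits are omitted and the helper names below merely mirror the asid2idx()/idx2asid() macros added to context.c). USER_ASID_FLAG (bit 48 of the TTBR, i.e. bit 0 of the 16-bit ASID field) is what selects the odd twin:

	/* Sketch: each mm gets an even "kernel" ASID; the odd twin maps userspace only. */
	static inline unsigned long example_idx2asid(unsigned long idx)
	{
		return idx << 1;		/* allocator index 3 -> ASID 6, always even */
	}

	static inline unsigned long example_user_asid(unsigned long asid)
	{
		return asid | 1;		/* ASID 6 -> user ASID 7 (bottom bit set) */
	}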
Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 1 + arch/arm64/mm/context.c | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 0d34bf0a89c7..01bfb184f2a8 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,6 +17,7 @@ #define __ASM_MMU_H #define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */ +#define USER_ASID_FLAG (UL(1) << 48) typedef struct { atomic64_t id; diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 78a2dc596fee..1cb3bc92ae5c 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#define NUM_USER_ASIDS ASID_FIRST_VERSION + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) +#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) +#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) +#else +#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define asid2idx(asid) ((asid) & ~ASID_MASK) +#define idx2asid(idx) asid2idx(idx) +#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -98,7 +107,7 @@ static void flush_context(unsigned int cpu) */ if (asid == 0) asid = per_cpu(reserved_asids, i); - __set_bit(asid & ~ASID_MASK, asid_map); + __set_bit(asid2idx(asid), asid_map); per_cpu(reserved_asids, i) = asid; } @@ -153,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * We had a valid ASID in a previous life, so try to re-use * it if possible. */ - asid &= ~ASID_MASK; - if (!__test_and_set_bit(asid, asid_map)) + if (!__test_and_set_bit(asid2idx(asid), asid_map)) return newasid; } /* * Allocate a free ASID. If we can't find one, take a note of the - * currently active ASIDs and mark the TLBs as requiring flushes. - * We always count from ASID #1, as we use ASID #0 when setting a - * reserved TTBR0 for the init_mm. + * currently active ASIDs and mark the TLBs as requiring flushes. We + * always count from ASID #2 (index 1), as we use ASID #0 when setting + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd + * pairs. */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid != NUM_USER_ASIDS) @@ -179,7 +188,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) set_asid: __set_bit(asid, asid_map); cur_idx = asid; - return asid | generation; + return idx2asid(asid) | generation; } void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) -- cgit v1.2.3 From 51a0048beb449682d632d0af52a515adb9f9882e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 14 Nov 2017 14:14:17 +0000 Subject: arm64: mm: Map entry trampoline into trampoline and kernel page tables The exception entry trampoline needs to be mapped at the same virtual address in both the trampoline page table (which maps nothing else) and also the kernel page table, so that we can swizzle TTBR1_EL1 on exceptions from and return to EL0. This patch maps the trampoline at a fixed virtual address in the fixmap area of the kernel virtual address space, which allows the kernel proper to be randomized with respect to the trampoline when KASLR is enabled. 
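As a worked illustration of the fixmap placement described above (the arithmetic follows the usual asm-generic/fixmap.h convention and is not spelled out in this patch):

	/*
	 * Fixmap slots are allocated downwards from FIXADDR_TOP, so the
	 * alias added below is a compile-time constant:
	 *
	 *	TRAMP_VALIAS == __fix_to_virt(FIX_ENTRY_TRAMP_TEXT)
	 *	             == FIXADDR_TOP - (FIX_ENTRY_TRAMP_TEXT << PAGE_SHIFT)
	 *
	 * The same VA is then installed in both tramp_pg_dir (via
	 * __create_pgd_mapping) and swapper_pg_dir (via __set_fixmap), so
	 * execution can continue at the same address across the TTBR1_EL1
	 * switch.
	 */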
Reviewed-by: Mark Rutland Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 4 ++++ arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/asm-offsets.c | 6 +++++- arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4052ec39e8db..8119b49be98d 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -58,6 +58,10 @@ enum fixed_addresses { FIX_APEI_GHES_NMI, #endif /* CONFIG_ACPI_APEI_GHES */ +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT, +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, /* diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 149d05fb9421..774003b247ad 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -680,6 +680,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 71bf088f1e4b..af247d10252f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -148,11 +149,14 @@ int main(void) DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); - BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next)); DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val)); + BLANK(); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + DEFINE(TRAMP_VALIAS, TRAMP_VALIAS); +#endif return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..fe68a48c64cb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -525,6 +525,29 @@ static int __init parse_rodata(char *arg) } early_param("rodata", parse_rodata); +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static int __init map_entry_trampoline(void) +{ + extern char __entry_tramp_text_start[]; + + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; + phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); + + /* The trampoline is always mapped and can therefore be global */ + pgprot_val(prot) &= ~PTE_NG; + + /* Map only the text into the trampoline page table */ + memset(tramp_pg_dir, 0, PGD_SIZE); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, + prot, pgd_pgtable_alloc, 0); + + /* ...as well as the kernel page table */ + __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + return 0; +} +core_initcall(map_entry_trampoline); +#endif + /* * Create fine-grained mappings for the kernel. 
*/ -- cgit v1.2.3 From 6c27c4082f4f70b9f41df4d0adf51128b40351df Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Dec 2017 11:24:02 +0000 Subject: arm64: kaslr: Put kernel vectors address in separate data page The literal pool entry for identifying the vectors base is the only piece of information in the trampoline page that identifies the true location of the kernel. This patch moves it into a page-aligned region of the .rodata section and maps this adjacent to the trampoline text via an additional fixmap entry, which protects against any accidental leakage of the trampoline contents. Suggested-by: Ard Biesheuvel Tested-by: Laura Abbott Tested-by: Shanker Donthineni Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/kernel/entry.S | 14 ++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 5 ++++- arch/arm64/mm/mmu.c | 10 +++++++++- 4 files changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 8119b49be98d..ec1e6d6fa14c 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -59,6 +59,7 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_DATA, FIX_ENTRY_TRAMP_TEXT, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3eabcb194c87..031392ee5f47 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1030,7 +1030,13 @@ alternative_else_nop_endif msr tpidrro_el0, x30 // Restored in kernel_ventry .endif tramp_map_kernel x30 +#ifdef CONFIG_RANDOMIZE_BASE + adr x30, tramp_vectors + PAGE_SIZE +alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 + ldr x30, [x30] +#else ldr x30, =vectors +#endif prfm plil1strm, [x30, #(1b - tramp_vectors)] msr vbar_el1, x30 add x30, x30, #(1b - tramp_vectors) @@ -1073,6 +1079,14 @@ END(tramp_exit_compat) .ltorg .popsection // .entry.tramp.text +#ifdef CONFIG_RANDOMIZE_BASE + .pushsection ".rodata", "a" + .align PAGE_SHIFT + .globl __entry_tramp_data_start +__entry_tramp_data_start: + .quad vectors + .popsection // .rodata +#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6b4260f22aab..ddfd3c0942f7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -251,7 +251,10 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K, ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif - +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, + "Entry trampoline text too big") +#endif /* * If padding is applied before .head.text, virt<->phys conversions will fail. 
*/ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index fe68a48c64cb..916d9ced1c3f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -541,8 +541,16 @@ static int __init map_entry_trampoline(void) __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, prot, pgd_pgtable_alloc, 0); - /* ...as well as the kernel page table */ + /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + extern char __entry_tramp_data_start[]; + + __set_fixmap(FIX_ENTRY_TRAMP_DATA, + __pa_symbol(__entry_tramp_data_start), + PAGE_KERNEL_RO); + } + return 0; } core_initcall(map_entry_trampoline); -- cgit v1.2.3 From 787fd1d019b269af7912249231dfe34a5fe3e7c8 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:17 +0000 Subject: arm64: limit PA size to supported range We currently copy the physical address size from ID_AA64MMFR0_EL1.PARange directly into TCR.(I)PS. This will not work for 4k and 16k granule kernels on systems that support 52-bit physical addresses, since 52-bit addresses are only permitted with the 64k granule. To fix this, fall back to 48 bits when configuring the PA size when the kernel does not support 52-bit PAs. When it does, fall back to 52, to avoid similar problems in the future if the PA size is ever increased above 52. Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko [catalin.marinas@arm.com: tcr_set_pa_size macro renamed to tcr_compute_pa_size] [catalin.marinas@arm.com: comments added to tcr_compute_pa_size] [catalin.marinas@arm.com: definitions added for TCR_*PS_SHIFT] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 18 ++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 2 ++ arch/arm64/include/asm/sysreg.h | 8 ++++++++ arch/arm64/kvm/hyp-init.S | 6 ++---- arch/arm64/kvm/hyp/s2-setup.c | 2 ++ arch/arm64/mm/proc.S | 6 ++---- 6 files changed, 34 insertions(+), 8 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index aef72d886677..04a92307e6c1 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -350,6 +350,24 @@ alternative_endif #endif .endm +/* + * tcr_compute_pa_size - set TCR.(I)PS to the highest supported + * ID_AA64MMFR0_EL1.PARange value + * + * tcr: register with the TCR_ELx value to be updated + * pos: PARange bitfield position + * tmp{0,1}: temporary registers + */ + .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1 + mrs \tmp0, ID_AA64MMFR0_EL1 + // Narrow PARange to fit the PS field in TCR_ELx + ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3 + mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX + cmp \tmp0, \tmp1 + csel \tmp0, \tmp1, \tmp0, hi + bfi \tcr, \tmp0, \pos, #3 + .endm + /* * Macro to perform a data cache maintenance for the interval * [kaddr, kaddr + size) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index c1de9f67980b..9be2e9371c52 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -272,6 +272,8 @@ #define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT) #define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT) +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) #define TCR_HA (UL(1) << 39) diff --git 
a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 08cc88574659..ec144f480b39 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -471,6 +471,14 @@ #define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 #define ID_AA64MMFR0_TGRAN16_NI 0x0 #define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_AA64MMFR0_PARANGE_48 0x5 +#define ID_AA64MMFR0_PARANGE_52 0x6 + +#ifdef CONFIG_ARM64_PA_BITS_52 +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52 +#else +#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48 +#endif /* id_aa64mmfr1 */ #define ID_AA64MMFR1_PAN_SHIFT 20 diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 3f9615582377..e2d1fe03662a 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -90,11 +90,9 @@ __do_hyp_init: bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH #endif /* - * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in - * TCR_EL2. + * Set the PS bits in TCR_EL2. */ - mrs x5, ID_AA64MMFR0_EL1 - bfi x4, x5, #16, #3 + tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6 msr tcr_el2, x4 diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c index a81f5e10fc8c..603e1ee83e89 100644 --- a/arch/arm64/kvm/hyp/s2-setup.c +++ b/arch/arm64/kvm/hyp/s2-setup.c @@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void) * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2... */ parange = read_sysreg(id_aa64mmfr0_el1) & 7; + if (parange > ID_AA64MMFR0_PARANGE_MAX) + parange = ID_AA64MMFR0_PARANGE_MAX; val |= parange << 16; /* Compute the actual PARange... */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 95233dfc4c39..4f133cb340dc 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -228,11 +228,9 @@ ENTRY(__cpu_setup) tcr_set_idmap_t0sz x10, x9 /* - * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in - * TCR_EL1. + * Set the IPS bits in TCR_EL1. */ - mrs x9, ID_AA64MMFR0_EL1 - bfi x10, x9, #32, #3 + tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 #ifdef CONFIG_ARM64_HW_AFDBM /* * Hardware update of the Access and Dirty bits. -- cgit v1.2.3 From 529c4b05a3cb2f324aac347042ee6d641478e946 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:18 +0000 Subject: arm64: handle 52-bit addresses in TTBR The top 4 bits of a 52-bit physical address are positioned at bits 2..5 in the TTBR registers. Introduce a couple of macros to move the bits there, and change all TTBR writers to use them. Leave TTBR0 PAN code unchanged, to avoid complicating it. A system with 52-bit PA will have PAN anyway (because it's ARMv8.1 or later), and a system without 52-bit PA can only use up to 48-bit PAs. A later patch in this series will add a kconfig dependency to ensure PAN is configured. In addition, when using 52-bit PA there is a special alignment requirement on the top-level table. We don't currently have any VA_BITS configuration that would violate the requirement, but one could be added in the future, so add a compile-time BUG_ON to check for it. 
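A worked example of the bit movement described above (the concrete value is chosen for illustration only; the expression matches the phys_to_ttbr() definition added below):

	/*
	 * phys_to_ttbr(addr) == (addr | (addr >> 46)) & GENMASK(47, 2)
	 *
	 * For a table at PA 0x9_8765_4321_0000 (52 bits, 64-byte aligned):
	 *	addr >> 46	moves PA[51:48] = 0x9 down to bits [5:2]
	 *	result		= 0x8765_4321_0024  (0x24 = 0b10_0100)
	 *
	 * The 64-byte alignment requirement mentioned above guarantees that
	 * bits [5:2] of the original address are zero, so the two halves
	 * never collide.
	 */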
Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko [catalin.marinas@arm.com: added TTBR_BADD_MASK_52 comment] Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/assembler.h | 16 ++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 2 ++ arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/pgtable-hwdef.h | 13 +++++++++++++ arch/arm64/include/asm/pgtable.h | 6 ++++++ arch/arm64/kernel/head.S | 6 ++++-- arch/arm64/kernel/hibernate-asm.S | 12 +++++++----- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/kvm/hyp-init.S | 3 ++- arch/arm64/mm/pgd.c | 8 ++++++++ arch/arm64/mm/proc.S | 13 ++++++++----- virt/kvm/arm/arm.c | 2 +- 13 files changed, 71 insertions(+), 16 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index fa6f2174276b..8dbec683638b 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -221,6 +221,8 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +#define kvm_phys_to_vttbr(addr) (addr) + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 04a92307e6c1..49ea3def4bd1 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -530,4 +530,20 @@ alternative_else_nop_endif #endif .endm +/* + * Arrange a physical address in a TTBR register, taking care of 52-bit + * addresses. + * + * phys: physical address, preserved + * ttbr: returns the TTBR value + */ + .macro phys_to_ttbr, phys, ttbr +#ifdef CONFIG_ARM64_PA_BITS_52 + orr \ttbr, \phys, \phys, lsr #46 + and \ttbr, \ttbr, #TTBR_BADDR_MASK_52 +#else + mov \ttbr, \phys +#endif + .endm + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 672c8684d5c2..747bfff92948 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -309,5 +309,7 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 9d155fa9a507..accc2ff32a0e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -51,7 +51,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = __pa_symbol(empty_zero_page); + unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page)); write_sysreg(ttbr, ttbr0_el1); isb(); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 9be2e9371c52..f92be11a209a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H +#include + /* * Number of page-table levels required to address 'va_bits' wide * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) @@ -279,4 +281,15 @@ #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +/* + * TTBR. + */ +#ifdef CONFIG_ARM64_PA_BITS_52 +/* + * This should be GENMASK_ULL(47, 2). 
+ * TTBR_ELx[1] is RES0 in this configuration. + */ +#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) +#endif + #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 149d05fb9421..93677b9db947 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -733,6 +733,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define kc_vaddr_to_offset(v) ((v) & ~VA_START) #define kc_offset_to_vaddr(o) ((o) | VA_START) +#ifdef CONFIG_ARM64_PA_BITS_52 +#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) +#else +#define phys_to_ttbr(addr) (addr) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 67e86a0f57ac..0addea3760a6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -679,8 +679,10 @@ ENTRY(__enable_mmu) update_early_cpu_boot_status 0, x1, x2 adrp x1, idmap_pg_dir adrp x2, swapper_pg_dir - msr ttbr0_el1, x1 // load TTBR0 - msr ttbr1_el1, x2 // load TTBR1 + phys_to_ttbr x1, x3 + phys_to_ttbr x2, x4 + msr ttbr0_el1, x3 // load TTBR0 + msr ttbr1_el1, x4 // load TTBR1 isb msr sctlr_el1, x0 isb diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index e56d848b6466..84f5d52fddda 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -33,12 +33,14 @@ * Even switching to our copied tables will cause a changed output address at * each stage of the walk. */ -.macro break_before_make_ttbr_switch zero_page, page_table - msr ttbr1_el1, \zero_page +.macro break_before_make_ttbr_switch zero_page, page_table, tmp + phys_to_ttbr \zero_page, \tmp + msr ttbr1_el1, \tmp isb tlbi vmalle1 dsb nsh - msr ttbr1_el1, \page_table + phys_to_ttbr \page_table, \tmp + msr ttbr1_el1, \tmp isb .endm @@ -78,7 +80,7 @@ ENTRY(swsusp_arch_suspend_exit) * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page */ - break_before_make_ttbr_switch x5, x0 + break_before_make_ttbr_switch x5, x0, x6 mov x21, x1 mov x30, x2 @@ -109,7 +111,7 @@ ENTRY(swsusp_arch_suspend_exit) dsb ish /* wait for PoU cleaning to finish */ /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21 + break_before_make_ttbr_switch x25, x21, x6 ic ialluis dsb ish diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3009b8b80f08..efbf6dbd93c8 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -264,7 +264,7 @@ static int create_safe_exec_page(void *src_start, size_t length, */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(virt_to_phys(pgd), ttbr0_el1); + write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys((void *)dst); diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index e2d1fe03662a..f9681cc00973 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -63,7 +63,8 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - msr ttbr0_el2, x0 + phys_to_ttbr x0, x4 + msr ttbr0_el2, x4 mrs x4, tcr_el1 ldr x5, =TCR_EL2_MASK diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 051e71ec3335..289f9113a27a 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -49,6 +49,14 @@ void __init pgd_cache_init(void) if (PGD_SIZE == PAGE_SIZE) return; +#ifdef CONFIG_ARM64_PA_BITS_52 + /* + * With 52-bit physical addresses, the 
architecture requires the + * top-level table to be aligned to at least 64 bytes. + */ + BUILD_BUG_ON(PGD_SIZE < 64); +#endif + /* * Naturally aligned pgds required by the architecture. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 4f133cb340dc..e79db5a7576a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -138,10 +138,11 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - pre_ttbr0_update_workaround x0, x2, x3 + phys_to_ttbr x0, x2 + pre_ttbr0_update_workaround x2, x3, x4 mmid x1, x1 // get mm->context.id - bfi x0, x1, #48, #16 // set the ASID - msr ttbr0_el1, x0 // set TTBR0 + bfi x2, x1, #48, #16 // set the ASID + msr ttbr0_el1, x2 // set TTBR0 isb post_ttbr0_update_workaround ret @@ -158,14 +159,16 @@ ENTRY(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 adrp x1, empty_zero_page - msr ttbr1_el1, x1 + phys_to_ttbr x1, x3 + msr ttbr1_el1, x3 isb tlbi vmalle1 dsb nsh isb - msr ttbr1_el1, x0 + phys_to_ttbr x0, x3 + msr ttbr1_el1, x3 isb restore_daif x2 diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 6b60c98a6e22..c8d49879307f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm) pgd_phys = virt_to_phys(kvm->arch.pgd); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); - kvm->arch.vttbr = pgd_phys | vmid; + kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; spin_unlock(&kvm_vmid_lock); } -- cgit v1.2.3 From 193383043f14a398393dc18bae8380f7fe665ec3 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:20 +0000 Subject: arm64: don't open code page table entry creation Instead of open coding the generation of page table entries, use the macros/functions that exist for this - pfn_p*d and p*d_populate. Most code in the kernel already uses these macros, this patch tries to fix up the few places that don't. This is useful for the next patch in this series, which needs to change the page table entry logic, and it's better to have that logic in one place. The KVM extended ID map is special, since we're creating a level above CONFIG_PGTABLE_LEVELS and the required function isn't available. Leave it as is and add a comment to explain it. (The normal kernel ID map code doesn't need this change because its page tables are created in assembly (__create_page_tables)). Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_mmu.h | 5 +++++ arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/hibernate.c | 3 +-- arch/arm64/mm/mmu.c | 14 +++++++++----- 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 747bfff92948..9810ebf949b3 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -276,6 +276,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return __cpu_uses_extended_idmap(); } +/* + * Can't use pgd_populate here, because the extended idmap adds an extra level + * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended + * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. 
+ */ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 93677b9db947..5d9554fb2692 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -355,6 +355,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pud_write(pud) pte_write(pud_pte(pud)) #define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pud(pfn,prot) (__pud(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index efbf6dbd93c8..f20cf7e99249 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length, } pte = pte_offset_kernel(pmd, dst_addr); - set_pte(pte, __pte(virt_to_phys((void *)dst) | - pgprot_val(PAGE_KERNEL_EXEC))); + set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); /* * Load our new page tables. A strict BBM approach requires that we diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..0c631a17ae1d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -570,8 +570,8 @@ static void __init map_kernel(pgd_t *pgd) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), - __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); + pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), + lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -686,7 +686,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); + pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); @@ -879,15 +879,19 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PUD_MASK); - set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; } -- cgit v1.2.3 From fa2a8445b1d3810c52f2a6b3a006456bd1aacb7e Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 13 Dec 2017 17:07:24 +0000 Subject: arm64: allow ID map to be extended to 52 bits Currently, when using VA_BITS < 48, if the ID map text happens to be placed in physical memory above VA_BITS, we increase the VA size (up to 48) and create a new table level, in order to map in the ID map text. This is okay because the system always supports 48 bits of VA. This patch extends the code such that if the system supports 52 bits of VA, and the ID map text is placed that high up, then we increase the VA size accordingly, up to 52. 
One difference from the current implementation is that so far the condition of VA_BITS < 48 has meant that the top level table is always "full", with the maximum number of entries, and an extra table level is always needed. Now, when VA_BITS = 48 (and using 64k pages), the top level table is not full, and we simply need to increase the number of entries in it, instead of creating a new table level. Tested-by: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Marc Zyngier Tested-by: Bob Picco Reviewed-by: Bob Picco Signed-off-by: Kristina Martsenko [catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()] [catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()] Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_mmu.h | 5 +++ arch/arm64/include/asm/assembler.h | 2 - arch/arm64/include/asm/kvm_mmu.h | 7 +++- arch/arm64/include/asm/mmu_context.h | 10 +++++ arch/arm64/kernel/head.S | 76 +++++++++++++++++++++--------------- arch/arm64/kvm/hyp-init.S | 17 ++++---- arch/arm64/mm/mmu.c | 1 + virt/kvm/arm/mmu.c | 10 ++++- 8 files changed, 83 insertions(+), 45 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 8dbec683638b..8c5643e2eea4 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return false; } +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return PTRS_PER_PGD; +} + static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 49ea3def4bd1..942fdb5ef0ad 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -344,10 +344,8 @@ alternative_endif * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map */ .macro tcr_set_idmap_t0sz, valreg, tmpreg -#ifndef CONFIG_ARM64_VA_BITS_48 ldr_l \tmpreg, idmap_t0sz bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH -#endif .endm /* diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b3f7b68b042d..b33bdb5eeb3d 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -273,7 +273,12 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); static inline bool __kvm_cpu_uses_extended_idmap(void) { - return __cpu_uses_extended_idmap(); + return __cpu_uses_extended_idmap_level(); +} + +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) +{ + return idmap_ptrs_per_pgd; } /* diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index accc2ff32a0e..88451d47036b 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,6 +63,7 @@ static inline void cpu_set_reserved_ttbr0(void) * physical memory, in which case it will be smaller. */ extern u64 idmap_t0sz; +extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { @@ -70,6 +71,15 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } +/* + * True if the extended ID map requires an extra level of translation table + * to be configured. 
+ */ +static inline bool __cpu_uses_extended_idmap_level(void) +{ + return ARM64_HW_PGTABLE_LEVELS((64 - idmap_t0sz)) > CONFIG_PGTABLE_LEVELS; +} + /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index eeec0001e204..66f01869e97c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -176,7 +176,7 @@ ENDPROC(preserve_boot_args) * ptrs: #imm pointers per table page * * Preserves: virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs, tmp1, tmp2 * Returns: tbl -> next level table page address */ .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 @@ -184,7 +184,8 @@ ENDPROC(preserve_boot_args) phys_to_pte \tmp1, \tmp2 orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index + sub \ptrs, \ptrs, #1 + and \tmp1, \tmp1, \ptrs // table index str \tmp2, [\tbl, \tmp1, lsl #3] add \tbl, \tbl, #PAGE_SIZE // next level table page .endm @@ -194,15 +195,17 @@ ENDPROC(preserve_boot_args) * block entry in the next level (tbl) for the given virtual address. * * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 + * Corrupts: ptrs_per_pgd, tmp1, tmp2 */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 + .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS > 3 - create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PUD + create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif #if SWAPPER_PGTABLE_LEVELS > 2 - create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + mov \ptrs_per_pgd, PTRS_PER_PTE + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 #endif .endm @@ -266,26 +269,13 @@ __create_page_tables: adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) -#ifndef CONFIG_ARM64_VA_BITS_48 -#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) -#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) - - /* - * If VA_BITS < 48, it may be too small to allow for an ID mapping to be - * created that covers system RAM if that is located sufficiently high - * in the physical address space. So for the ID map, use an extended - * virtual range in that case, by configuring an additional translation - * level. - * First, we have to verify our assumption that the current value of - * VA_BITS was chosen such that all translation levels are fully - * utilised, and that lowering T0SZ will always result in an additional - * translation level to be configured. - */ -#if VA_BITS != EXTRA_SHIFT -#error "Mismatch between VA_BITS and page size/number of translation levels" -#endif - /* + * VA_BITS may be too small to allow for an ID mapping to be created + * that covers system RAM if that is located sufficiently high in the + * physical address space. So for the ID map, use an extended virtual + * range in that case, and configure an additional translation level + * if needed. + * * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the * entire ID map region can be mapped. As T0SZ == (64 - #bits used), * this number conveniently equals the number of leading zeroes in @@ -294,18 +284,41 @@ __create_page_tables: adrp x5, __idmap_text_end clz x5, x5 cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? - b.ge 1f // .. 
then skip additional level + b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz str x5, [x6] dmb sy dc ivac, x6 // Invalidate potentially stale cache line - create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 -1: +#if (VA_BITS < 48) +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, we have to configure an additional table level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" #endif - create_pgd_entry x0, x3, x5, x6 + mov x4, EXTRA_PTRS + create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6 +#else + /* + * If VA_BITS == 48, we don't have to configure an additional + * translation level, but the top-level table has more entries. + */ + mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT) + str_l x4, idmap_ptrs_per_pgd, x5 +#endif +1: + ldr_l x4, idmap_ptrs_per_pgd + create_pgd_entry x0, x3, x4, x5, x6 mov x5, x3 // __pa(__idmap_text_start) adr_l x6, __idmap_text_end // __pa(__idmap_text_end) create_block_map x0, x7, x3, x5, x6, x4 @@ -316,7 +329,8 @@ __create_page_tables: adrp x0, swapper_pg_dir mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement - create_pgd_entry x0, x5, x3, x6 + mov x4, PTRS_PER_PGD + create_pgd_entry x0, x5, x4, x3, x6 adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index f9681cc00973..33c40b3eea01 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -72,24 +72,23 @@ __do_hyp_init: mov x5, #TCR_EL2_RES1 orr x4, x4, x5 -#ifndef CONFIG_ARM64_VA_BITS_48 /* - * If we are running with VA_BITS < 48, we may be running with an extra - * level of translation in the ID map. This is only the case if system - * RAM is out of range for the currently configured page size and number - * of translation levels, in which case we will also need the extra - * level for the HYP ID map, or we won't be able to enable the EL2 MMU. + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. * * However, at EL2, there is only one TTBR register, and we can't switch * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need the extra level in *both* our translation - * tables. + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. * * So use the same T0SZ value we use for the ID map. */ ldr_l x5, idmap_t0sz bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH -#endif + /* * Set the PS bits in TCR_EL2. 
*/ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c631a17ae1d..baa34418c3bf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -50,6 +50,7 @@ #define NO_CONT_MAPPINGS BIT(1) u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b36945d49986..761787befd3b 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp, { pgd_t *pgd; pud_t *pud; - unsigned long addr, next; + unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD; int err = 0; + /* + * If it's not the hyp_pgd, fall back to the kvm idmap layout. + */ + if (pgdp != hyp_pgd) + ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd(); + mutex_lock(&kvm_hyp_pgd_mutex); addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + pgd_index(addr); + pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr); -- cgit v1.2.3 From a8ffaaa060b8d4da6138e0958cb0f45b73e1cb78 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Dec 2017 15:12:56 +0000 Subject: arm64: asid: Do not replace active_asids if already 0 Under some uncommon timing conditions, a generation check and xchg(active_asids, A1) in check_and_switch_context() on P1 can race with an ASID roll-over on P2. If P2 has not seen the update to active_asids[P1], it can re-allocate A1 to a new task T2 on P2. P1 ends up waiting on the spinlock since the xchg() returned 0 while P2 can go through a second ASID roll-over with (T2,A1,G2) active on P2. This roll-over copies active_asids[P1] == A1,G1 into reserved_asids[P1] and active_asids[P2] == A1,G2 into reserved_asids[P2]. A subsequent scheduling of T1 on P1 and T2 on P2 would match reserved_asids and get their generation bumped to G3: P1 P2 -- -- TTBR0.BADDR = T0 TTBR0.ASID = A0 asid_generation = G1 check_and_switch_context(T1,A1,G1) generation match check_and_switch_context(T2,A0,G0) new_context() ASID roll-over asid_generation = G2 flush_context() active_asids[P1] = 0 asid_map[A1] = 0 reserved_asids[P1] = A0,G0 xchg(active_asids, A1) active_asids[P1] = A1,G1 xchg returns 0 spin_lock_irqsave() allocated ASID (T2,A1,G2) asid_map[A1] = 1 active_asids[P2] = A1,G2 ... check_and_switch_context(T3,A0,G0) new_context() ASID roll-over asid_generation = G3 flush_context() active_asids[P1] = 0 asid_map[A1] = 1 reserved_asids[P1] = A1,G1 reserved_asids[P2] = A1,G2 allocated ASID (T3,A2,G3) asid_map[A2] = 1 active_asids[P2] = A2,G3 new_context() check_update_reserved_asid(A1,G1) matches reserved_asid[P1] reserved_asid[P1] = A1,G3 updated T1 ASID to (T1,A1,G3) check_and_switch_context(T2,A1,G2) new_context() check_and_switch_context(A1,G2) matches reserved_asids[P2] reserved_asids[P2] = A1,G3 updated T2 ASID to (T2,A1,G3) At this point, we have two tasks, T1 and T2 both using ASID A1 with the latest generation G3. Any of them is allowed to be scheduled on the other CPU leading to two different tasks with the same ASID on the same CPU. This patch changes the xchg to cmpxchg so that the active_asids is only updated if non-zero to avoid a race with an ASID roll-over on a different CPU. The ASID allocation algorithm has been formally verified using the TLA+ model checker (see https://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/kernel-tla.git/tree/asidalloc.tla for the spec). 
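In short, the fast path in check_and_switch_context() becomes (a simplified sketch of the hunk below; generation_matches() is shorthand for the generation comparison, not a real helper):

	old = atomic64_read(&per_cpu(active_asids, cpu));
	if (old && generation_matches(asid) &&
	    atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), old, asid))
		goto switch_mm_fastpath;	/* zero means a roll-over is in progress */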
Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/context.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 1cb3bc92ae5c..1fe71b9fcf35 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -194,26 +194,29 @@ set_asid: void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { unsigned long flags; - u64 asid; + u64 asid, old_active_asid; asid = atomic64_read(&mm->context.id); /* * The memory ordering here is subtle. - * If our ASID matches the current generation, then we update - * our active_asids entry with a relaxed xchg. Racing with a - * concurrent rollover means that either: + * If our active_asids is non-zero and the ASID matches the current + * generation, then we update the active_asids entry with a relaxed + * cmpxchg. Racing with a concurrent rollover means that either: * - * - We get a zero back from the xchg and end up waiting on the + * - We get a zero back from the cmpxchg and end up waiting on the * lock. Taking the lock synchronises with the rollover and so * we are forced to see the updated generation. * - * - We get a valid ASID back from the xchg, which means the + * - We get a valid ASID back from the cmpxchg, which means the * relaxed xchg in flush_context will treat us as reserved * because atomic RmWs are totally ordered for a given location. */ - if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) - && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + old_active_asid = atomic64_read(&per_cpu(active_asids, cpu)); + if (old_active_asid && + !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && + atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), + old_active_asid, asid)) goto switch_mm_fastpath; raw_spin_lock_irqsave(&cpu_asid_lock, flags); -- cgit v1.2.3 From 95e3de3590e3f2358bb13f013911bc1bfa5d3f53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Jan 2018 18:19:39 +0000 Subject: arm64: Move post_ttbr_update_workaround to C code We will soon need to invoke a CPU-specific function pointer after changing page tables, so move post_ttbr_update_workaround out into C code to make this possible. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 13 ------------- arch/arm64/kernel/entry.S | 2 +- arch/arm64/mm/context.c | 9 +++++++++ arch/arm64/mm/proc.S | 3 +-- 4 files changed, 11 insertions(+), 16 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 215a49213507..a6f90b648655 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -492,19 +492,6 @@ alternative_endif mrs \rd, sp_el0 .endm -/* - * Errata workaround post TTBRx_EL1 update. - */ - .macro post_ttbr_update_workaround -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -alternative_if ARM64_WORKAROUND_CAVIUM_27456 - ic iallu - dsb nsh - isb -alternative_else_nop_endif -#endif - .endm - /* * Arrange a physical address in a TTBR register, taking care of 52-bit * addresses. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ceed4877daf..80b539845da6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -277,7 +277,7 @@ alternative_else_nop_endif * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache * corruption). 
*/ - post_ttbr_update_workaround + bl post_ttbr_update_workaround .endif 1: .if \el != 0 diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 1fe71b9fcf35..511bd1e79b69 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -242,6 +242,15 @@ switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } +/* Errata workaround post TTBRx_EL1 update. */ +asmlinkage void post_ttbr_update_workaround(void) +{ + asm(ALTERNATIVE("nop; nop; nop", + "ic iallu; dsb nsh; isb", + ARM64_WORKAROUND_CAVIUM_27456, + CONFIG_CAVIUM_ERRATUM_27456)); +} + static int asids_init(void) { asid_bits = get_cpu_asid_bits(); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index bc334588f234..bc86f7ef8620 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -146,8 +146,7 @@ ENTRY(cpu_do_switch_mm) phys_to_ttbr x0, x2 msr ttbr0_el1, x2 // now update TTBR0 isb - post_ttbr_update_workaround - ret + b post_ttbr_update_workaround // Back to C code... ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "ax" -- cgit v1.2.3 From 0f15adbb2861ce6f75ccfc5a92b19eae0ef327d0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Jan 2018 11:17:58 +0000 Subject: arm64: Add skeleton to harden the branch predictor against aliasing attacks Aliasing attacks against CPU branch predictors can allow an attacker to redirect speculative control flow on some CPUs and potentially divulge information from one context to another. This patch adds initial skeleton code behind a new Kconfig option to enable implementation-specific mitigations against these attacks for CPUs that are affected. Co-developed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 17 +++++++++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/include/asm/mmu.h | 37 ++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/Makefile | 4 +++ arch/arm64/kernel/bpi.S | 55 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 74 ++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/entry.S | 7 ++-- arch/arm64/mm/context.c | 2 ++ arch/arm64/mm/fault.c | 17 +++++++++ 11 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kernel/bpi.S (limited to 'arch/arm64/mm') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cb7a70e686cb..664fadc2aa2e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -874,6 +874,23 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. +config HARDEN_BRANCH_PREDICTOR + bool "Harden the branch predictor against aliasing attacks" if EXPERT + default y + help + Speculation attacks against some high-performance processors rely on + being able to manipulate the branch predictor for a victim context by + executing aliasing branches in the attacker context. Such attacks + can be partially mitigated against by clearing internal branch + predictor state and limiting the prediction logic in some situations. + + This config option will take CPU-specific actions to harden the + branch predictor against aliasing attacks and may rely on specific + instruction sequences or control bits being set by the system + firmware. + + If unsure, say Y. 
+ menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b4537ffd1018..51616e77fe6b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -42,7 +42,8 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 +#define ARM64_HARDEN_BRANCH_PREDICTOR 24 -#define ARM64_NCAPS 24 +#define ARM64_NCAPS 25 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 6f7bdb89817f..6dd83d75b82a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -41,6 +41,43 @@ static inline bool arm64_kernel_unmapped_at_el0(void) cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + int hyp_vectors_slot; + bp_hardening_cb_t fn; +}; + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[]; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) +{ + return this_cpu_ptr(&bp_hardening_data); +} + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) + return; + + d = arm64_get_bp_hardening_data(); + if (d->fn) + d->fn(); +} +#else +static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) +{ + return NULL; +} + +static inline void arm64_apply_bp_hardening(void) { } +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 262ae18f0e05..54e99af043c6 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -439,6 +439,7 @@ /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 +#define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 067baace74a0..0c760db04858 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -53,6 +53,10 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +ifeq ($(CONFIG_KVM),y) +arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o +endif + obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S new file mode 100644 index 000000000000..06a931eb2673 --- /dev/null +++ b/arch/arm64/kernel/bpi.S @@ -0,0 +1,55 @@ +/* + * Contains CPU specific branch predictor invalidation sequences + * + * Copyright (C) 2018 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +.macro ventry target + .rept 31 + nop + .endr + b \target +.endm + +.macro vectors target + ventry \target + 0x000 + ventry \target + 0x080 + ventry \target + 0x100 + ventry \target + 0x180 + + ventry \target + 0x200 + ventry \target + 0x280 + ventry \target + 0x300 + ventry \target + 0x380 + + ventry \target + 0x400 + ventry \target + 0x480 + ventry \target + 0x500 + ventry \target + 0x580 + + ventry \target + 0x600 + ventry \target + 0x680 + ventry \target + 0x700 + ventry \target + 0x780 +.endm + + .align 11 +ENTRY(__bp_harden_hyp_vecs_start) + .rept 4 + vectors __kvm_hyp_vector + .endr +ENTRY(__bp_harden_hyp_vecs_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0e27f86ee709..16ea5c6f314e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -46,6 +46,80 @@ static int cpu_enable_trap_ctr_access(void *__unused) return 0; } +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +#include +#include + +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +#ifdef CONFIG_KVM +static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K); + int i; + + for (i = 0; i < SZ_2K; i += 0x80) + memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); + + flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); +} + +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + static int last_slot = -1; + static DEFINE_SPINLOCK(bp_lock); + int cpu, slot = -1; + + spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.fn, cpu) == fn) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + last_slot++; + BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start) + / SZ_2K) <= last_slot); + slot = last_slot; + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.fn, fn); + spin_unlock(&bp_lock); +} +#else +static void __install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + __this_cpu_write(bp_hardening_data.fn, fn); +} +#endif /* CONFIG_KVM */ + +static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, + bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) +{ + u64 pfr0; + + if (!entry->matches(entry, SCOPE_LOCAL_CPU)) + return; + + pfr0 = read_cpuid(ID_AA64PFR0_EL1); + if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) + return; + + __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); +} +#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ + #define MIDR_RANGE(model, min, max) \ .def_scope = SCOPE_LOCAL_CPU, \ .matches = is_affected_midr_range, \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 991922b45d3d..da6722db50b0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -147,6 +147,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 
ID_AA64PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 80b539845da6..07a7d4db8ec4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -721,12 +721,15 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 - enable_daif + enable_da_f +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp - bl do_mem_abort + bl do_el0_ia_bp_hardening b ret_to_user el0_fpsimd_acc: /* diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 511bd1e79b69..ff99a880a730 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -249,6 +249,8 @@ asmlinkage void post_ttbr_update_workaround(void) "ic iallu; dsb nsh; isb", ARM64_WORKAROUND_CAVIUM_27456, CONFIG_CAVIUM_ERRATUM_27456)); + + arm64_apply_bp_hardening(); } static int asids_init(void) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 22168cd0dde7..0e671ddf4855 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -708,6 +708,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die("", regs, &info, esr); } +asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption. + */ + if (addr > TASK_SIZE) + arm64_apply_bp_hardening(); + + local_irq_enable(); + do_mem_abort(addr, esr, regs); +} + + asmlinkage void __exception do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) -- cgit v1.2.3 From 6d99b68933fbcf51f84fcbba49246ce1209ec193 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 8 Jan 2018 15:38:06 +0000 Subject: arm64: alternatives: use tpidr_el2 on VHE hosts Now that KVM uses tpidr_el2 in the same way as Linux's cpu_offset in tpidr_el1, merge the two. This saves KVM from save/restoring tpidr_el1 on VHE hosts, and allows future code to blindly access per-cpu variables without triggering world-switch. 
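As a rough stand-alone illustration of what "cpu_offset in tpidr" buys: a per-CPU access is just a base address plus the offset held in the thread-ID register, regardless of whether that register is tpidr_el1 or tpidr_el2. Everything below (array layout, names, values) is a hypothetical model, not the kernel's per-CPU implementation:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4       /* hypothetical */

static long irq_count[NR_CPUS];    /* "one copy per CPU" of a variable */
static uintptr_t tpidr[NR_CPUS];   /* stand-in for tpidr_el1/tpidr_el2 */

static void set_my_cpu_offset(int cpu, uintptr_t off)
{
	tpidr[cpu] = off;          /* analogue of msr tpidr_elx, off */
}

static long *this_cpu_irq_count(int cpu)
{
	/* analogue of: adr_l base; mrs off, tpidr_elx; add base, base, off */
	return (long *)((uintptr_t)&irq_count[0] + tpidr[cpu]);
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		set_my_cpu_offset(cpu, cpu * sizeof(long));

	(*this_cpu_irq_count(2))++;    /* no locking, no world switch needed */
	printf("cpu2 irq_count = %ld\n", irq_count[2]);
	return 0;
}
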
Signed-off-by: James Morse Reviewed-by: Christoffer Dall Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative.h | 2 ++ arch/arm64/include/asm/assembler.h | 8 ++++++++ arch/arm64/include/asm/percpu.h | 11 +++++++++-- arch/arm64/kernel/alternative.c | 9 +++++---- arch/arm64/kernel/cpufeature.c | 17 +++++++++++++++++ arch/arm64/mm/proc.S | 8 ++++++++ 6 files changed, 49 insertions(+), 6 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 4a85c6952a22..669028172fd6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -12,6 +12,8 @@ #include #include +extern int alternatives_applied; + struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index a6f90b648655..5dc4856f3bb9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -254,7 +254,11 @@ lr .req x30 // link register #else adr_l \dst, \sym #endif +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif add \dst, \dst, \tmp .endm @@ -265,7 +269,11 @@ lr .req x30 // link register */ .macro ldr_this_cpu dst, sym, tmp adr_l \dst, \sym +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs \tmp, tpidr_el1 +alternative_else + mrs \tmp, tpidr_el2 +alternative_endif ldr \dst, [\dst, \tmp] .endm diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 3bd498e4de4c..43393208229e 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,11 +16,15 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#include #include static inline void set_my_cpu_offset(unsigned long off) { - asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); + asm volatile(ALTERNATIVE("msr tpidr_el1, %0", + "msr tpidr_el2, %0", + ARM64_HAS_VIRT_HOST_EXTN) + :: "r" (off) : "memory"); } static inline unsigned long __my_cpu_offset(void) @@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void) * We want to allow caching the value, so avoid using volatile and * instead use a fake stack read to hazard against barrier(). 
*/ - asm("mrs %0, tpidr_el1" : "=r" (off) : + asm(ALTERNATIVE("mrs %0, tpidr_el1", + "mrs %0, tpidr_el2", + ARM64_HAS_VIRT_HOST_EXTN) + : "=r" (off) : "Q" (*(const unsigned long *)current_stack_pointer)); return off; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 6dd0a3a3e5c9..414288a558c8 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -32,6 +32,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) +int alternatives_applied; + struct alt_region { struct alt_instr *begin; struct alt_instr *end; @@ -143,7 +145,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) */ static int __apply_alternatives_multi_stop(void *unused) { - static int patched = 0; struct alt_region region = { .begin = (struct alt_instr *)__alt_instructions, .end = (struct alt_instr *)__alt_instructions_end, @@ -151,14 +152,14 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(patched)) + while (!READ_ONCE(alternatives_applied)) cpu_relax(); isb(); } else { - BUG_ON(patched); + BUG_ON(alternatives_applied); __apply_alternatives(®ion, true); /* Barriers provided by the cache flushing */ - WRITE_ONCE(patched, 1); + WRITE_ONCE(alternatives_applied, 1); } return 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index da6722db50b0..9ef84d0def9a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -886,6 +886,22 @@ static int __init parse_kpti(char *str) __setup("kpti=", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +static int cpu_copy_el2regs(void *__unused) +{ + /* + * Copy register values that aren't redirected by hardware. + * + * Before code patching, we only set tpidr_el1, all CPUs need to copy + * this value to tpidr_el2 before we patch the code. Once we've done + * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to + * do anything here. + */ + if (!alternatives_applied) + write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); + + return 0; +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -955,6 +971,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .capability = ARM64_HAS_VIRT_HOST_EXTN, .def_scope = SCOPE_SYSTEM, .matches = runs_at_el2, + .enable = cpu_copy_el2regs, }, { .desc = "32-bit EL0 Support", diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index bc86f7ef8620..5a59eea49395 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -70,7 +70,11 @@ ENTRY(cpu_do_suspend) mrs x8, mdscr_el1 mrs x9, oslsr_el1 mrs x10, sctlr_el1 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x11, tpidr_el1 +alternative_else + mrs x11, tpidr_el2 +alternative_endif mrs x12, sp_el0 stp x2, x3, [x0] stp x4, xzr, [x0, #16] @@ -116,7 +120,11 @@ ENTRY(cpu_do_resume) msr mdscr_el1, x10 msr sctlr_el1, x12 +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN msr tpidr_el1, x13 +alternative_else + msr tpidr_el2, x13 +alternative_endif msr sp_el0, x14 /* * Restore oslsr_el1 by writing oslar_el1 -- cgit v1.2.3 From 83f8ee3a73f551aebb5b0bb029feaf6936615730 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 8 Jan 2018 15:38:17 +0000 Subject: arm64: mmu: add the entry trampolines start/end section markers into sections.h SDEI needs to calculate an offset in the trampoline page too. Move the extern char[] to sections.h. 
This patch just moves code around. Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/mm/mmu.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 941267caa39c..caab039d6305 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -28,5 +28,6 @@ extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; +extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 4071602031ed..62d7abb2f710 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -529,8 +529,6 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { - extern char __entry_tramp_text_start[]; - pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); -- cgit v1.2.3 From 0370b31e48454d8cf11120664aedd1c51b3004cb Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 11 Jan 2018 10:11:59 +0000 Subject: arm64: Extend early page table code to allow for larger kernels Currently the early assembler page table code assumes that precisely 1xpgd, 1xpud, 1xpmd are sufficient to represent the early kernel text mappings. Unfortunately this is rarely the case when running with a 16KB granule, and we also run into limits with 4KB granule when building much larger kernels. This patch re-writes the early page table logic to compute indices of mappings for each level of page table, and if multiple indices are required, the next-level page table is scaled up accordingly. Also the required size of the swapper_pg_dir is computed at link time to cover the mapping [KIMAGE_ADDR + VOFFSET, _end]. When KASLR is enabled, an extra page is set aside for each level that may require extra entries at runtime. Tested-by: Ard Biesheuvel Reviewed-by: Ard Biesheuvel Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kernel-pgtable.h | 47 ++++++++++- arch/arm64/include/asm/pgtable.h | 1 + arch/arm64/kernel/head.S | 144 +++++++++++++++++++++++--------- arch/arm64/kernel/vmlinux.lds.S | 1 + arch/arm64/mm/mmu.c | 3 +- 5 files changed, 156 insertions(+), 40 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 77a27af01371..82386e860dd2 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -52,7 +52,52 @@ #define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) #endif -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) + +/* + * If KASLR is enabled, then an offset K is added to the kernel address + * space. The bottom 21 bits of this offset are zero to guarantee 2MB + * alignment for PA and VA. + * + * For each pagetable level of the swapper, we know that the shift will + * be larger than 21 (for the 4KB granule case we use section maps thus + * the smallest shift is actually 30) thus there is the possibility that + * KASLR can increase the number of pagetable entries by 1, so we make + * room for this extra entry. 
+ * + * Note KASLR cannot increase the number of required entries for a level + * by more than one because it increments both the virtual start and end + * addresses equally (the extra entry comes from the case where the end + * address is just pushed over a boundary and the start address isn't). + */ + +#ifdef CONFIG_RANDOMIZE_BASE +#define EARLY_KASLR (1) +#else +#define EARLY_KASLR (0) +#endif + +#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \ + - ((vstart) >> (shift)) + 1 + EARLY_KASLR) + +#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) + +#if SWAPPER_PGTABLE_LEVELS > 3 +#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT)) +#else +#define EARLY_PUDS(vstart, vend) (0) +#endif + +#if SWAPPER_PGTABLE_LEVELS > 2 +#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT)) +#else +#define EARLY_PMDS(vstart, vend) (0) +#endif + +#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \ + + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \ + + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \ + + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */ +#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) #ifdef CONFIG_ARM64_SW_TTBR0_PAN diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index bfa237e892f1..54b0a8398055 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -706,6 +706,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, #endif extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern pgd_t swapper_pg_end[]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 66f01869e97c..95748a00eb89 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -191,44 +191,109 @@ ENDPROC(preserve_boot_args) .endm /* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. + * Macro to populate page table entries, these entries can be pointers to the next level + * or last level entries pointing to physical memory. 
* - * Preserves: tbl, next, virt - * Corrupts: ptrs_per_pgd, tmp1, tmp2 + * tbl: page table address + * rtbl: pointer to page table or physical memory + * index: start index to write + * eindex: end index to write - [index, eindex] written to + * flags: flags for pagetable entry to or in + * inc: increment to rtbl between each entry + * tmp1: temporary variable + * + * Preserves: tbl, eindex, flags, inc + * Corrupts: index, tmp1 + * Returns: rtbl */ - .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS > 3 - mov \ptrs_per_pgd, PTRS_PER_PUD - create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 -#endif -#if SWAPPER_PGTABLE_LEVELS > 2 - mov \ptrs_per_pgd, PTRS_PER_PTE - create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2 -#endif + .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 +.Lpe\@: phys_to_pte \rtbl, \tmp1 + orr \tmp1, \tmp1, \flags // tmp1 = table entry + str \tmp1, [\tbl, \index, lsl #3] + add \rtbl, \rtbl, \inc // rtbl = pa next level + add \index, \index, #1 + cmp \index, \eindex + b.ls .Lpe\@ + .endm + +/* + * Compute indices of table entries from virtual address range. If multiple entries + * were needed in the previous page table level then the next page table level is assumed + * to be composed of multiple pages. (This effectively scales the end index). + * + * vstart: virtual address of start of range + * vend: virtual address of end of range + * shift: shift used to transform virtual address into index + * ptrs: number of entries in page table + * istart: index in table corresponding to vstart + * iend: index in table corresponding to vend + * count: On entry: how many extra entries were required in previous level, scales + * our end index. + * On exit: returns how many extra entries required for next page table level + * + * Preserves: vstart, vend, shift, ptrs + * Returns: istart, iend, count + */ + .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count + lsr \iend, \vend, \shift + mov \istart, \ptrs + sub \istart, \istart, #1 + and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1) + mov \istart, \ptrs + mul \istart, \istart, \count + add \iend, \iend, \istart // iend += (count - 1) * ptrs + // our entries span multiple tables + + lsr \istart, \vstart, \shift + mov \count, \ptrs + sub \count, \count, #1 + and \istart, \istart, \count + + sub \count, \iend, \istart .endm /* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). + * Map memory for specified virtual address range. Each level of page table needed supports + * multiple entries. If a level requires n entries the next page table level is assumed to be + * formed from n pages. 
+ * + * tbl: location of page table + * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) + * vstart: start address to map + * vend: end address to map - we map [vstart, vend] + * flags: flags to use to map last level entries + * phys: physical address corresponding to vstart - physical memory is contiguous + * pgds: the number of pgd entries * - * Preserves: tbl, flags - * Corrupts: phys, start, end, tmp, pstate + * Temporaries: istart, iend, tmp, count, sv - these need to be different registers + * Preserves: vstart, vend, flags + * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv */ - .macro create_block_map, tbl, flags, phys, start, end, tmp - lsr \start, \start, #SWAPPER_BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - bic \phys, \phys, #SWAPPER_BLOCK_SIZE - 1 - lsr \end, \end, #SWAPPER_BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: phys_to_pte \phys, \tmp - orr \tmp, \tmp, \flags // table entry - str \tmp, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + add \rtbl, \tbl, #PAGE_SIZE + mov \sv, \rtbl + mov \count, #0 + compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + mov \tbl, \sv + mov \sv, \rtbl + +#if SWAPPER_PGTABLE_LEVELS > 3 + compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + mov \tbl, \sv + mov \sv, \rtbl +#endif + +#if SWAPPER_PGTABLE_LEVELS > 2 + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + mov \tbl, \sv +#endif + + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count + bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1 + populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp .endm /* @@ -246,14 +311,16 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + adrp x1, swapper_pg_end + sub x1, x1, x0 bl __inval_dcache_area /* * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + adrp x1, swapper_pg_end + sub x1, x1, x0 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 @@ -318,10 +385,10 @@ __create_page_tables: #endif 1: ldr_l x4, idmap_ptrs_per_pgd - create_pgd_entry x0, x3, x4, x5, x6 mov x5, x3 // __pa(__idmap_text_start) adr_l x6, __idmap_text_end // __pa(__idmap_text_end) - create_block_map x0, x7, x3, x5, x6, x4 + + map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14 /* * Map the kernel image (starting with PHYS_OFFSET). 
@@ -330,12 +397,12 @@ __create_page_tables: mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) add x5, x5, x23 // add KASLR displacement mov x4, PTRS_PER_PGD - create_pgd_entry x0, x5, x4, x3, x6 adrp x6, _end // runtime __pa(_end) adrp x3, _text // runtime __pa(_text) sub x6, x6, x3 // _end - _text add x6, x6, x5 // runtime __va(_end) - create_block_map x0, x7, x3, x5, x6, x4 + + map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14 /* * Since the page tables have been populated with non-cacheable @@ -343,7 +410,8 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + adrp x1, swapper_pg_end + sub x1, x1, x0 dmb sy bl __inval_dcache_area diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4c7112a47469..0221aca6493d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -230,6 +230,7 @@ SECTIONS #endif swapper_pg_dir = .; . += SWAPPER_DIR_SIZE; + swapper_pg_end = .; __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 62d7abb2f710..b44992ec9643 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -642,7 +642,8 @@ void __init paging_init(void) * allocated with it. */ memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, - SWAPPER_DIR_SIZE - PAGE_SIZE); + __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir) + - PAGE_SIZE); } /* -- cgit v1.2.3 From 071929dbdd865f779a89ba3f1e06ba8d17dd3743 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 19 Dec 2017 11:28:10 -0800 Subject: arm64: Stop printing the virtual memory layout Printing kernel addresses should be done in limited circumstances, mostly for debugging purposes. Printing out the virtual memory layout at every kernel bootup doesn't really fall into this category so delete the prints. There are other ways to get the same information. 
Acked-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5960bef0170d..672094ed7e07 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -599,49 +599,6 @@ void __init mem_init(void) mem_init_print_info(NULL); -#define MLK(b, t) b, t, ((t) - (b)) >> 10 -#define MLM(b, t) b, t, ((t) - (b)) >> 20 -#define MLG(b, t) b, t, ((t) - (b)) >> 30 -#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) - - pr_notice("Virtual kernel memory layout:\n"); -#ifdef CONFIG_KASAN - pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", - MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); -#endif - pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", - MLM(MODULES_VADDR, MODULES_END)); - pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", - MLG(VMALLOC_START, VMALLOC_END)); - pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, _etext)); - pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__start_rodata, __init_begin)); - pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__init_begin, __init_end)); - pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_sdata, _edata)); - pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__bss_start, __bss_stop)); - pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", - MLK(FIXADDR_START, FIXADDR_TOP)); - pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", - MLM(PCI_IO_START, PCI_IO_END)); -#ifdef CONFIG_SPARSEMEM_VMEMMAP - pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", - MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); - pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", - MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), - (unsigned long)virt_to_page(high_memory))); -#endif - pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", - MLM(__phys_to_virt(memblock_start_of_DRAM()), - (unsigned long)high_memory)); - -#undef MLK -#undef MLM -#undef MLK_ROUNDUP - /* * Check boundaries twice: Some fundamental inconsistencies can be * detected at build time already. -- cgit v1.2.3 From 7a00d68ebe5f07cb1db17e7fedfd031f0d87e8bb Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 15 Jan 2018 19:38:55 +0000 Subject: arm64: sysreg: Move to use definitions for all the SCTLR bits __cpu_setup() configures SCTLR_EL1 using some hard coded hex masks, and el2_setup() duplicates some this when setting RES1 bits. Lets make this the same as KVM's hyp_init, which uses named bits. First, we add definitions for all the SCTLR_EL{1,2} bits, the RES{1,0} bits, and those we want to set or clear. Add a build_bug checks to ensures all bits are either set or clear. This means we don't need to preserve endian-ness configuration generated elsewhere. Finally, move the head.S and proc.S users of these hard-coded masks over to the macro versions. 
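A quick way to see why the build_bug check below catches both unclaimed and doubly-claimed bits: the XOR of the SET and CLEAR masks is all-ones only when every bit sits in exactly one of them. A tiny stand-alone demo with made-up masks (not the real SCTLR values):

#include <assert.h>
#include <stdint.h>

#define DEMO_SET   0x0000ff0fu   /* bits we insist on setting  (hypothetical) */
#define DEMO_CLEAR 0xffff00f0u   /* bits we insist on clearing (hypothetical) */

int main(void)
{
	/*
	 * XOR is ~0 only if each bit appears in exactly one mask: a bit in
	 * neither mask XORs to 0, and a bit claimed by both masks also XORs
	 * to 0.  The kernel performs this check at build time with
	 * BUILD_BUG_ON().
	 */
	assert((DEMO_SET ^ DEMO_CLEAR) == ~0u);
	return 0;
}
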
Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 65 +++++++++++++++++++++++++++++++++++++++-- arch/arm64/kernel/head.S | 13 ++------- arch/arm64/mm/proc.S | 24 +-------------- 3 files changed, 67 insertions(+), 35 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 54e99af043c6..1a8108f84932 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,7 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include #include /* @@ -398,25 +399,81 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_WXN (1 << 19) #define SCTLR_ELx_I (1 << 12) #define SCTLR_ELx_SA (1 << 3) #define SCTLR_ELx_C (1 << 2) #define SCTLR_ELx_A (1 << 1) #define SCTLR_ELx_M 1 +#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I) + +/* SCTLR_EL2 specific flags. */ #define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ (1 << 29)) +#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ + (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ + (1 << 17) | (1 << 20) | (1 << 21) | (1 << 24) | \ + (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2 SCTLR_ELx_EE +#define ENDIAN_CLEAR_EL2 0 +#else +#define ENDIAN_SET_EL2 0 +#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE +#endif + +/* SCTLR_EL2 value used for the hyp-stub */ +#define SCTLR_EL2_SET (ENDIAN_SET_EL2 | SCTLR_EL2_RES1) +#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ + ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0) -#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ - SCTLR_ELx_SA | SCTLR_ELx_I) /* SCTLR_EL1 specific flags. 
*/ #define SCTLR_EL1_UCI (1 << 26) +#define SCTLR_EL1_E0E (1 << 24) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_NTWE (1 << 18) +#define SCTLR_EL1_NTWI (1 << 16) #define SCTLR_EL1_UCT (1 << 15) +#define SCTLR_EL1_DZE (1 << 14) +#define SCTLR_EL1_UMA (1 << 9) #define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_ITD (1 << 7) #define SCTLR_EL1_CP15BEN (1 << 5) +#define SCTLR_EL1_SA0 (1 << 4) + +#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ + (1 << 29)) +#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ + (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#define ENDIAN_CLEAR_EL1 0 +#else +#define ENDIAN_SET_EL1 0 +#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#endif + +#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ + SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_NTWE | SCTLR_EL1_SPAN | ENDIAN_SET_EL1 |\ + SCTLR_EL1_UCI | SCTLR_EL1_RES1) +#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ + SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ + SCTLR_EL1_RES0) + +/* Check all the bits are accounted for */ +#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0) /* id_aa64isar0 */ #define ID_AA64ISAR0_FHM_SHIFT 48 @@ -593,6 +650,7 @@ #else +#include #include asm( @@ -649,6 +707,9 @@ static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; + SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS; + SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS; + val = read_sysreg(sctlr_el1); val &= ~clear; val |= set; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 95748a00eb89..c3b241b8b659 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -492,17 +492,13 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq 1f - mrs x0, sctlr_el1 -CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 -CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret -1: mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 +1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE @@ -618,10 +614,7 @@ install_el2_stub: * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5a59eea49395..ec4c3d82b4d6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -226,11 +226,7 @@ ENTRY(__cpu_setup) /* * Prepare SCTLR */ - adr x5, crval - ldp w5, w6, [x5] - mrs x0, sctlr_el1 - bic x0, x0, x5 // clear bits - orr x0, x0, x6 // set bits + mov_q x0, SCTLR_EL1_SET /* * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. 
@@ -259,21 +255,3 @@ ENTRY(__cpu_setup) msr tcr_el1, x10 ret // return to head.S ENDPROC(__cpu_setup) - - /* - * We set the desired value explicitly, including those of the - * reserved bits. The values of bits EE & E0E were set early in - * el2_setup, which are left untouched below. - * - * n n T - * U E WT T UD US IHBS - * CE0 XWHW CZ ME TEEA S - * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM - * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved - * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings - */ - .type crval, #object -crval: - .word 0xfcffffff // clear - .word 0x34d5d91d // set - .popsection -- cgit v1.2.3 From 68ddbf09ec5a888ec850edd7e7438d2daf069c56 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 15 Jan 2018 19:38:59 +0000 Subject: arm64: kernel: Prepare for a DISR user KVM would like to consume any pending SError (or RAS error) after guest exit. Today it has to unmask SError and use dsb+isb to synchronise the CPU. With the RAS extensions we can use ESB to synchronise any pending SError. Add the necessary macros to allow DISR to be read and converted to an ESR. We clear the DISR register when we enable the RAS cpufeature, and the kernel has not executed any ESB instructions. Any value we find in DISR must have belonged to firmware. Executing an ESB instruction is the only way to update DISR, so we can expect firmware to have handled any deferred SError. By the same logic we clear DISR in the idle path. Reviewed-by: Suzuki K Poulose Signed-off-by: James Morse Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 7 +++++++ arch/arm64/include/asm/esr.h | 7 +++++++ arch/arm64/include/asm/exception.h | 14 ++++++++++++++ arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 9 +++++++++ arch/arm64/mm/proc.S | 5 +++++ 7 files changed, 44 insertions(+) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 1645e0d3a2c1..794fe8122602 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -108,6 +108,13 @@ dmb \opt .endm +/* + * RAS Error Synchronization barrier + */ + .macro esb + hint #16 + .endm + /* * NOP sequence */ diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index c367838700fa..803443d74926 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -140,6 +140,13 @@ #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) +#define DISR_EL1_IDS (UL(1) << 24) +/* + * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean + * different things in the future... 
+ */ +#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) + /* ESR value templates for specific events */ /* BRK instruction trap from AArch64 state */ diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0c2eec490abf..bc30429d8e91 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -18,6 +18,8 @@ #ifndef __ASM_EXCEPTION_H #define __ASM_EXCEPTION_H +#include + #include #define __exception __attribute__((section(".exception.text"))) @@ -27,4 +29,16 @@ #define __exception_irq_entry __exception #endif +static inline u32 disr_to_esr(u64 disr) +{ + unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT; + + if ((disr & DISR_EL1_IDS) == 0) + esr |= (disr & DISR_EL1_ESR_MASK); + else + esr |= (disr & ESR_ELx_ISS_MASK); + + return esr; +} + #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 023cacb946c3..cee4ae25a5d1 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -216,6 +216,7 @@ static inline void spin_lock_prefetch(const void *ptr) int cpu_enable_pan(void *__unused); int cpu_enable_cache_maint_trap(void *__unused); +int cpu_clear_disr(void *__unused); /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1281bc8263c2..b5d543fc677d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -279,6 +279,7 @@ #define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0) #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) #define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) #define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 116a5c9443a7..5612d6f46331 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1039,6 +1039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, .min_field_value = ID_AA64PFR0_RAS_V1, + .enable = cpu_clear_disr, }, #endif /* CONFIG_ARM64_RAS_EXTN */ {}, @@ -1470,3 +1471,11 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); + +int cpu_clear_disr(void *__unused) +{ + /* Firmware may have left a deferred SError in this register. */ + write_sysreg_s(0, SYS_DISR_EL1); + + return 0; +} diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index ec4c3d82b4d6..05e4ae934b23 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -132,6 +132,11 @@ alternative_endif ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 + +alternative_if ARM64_HAS_RAS_EXTN + msr_s SYS_DISR_EL1, xzr +alternative_else_nop_endif + isb ret ENDPROC(cpu_do_resume) -- cgit v1.2.3 From 6b88a32c7af68895134872cdec3b6bfdb532d94e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 10 Jan 2018 13:18:30 +0000 Subject: arm64: kpti: Fix the interaction between ASID switching and software PAN With ARM64_SW_TTBR0_PAN enabled, the exception entry code checks the active ASID to decide whether user access was enabled (non-zero ASID) when the exception was taken. On return from exception, if user access was previously disabled, it re-instates TTBR0_EL1 from the per-thread saved value (updated in switch_mm() or efi_set_pgd()). 
Commit 7655abb95386 ("arm64: mm: Move ASID from TTBR0 to TTBR1") makes a TTBR0_EL1 + ASID switching non-atomic. Subsequently, commit 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") changes the __uaccess_ttbr0_disable() function and asm macro to first write the reserved TTBR0_EL1 followed by the ASID=0 update in TTBR1_EL1. If an exception occurs between these two, the exception return code will re-instate a valid TTBR0_EL1. Similar scenario can happen in cpu_switch_mm() between setting the reserved TTBR0_EL1 and the ASID update in cpu_do_switch_mm(). This patch reverts the entry.S check for ASID == 0 to TTBR0_EL1 and disables the interrupts around the TTBR0_EL1 and ASID switching code in __uaccess_ttbr0_disable(). It also ensures that, when returning from the EFI runtime services, efi_set_pgd() doesn't leave a non-zero ASID in TTBR1_EL1 by using uaccess_ttbr0_{enable,disable}. The accesses to current_thread_info()->ttbr0 are updated to use READ_ONCE/WRITE_ONCE. As a safety measure, __uaccess_ttbr0_enable() always masks out any existing non-zero ASID TTBR1_EL1 before writing in the new ASID. Fixes: 27a921e75711 ("arm64: mm: Fix and re-enable ARM64_SW_TTBR0_PAN") Acked-by: Will Deacon Reported-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Reviewed-by: James Morse Tested-by: James Morse Co-developed-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/asm-uaccess.h | 12 +++++++----- arch/arm64/include/asm/efi.h | 12 +++++++----- arch/arm64/include/asm/mmu_context.h | 3 ++- arch/arm64/include/asm/uaccess.h | 9 ++++++--- arch/arm64/kernel/entry.S | 2 +- arch/arm64/lib/clear_user.S | 2 +- arch/arm64/lib/copy_from_user.S | 2 +- arch/arm64/lib/copy_in_user.S | 2 +- arch/arm64/lib/copy_to_user.S | 2 +- arch/arm64/mm/cache.S | 2 +- arch/arm64/mm/proc.S | 3 +++ arch/arm64/xen/hypercall.S | 2 +- 12 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch/arm64/mm') diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 8719ce122a38..4128bec033f6 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -14,11 +14,11 @@ #ifdef CONFIG_ARM64_SW_TTBR0_PAN .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir + bic \tmp1, \tmp1, #TTBR_ASID_MASK sub \tmp1, \tmp1, #RESERVED_TTBR0_SIZE // reserved_ttbr0 just before swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb add \tmp1, \tmp1, #RESERVED_TTBR0_SIZE - bic \tmp1, \tmp1, #TTBR_ASID_MASK msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm @@ -35,9 +35,11 @@ isb .endm - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 alternative_if_not ARM64_HAS_PAN + save_and_disable_irq \tmp2 // avoid preemption __uaccess_ttbr0_disable \tmp1 + restore_irq \tmp2 alternative_else_nop_endif .endm @@ -49,7 +51,7 @@ alternative_if_not ARM64_HAS_PAN alternative_else_nop_endif .endm #else - .macro uaccess_ttbr0_disable, tmp1 + .macro uaccess_ttbr0_disable, tmp1, tmp2 .endm .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3 @@ -59,8 +61,8 @@ alternative_else_nop_endif /* * These macros are no-ops when UAO is present. 
*/ - .macro uaccess_disable_not_uao, tmp1 - uaccess_ttbr0_disable \tmp1 + .macro uaccess_disable_not_uao, tmp1, tmp2 + uaccess_ttbr0_disable \tmp1, \tmp2 alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(1) alternative_else_nop_endif diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index c4cd5081d78b..8389050328bb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -121,19 +121,21 @@ static inline void efi_set_pgd(struct mm_struct *mm) if (mm != current->active_mm) { /* * Update the current thread's saved ttbr0 since it is - * restored as part of a return from exception. Set - * the hardware TTBR0_EL1 using cpu_switch_mm() - * directly to enable potential errata workarounds. + * restored as part of a return from exception. Enable + * access to the valid TTBR0_EL1 and invoke the errata + * workaround directly since there is no return from + * exception when invoking the EFI run-time services. */ update_saved_ttbr0(current, mm); - cpu_switch_mm(mm->pgd, mm); + uaccess_ttbr0_enable(); + post_ttbr_update_workaround(); } else { /* * Defer the switch to the current thread's TTBR0_EL1 * until uaccess_enable(). Restore the current * thread's saved ttbr0 corresponding to its active_mm */ - cpu_set_reserved_ttbr0(); + uaccess_ttbr0_disable(); update_saved_ttbr0(current, current->active_mm); } } diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 5aee19905556..8d3331985d2e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -184,7 +184,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, else ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; - task_thread_info(tsk)->ttbr0 = ttbr; + WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); } #else static inline void update_saved_ttbr0(struct task_struct *tsk, @@ -239,6 +239,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev,next) switch_mm(prev, next, current) void verify_cpu_asid_bits(void); +void post_ttbr_update_workaround(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 01ade20d5456..59fda5292936 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -105,16 +105,18 @@ static inline void set_fs(mm_segment_t fs) #ifdef CONFIG_ARM64_SW_TTBR0_PAN static inline void __uaccess_ttbr0_disable(void) { - unsigned long ttbr; + unsigned long flags, ttbr; + local_irq_save(flags); ttbr = read_sysreg(ttbr1_el1); + ttbr &= ~TTBR_ASID_MASK; /* reserved_ttbr0 placed before swapper_pg_dir */ write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ - ttbr &= ~TTBR_ASID_MASK; write_sysreg(ttbr, ttbr1_el1); isb(); + local_irq_restore(flags); } static inline void __uaccess_ttbr0_enable(void) @@ -127,10 +129,11 @@ static inline void __uaccess_ttbr0_enable(void) * roll-over and an update of 'ttbr0'. 
*/ local_irq_save(flags); - ttbr0 = current_thread_info()->ttbr0; + ttbr0 = READ_ONCE(current_thread_info()->ttbr0); /* Restore active ASID */ ttbr1 = read_sysreg(ttbr1_el1); + ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */ ttbr1 |= ttbr0 & TTBR_ASID_MASK; write_sysreg(ttbr1, ttbr1_el1); isb(); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1e1d1842888d..b34e717d7597 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -204,7 +204,7 @@ alternative_if ARM64_HAS_PAN alternative_else_nop_endif .if \el != 0 - mrs x21, ttbr1_el1 + mrs x21, ttbr0_el1 tst x21, #TTBR_ASID_MASK // Check for the reserved ASID orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR b.eq 1f // TTBR0 access already disabled diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 8f9c4641e706..3d69a8d41fa5 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 b.mi 5f uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 - uaccess_disable_not_uao x2 + uaccess_disable_not_uao x2, x3 ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 69d86a80f3e2..20305d485046 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -67,7 +67,7 @@ ENTRY(__arch_copy_from_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 // Nothing to copy ret ENDPROC(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index e442b531252a..fbb090f431a5 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -68,7 +68,7 @@ ENTRY(raw_copy_in_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(raw_copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 318f15d5c336..fda6172d6b88 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -66,7 +66,7 @@ ENTRY(__arch_copy_to_user) uaccess_enable_not_uao x3, x4, x5 add end, x0, x2 #include "copy_template.S" - uaccess_disable_not_uao x3 + uaccess_disable_not_uao x3, x4 mov x0, #0 ret ENDPROC(__arch_copy_to_user) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 6cd20a8c0952..91464e7f77cc 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -72,7 +72,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU isb mov x0, #0 1: - uaccess_ttbr0_disable x1 + uaccess_ttbr0_disable x1, x2 ret 9: mov x0, #-EFAULT diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 05e4ae934b23..c6a12073ef46 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -153,6 +153,9 @@ ENDPROC(cpu_do_resume) ENTRY(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id +#ifdef CONFIG_ARM64_SW_TTBR0_PAN + bfi x0, x1, #48, #16 // set the ASID field in TTBR0 +#endif bfi x2, x1, #48, #16 // set the ASID msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set) isb diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index acdbd2c9e899..c5f05c4a4d00 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -107,6 +107,6 @@ ENTRY(privcmd_call) /* * Disable userspace access from kernel once the hyp call completed. 
 */
- uaccess_ttbr0_disable x6
+ uaccess_ttbr0_disable x6, x7
 ret
 ENDPROC(privcmd_call);
-- cgit v1.2.3

From e9eaa8052fe71b95f4fea6072fa3e0b2cf0b620f Mon Sep 17 00:00:00 2001
From: Kristina Martsenko
Date: Thu, 18 Jan 2018 19:13:11 +0000
Subject: arm64: mm: ignore memory above supported physical address size

When booting a kernel without 52-bit PA support (e.g. a kernel with 4k pages) on a system with 52-bit memory, the kernel will currently try to use the 52-bit memory and crash. Fix this by ignoring any memory higher than what the kernel supports.

Fixes: f77d281713d4 ("arm64: enable 52-bit physical address support")
Signed-off-by: Kristina Martsenko
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/init.c | 3 +++
 1 file changed, 3 insertions(+)
(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 672094ed7e07..285745b2ca38 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -366,6 +366,9 @@ void __init arm64_memblock_init(void)
 /* Handle linux,usable-memory-range property */
 fdt_enforce_memory_region();
+ /* Remove memory above our supported physical address size */
+ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
+
 /*
 * Ensure that the linear region takes up exactly half of the kernel
 * virtual address space. This way, we can distinguish a linear address
-- cgit v1.2.3

From a8e4c0a919ae310944ed2c9ace11cf3ccd8a609b Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Fri, 19 Jan 2018 15:42:09 +0000
Subject: arm64: Move BP hardening to check_and_switch_context

We call arm64_apply_bp_hardening() from post_ttbr_update_workaround, which has the unexpected consequence of being triggered on every exception return to userspace when ARM64_SW_TTBR0_PAN is selected, even if no context switch actually occurred. This is a bit suboptimal, and it would be more logical to only invalidate the branch predictor when we actually switch to a different mm.

In order to solve this, move the call to arm64_apply_bp_hardening() into check_and_switch_context(), where we're guaranteed to pick a different mm context.

Acked-by: Will Deacon
Signed-off-by: Marc Zyngier
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/context.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ff99a880a730..301417ae2ba8 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -234,6 +234,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 switch_mm_fastpath:
+
+ arm64_apply_bp_hardening();
+
 /*
 * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
 * emulating PAN.
@@ -249,8 +252,6 @@ asmlinkage void post_ttbr_update_workaround(void)
 "ic iallu; dsb nsh; isb",
 ARM64_WORKAROUND_CAVIUM_27456,
 CONFIG_CAVIUM_ERRATUM_27456));
-
- arm64_apply_bp_hardening();
 }
 static int asids_init(void)
-- cgit v1.2.3

From ec89ab50a03a33a4a648869e868b1964354fb2d1 Mon Sep 17 00:00:00 2001
From: Steve Capper
Date: Wed, 24 Jan 2018 08:27:08 +0000
Subject: arm64: Fix TTBR + PAN + 52-bit PA logic in cpu_do_switch_mm

In cpu_do_switch_mm(.) with ARM64_SW_TTBR0_PAN=y we apply phys_to_ttbr to a value that already has an ASID inserted into the upper bits. For 52-bit PA configurations this then can give us TTBR0_EL1 registers that cause translation table walks to attempt to access non-zero PA[51:48] spuriously, ultimately leading to a Synchronous External Abort on level 1 translation.
This patch re-arranges the logic in cpu_do_switch_mm(.) such that phys_to_ttbr is called before the ASID is inserted into the TTBR0 value.

Fixes: 6b88a32c7af6 ("arm64: kpti: Fix the interaction between ASID switching and software PAN")
Acked-by: Suzuki K Poulose
Tested-by: Kristina Martsenko
Reviewed-by: Kristina Martsenko
Signed-off-by: Steve Capper
Signed-off-by: Catalin Marinas
---
 arch/arm64/mm/proc.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/arm64/mm')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c6a12073ef46..9f177aac6390 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -153,14 +153,14 @@ ENDPROC(cpu_do_resume)
 ENTRY(cpu_do_switch_mm)
 mrs x2, ttbr1_el1
 mmid x1, x1 // get mm->context.id
+ phys_to_ttbr x0, x3
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
- bfi x0, x1, #48, #16 // set the ASID field in TTBR0
+ bfi x3, x1, #48, #16 // set the ASID field in TTBR0
 #endif
 bfi x2, x1, #48, #16 // set the ASID
 msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
 isb
- phys_to_ttbr x0, x2
- msr ttbr0_el1, x2 // now update TTBR0
+ msr ttbr0_el1, x3 // now update TTBR0
 isb
 b post_ttbr_update_workaround // Back to C code...
 ENDPROC(cpu_do_switch_mm)
-- cgit v1.2.3
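
As an illustration of the ordering issue fixed by the patch above, the following stand-alone C sketch models how a TTBR0 value is assembled for a 52-bit PA configuration. It is illustrative only, not kernel code: phys_to_ttbr_52(), the mask constants and the sample addresses are simplified assumptions modelled on the ARMv8.2 LPA TTBR layout, in which BADDR bits [5:2] hold PA[51:48].

/*
 * Illustrative sketch only (not kernel code): why phys_to_ttbr must be
 * applied to the bare pgd physical address before the ASID is inserted.
 */
#include <stdint.h>
#include <stdio.h>

#define TTBR_ASID_SHIFT		48
#define TTBR_BADDR_MASK_52	(((1ULL << 46) - 1) << 2)	/* TTBR[47:2] */

/* 52-bit PA case: fold PA[51:48] down into TTBR BADDR bits [5:2]. */
static uint64_t phys_to_ttbr_52(uint64_t phys)
{
	return (phys | (phys >> 46)) & TTBR_BADDR_MASK_52;
}

int main(void)
{
	uint64_t pgd_phys = 0x0000000080001000ULL;	/* sample pgd address */
	uint64_t asid = 0x42;				/* sample ASID */

	/*
	 * Old order: the ASID already sits in bits [63:48], so the >>46
	 * fold smears ASID bits into the BADDR field (bits [5:2] of which
	 * are interpreted as PA[51:48] of the table base).
	 */
	uint64_t bad = phys_to_ttbr_52(pgd_phys | (asid << TTBR_ASID_SHIFT));

	/* Fixed order: convert the bare address first, then insert the ASID. */
	uint64_t good = phys_to_ttbr_52(pgd_phys) | (asid << TTBR_ASID_SHIFT);

	printf("old order: TTBR0 = %#018llx (spurious BADDR bits)\n",
	       (unsigned long long)bad);
	printf("new order: TTBR0 = %#018llx\n",
	       (unsigned long long)good);
	return 0;
}

Running the sketch shows the old ordering producing extra low-order BADDR bits (and hence non-zero PA[51:48] on a table walk), which is the symptom the commit message describes; the re-ordered sequence leaves the base address intact and keeps the ASID purely in bits [63:48].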