From b6ccb9803e90c16b212cf4ed62913a7591e79a39 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 7 Feb 2014 19:12:27 +0100 Subject: ARM: 7954/1: mm: remove remaining domain support from ARMv6 CPU_32v6 currently selects CPU_USE_DOMAINS if CPU_V6 and MMU. This is because ARM 1136 r0pX CPUs lack the v6k extensions, and therefore do not have hardware thread registers. The lack of these registers requires the kernel to update the vectors page at each context switch in order to write a new TLS pointer. This write must be done via the userspace mapping, since aliasing caches can lead to expensive flushing when using kmap. Finally, this requires the vectors page to be mapped r/w for kernel and r/o for user, which has implications for things like put_user which must trigger CoW appropriately when targetting user pages. The upshot of all this is that a v6/v7 kernel makes use of domains to segregate kernel and user memory accesses. This has the nasty side-effect of making device mappings executable, which has been observed to cause subtle bugs on recent cores (e.g. Cortex-A15 performing a speculative instruction fetch from the GIC and acking an interrupt in the process). This patch solves this problem by removing the remaining domain support from ARMv6. A new memory type is added specifically for the vectors page which allows that page (and only that page) to be mapped as user r/o, kernel r/w. All other user r/o pages are mapped also as kernel r/o. Patch co-developed with Russell King. Cc: Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/futex.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm/include/asm/futex.h') diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index e42cf597f6e6..2aff798fbef4 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -3,11 +3,6 @@ #ifdef __KERNEL__ -#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP) -/* ARM doesn't provide unprivileged exclusive memory accessors */ -#include -#else - #include #include #include @@ -164,6 +159,5 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) return ret; } -#endif /* !(CPU_USE_DOMAINS && SMP) */ #endif /* __KERNEL__ */ #endif /* _ASM_ARM_FUTEX_H */ -- cgit v1.2.3 From c32ffce0f66e5d1d4856254516e24f5ef275cd00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Feb 2014 17:01:48 +0100 Subject: ARM: 7984/1: prefetch: add prefetchw invocations for barriered atomics After a bunch of benchmarking on the interaction between dmb and pldw, it turns out that issuing the pldw *after* the dmb instruction can give modest performance gains (~3% atomic_add_return improvement on a dual A15). This patch adds prefetchw invocations to our barriered atomic operations including cmpxchg, test_and_xxx and futexes. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/atomic.h | 9 +++++++++ arch/arm/include/asm/cmpxchg.h | 6 ++++++ arch/arm/include/asm/futex.h | 3 +++ arch/arm/lib/bitops.h | 5 +++++ 4 files changed, 23 insertions(+) (limited to 'arch/arm/include/asm/futex.h') diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 62d2cb53b069..6e410090896e 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -60,6 +60,7 @@ static inline int atomic_add_return(int i, atomic_t *v) int result; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic_add_return\n" "1: ldrex %0, [%3]\n" @@ -99,6 +100,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) int result; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic_sub_return\n" "1: ldrex %0, [%3]\n" @@ -121,6 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) unsigned long res; smp_mb(); + prefetchw(&ptr->counter); do { __asm__ __volatile__("@ atomic_cmpxchg\n" @@ -299,6 +302,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) unsigned long tmp; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_add_return\n" "1: ldrexd %0, %H0, [%3]\n" @@ -340,6 +344,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v) unsigned long tmp; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_sub_return\n" "1: ldrexd %0, %H0, [%3]\n" @@ -364,6 +369,7 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old, unsigned long res; smp_mb(); + prefetchw(&ptr->counter); do { __asm__ __volatile__("@ atomic64_cmpxchg\n" @@ -388,6 +394,7 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new) unsigned long tmp; smp_mb(); + prefetchw(&ptr->counter); __asm__ __volatile__("@ atomic64_xchg\n" "1: ldrexd %0, %H0, [%3]\n" @@ -409,6 +416,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_dec_if_positive\n" "1: ldrexd %0, %H0, [%3]\n" @@ -436,6 +444,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) int ret = 1; smp_mb(); + prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_add_unless\n" "1: ldrexd %0, %H0, [%4]\n" diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index df2fbba7efc8..abb2c3769b01 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -2,6 +2,7 @@ #define __ASM_ARM_CMPXCHG_H #include +#include #include #if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) @@ -35,6 +36,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size #endif smp_mb(); + prefetchw((const void *)ptr); switch (size) { #if __LINUX_ARM_ARCH__ >= 6 @@ -138,6 +140,8 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, { unsigned long oldval, res; + prefetchw((const void *)ptr); + switch (size) { #ifndef CONFIG_CPU_V6 /* min ARCH >= ARMv6K */ case 1: @@ -230,6 +234,8 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr, unsigned long long oldval; unsigned long res; + prefetchw(ptr); + __asm__ __volatile__( "1: ldrexd %1, %H1, [%3]\n" " teq %1, %4\n" diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 2aff798fbef4..53e69dae796f 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -23,6 +23,7 @@ #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ smp_mb(); \ + prefetchw(uaddr); \ __asm__ __volatile__( \ "1: ldrex %1, [%3]\n" \ " " insn "\n" \ @@ -46,6 +47,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; smp_mb(); + /* Prefetching cannot fault */ + prefetchw(uaddr); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: ldrex %1, [%4]\n" " teq %1, %2\n" diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 52886b89706c..9f12ed1eea86 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -37,6 +37,11 @@ UNWIND( .fnstart ) add r1, r1, r0, lsl #2 @ Get word offset mov r3, r2, lsl r3 @ create mask smp_dmb +#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP) + .arch_extension mp + ALT_SMP(W(pldw) [r1]) + ALT_UP(W(nop)) +#endif 1: ldrex r2, [r1] ands r0, r2, r3 @ save old value of bit \instr r2, r2, r3 @ toggle bit -- cgit v1.2.3