From addfc38672c73efd5c4e559a2e455b086e3e20c5 Mon Sep 17 00:00:00 2001
From: Andrew Murray <andrew.murray@arm.com>
Date: Wed, 28 Aug 2019 18:50:07 +0100
Subject: arm64: atomics: avoid out-of-line ll/sc atomics

When building for LSE atomics (CONFIG_ARM64_LSE_ATOMICS), if the hardware
or toolchain doesn't support it, the existing code will fall back to ll/sc
atomics. It achieves this by branching from inline assembly to a function
that is built with special compile flags. Further, this results in the
clobbering of registers even when the fallback isn't used, increasing
register pressure.

Improve this by providing inline implementations of both LSE and
ll/sc and use a static key to select between them, which allows for the
compiler to generate better atomics code. Put the LL/SC fallback atomics
in their own subsection to improve icache performance.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/atomic.h | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'arch/arm64/include/asm/atomic.h')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 657b0457d83c..c70d3f389d29 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -17,16 +17,7 @@
 
 #ifdef __KERNEL__
 
-#define __ARM64_IN_ATOMIC_IMPL
-
-#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE)
-#include <asm/atomic_lse.h>
-#else
-#include <asm/atomic_ll_sc.h>
-#endif
-
-#undef __ARM64_IN_ATOMIC_IMPL
-
+#include <asm/atomic_arch.h>
 #include <asm/cmpxchg.h>
 
 #define ATOMIC_INIT(i)	{ (i) }
-- cgit v1.2.3
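The dispatch pattern this commit introduces is easiest to see in isolation. Below is a minimal user-space sketch of the same idea, assuming nothing from the kernel: a plain runtime flag stands in for the static key, and C11 atomics stand in for the real LSE and LL/SC instruction sequences. All names here are illustrative, not kernel API; build with gcc/clang in gnu11 mode, since the ({ }) statement expression is a GNU extension, as in the kernel.

#include <stdatomic.h>
#include <stdbool.h>

static bool have_lse;	/* set once at startup; stands in for the static key */

static inline void __lse_add(int i, atomic_int *v)
{
	/* stands in for the single LSE instruction (e.g. STADD) */
	atomic_fetch_add(v, i);
}

static inline void __ll_sc_add(int i, atomic_int *v)
{
	/* stands in for the LDXR/STXR retry loop */
	int old = atomic_load(v);
	while (!atomic_compare_exchange_weak(v, &old, old + i))
		;	/* 'old' is refreshed on failure; just retry */
}

/* same shape as the kernel's __lse_ll_sc_body() dispatcher */
#define __lse_ll_sc_body(op, ...)					\
({									\
	have_lse ?							\
		__lse_##op(__VA_ARGS__) :				\
		__ll_sc_##op(__VA_ARGS__);				\
})

static inline void arch_atomic_add(int i, atomic_int *v)
{
	__lse_ll_sc_body(add, i, v);
}

int main(void)
{
	atomic_int counter = 0;

	have_lse = false;		/* pretend probing found no LSE */
	arch_atomic_add(2, &counter);	/* takes the __ll_sc_ path */

	have_lse = true;		/* ...and with LSE present */
	arch_atomic_add(3, &counter);	/* takes the __lse_ path */

	return atomic_load(&counter) == 5 ? 0 : 1;
}

Because both implementations are inline and the selector is a predictable branch (a patched static key in the kernel proper), the compiler allocates registers for each path independently: no call into a specially-compiled out-of-line function, and no blanket clobber list.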
From 0533f97b4356bfa8af5d4758c6c3fe703bb010d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 29 Aug 2019 11:49:10 +0100
Subject: arm64: asm: Kill 'asm/atomic_arch.h'

The contents of 'asm/atomic_arch.h' can be split across some of our
other 'asm/' headers. Remove it.

Reviewed-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/atomic.h      |  77 ++++++++++++++++-
 arch/arm64/include/asm/atomic_arch.h | 155 -----------------------------------
 arch/arm64/include/asm/cmpxchg.h     |  41 ++++++++++++++++-
 arch/arm64/include/asm/lse.h         |  24 ++++++
 4 files changed, 140 insertions(+), 157 deletions(-)
 delete mode 100644 arch/arm64/include/asm/atomic_arch.h

(limited to 'arch/arm64/include/asm/atomic.h')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c70d3f389d29..7c334337674d 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -17,9 +17,84 @@
 
 #ifdef __KERNEL__
 
-#include <asm/atomic_arch.h>
 #include <asm/cmpxchg.h>
 
+#define ATOMIC_OP(op)							\
+static inline void arch_##op(int i, atomic_t *v)			\
+{									\
+	__lse_ll_sc_body(op, i, v);					\
+}
+
+ATOMIC_OP(atomic_andnot)
+ATOMIC_OP(atomic_or)
+ATOMIC_OP(atomic_xor)
+ATOMIC_OP(atomic_add)
+ATOMIC_OP(atomic_and)
+ATOMIC_OP(atomic_sub)
+
+
+#define ATOMIC_FETCH_OP(name, op)					\
+static inline int arch_##op##name(int i, atomic_t *v)			\
+{									\
+	return __lse_ll_sc_body(op##name, i, v);			\
+}
+
+#define ATOMIC_FETCH_OPS(op)						\
+	ATOMIC_FETCH_OP(_relaxed, op)					\
+	ATOMIC_FETCH_OP(_acquire, op)					\
+	ATOMIC_FETCH_OP(_release, op)					\
+	ATOMIC_FETCH_OP(        , op)
+
+ATOMIC_FETCH_OPS(atomic_fetch_andnot)
+ATOMIC_FETCH_OPS(atomic_fetch_or)
+ATOMIC_FETCH_OPS(atomic_fetch_xor)
+ATOMIC_FETCH_OPS(atomic_fetch_add)
+ATOMIC_FETCH_OPS(atomic_fetch_and)
+ATOMIC_FETCH_OPS(atomic_fetch_sub)
+ATOMIC_FETCH_OPS(atomic_add_return)
+ATOMIC_FETCH_OPS(atomic_sub_return)
+
+
+#define ATOMIC64_OP(op)							\
+static inline void arch_##op(long i, atomic64_t *v)			\
+{									\
+	__lse_ll_sc_body(op, i, v);					\
+}
+
+ATOMIC64_OP(atomic64_andnot)
+ATOMIC64_OP(atomic64_or)
+ATOMIC64_OP(atomic64_xor)
+ATOMIC64_OP(atomic64_add)
+ATOMIC64_OP(atomic64_and)
+ATOMIC64_OP(atomic64_sub)
+
+
+#define ATOMIC64_FETCH_OP(name, op)					\
+static inline long arch_##op##name(long i, atomic64_t *v)		\
+{									\
+	return __lse_ll_sc_body(op##name, i, v);			\
+}
+
+#define ATOMIC64_FETCH_OPS(op)						\
+	ATOMIC64_FETCH_OP(_relaxed, op)					\
+	ATOMIC64_FETCH_OP(_acquire, op)					\
+	ATOMIC64_FETCH_OP(_release, op)					\
+	ATOMIC64_FETCH_OP(        , op)
+
+ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
+ATOMIC64_FETCH_OPS(atomic64_fetch_or)
+ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
+ATOMIC64_FETCH_OPS(atomic64_fetch_add)
+ATOMIC64_FETCH_OPS(atomic64_fetch_and)
+ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
+ATOMIC64_FETCH_OPS(atomic64_add_return)
+ATOMIC64_FETCH_OPS(atomic64_sub_return)
+
+static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
+}
+
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define arch_atomic_read(v)			READ_ONCE((v)->counter)
diff --git a/arch/arm64/include/asm/atomic_arch.h b/arch/arm64/include/asm/atomic_arch.h
deleted file mode 100644
index 1aac7fc65084..000000000000
--- a/arch/arm64/include/asm/atomic_arch.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Selection between LSE and LL/SC atomics.
- *
- * Copyright (C) 2018 ARM Ltd.
- * Author: Andrew Murray <andrew.murray@arm.com>
- */
-
-#ifndef __ASM_ATOMIC_ARCH_H
-#define __ASM_ATOMIC_ARCH_H
-
-
-#include <linux/jump_label.h>
-
-#include <asm/cpucaps.h>
-#include <asm/atomic_ll_sc.h>
-#include <asm/atomic_lse.h>
-
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
-
-static inline bool system_uses_lse_atomics(void)
-{
-	return (IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS) &&
-		IS_ENABLED(CONFIG_AS_LSE) &&
-		static_branch_likely(&arm64_const_caps_ready)) &&
-		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
-}
-
-#define __lse_ll_sc_body(op, ...)					\
-({									\
-	system_uses_lse_atomics() ?					\
-		__lse_##op(__VA_ARGS__) :				\
-		__ll_sc_##op(__VA_ARGS__);				\
-})
-
-#define ATOMIC_OP(op)							\
-static inline void arch_##op(int i, atomic_t *v)			\
-{									\
-	__lse_ll_sc_body(op, i, v);					\
-}
-
-ATOMIC_OP(atomic_andnot)
-ATOMIC_OP(atomic_or)
-ATOMIC_OP(atomic_xor)
-ATOMIC_OP(atomic_add)
-ATOMIC_OP(atomic_and)
-ATOMIC_OP(atomic_sub)
-
-
-#define ATOMIC_FETCH_OP(name, op)					\
-static inline int arch_##op##name(int i, atomic_t *v)			\
-{									\
-	return __lse_ll_sc_body(op##name, i, v);			\
-}
-
-#define ATOMIC_FETCH_OPS(op)						\
-	ATOMIC_FETCH_OP(_relaxed, op)					\
-	ATOMIC_FETCH_OP(_acquire, op)					\
-	ATOMIC_FETCH_OP(_release, op)					\
-	ATOMIC_FETCH_OP(        , op)
-
-ATOMIC_FETCH_OPS(atomic_fetch_andnot)
-ATOMIC_FETCH_OPS(atomic_fetch_or)
-ATOMIC_FETCH_OPS(atomic_fetch_xor)
-ATOMIC_FETCH_OPS(atomic_fetch_add)
-ATOMIC_FETCH_OPS(atomic_fetch_and)
-ATOMIC_FETCH_OPS(atomic_fetch_sub)
-ATOMIC_FETCH_OPS(atomic_add_return)
-ATOMIC_FETCH_OPS(atomic_sub_return)
-
-
-#define ATOMIC64_OP(op)							\
-static inline void arch_##op(long i, atomic64_t *v)			\
-{									\
-	__lse_ll_sc_body(op, i, v);					\
-}
-
-ATOMIC64_OP(atomic64_andnot)
-ATOMIC64_OP(atomic64_or)
-ATOMIC64_OP(atomic64_xor)
-ATOMIC64_OP(atomic64_add)
-ATOMIC64_OP(atomic64_and)
-ATOMIC64_OP(atomic64_sub)
-
-
-#define ATOMIC64_FETCH_OP(name, op)					\
-static inline long arch_##op##name(long i, atomic64_t *v)		\
-{									\
-	return __lse_ll_sc_body(op##name, i, v);			\
-}
-
-#define ATOMIC64_FETCH_OPS(op)						\
-	ATOMIC64_FETCH_OP(_relaxed, op)					\
-	ATOMIC64_FETCH_OP(_acquire, op)					\
-	ATOMIC64_FETCH_OP(_release, op)					\
-	ATOMIC64_FETCH_OP(        , op)
-
-ATOMIC64_FETCH_OPS(atomic64_fetch_andnot)
-ATOMIC64_FETCH_OPS(atomic64_fetch_or)
-ATOMIC64_FETCH_OPS(atomic64_fetch_xor)
-ATOMIC64_FETCH_OPS(atomic64_fetch_add)
-ATOMIC64_FETCH_OPS(atomic64_fetch_and)
-ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
-ATOMIC64_FETCH_OPS(atomic64_add_return)
-ATOMIC64_FETCH_OPS(atomic64_sub_return)
-
-
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
-{
-	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
-}
-
-#define __CMPXCHG_CASE(name, sz)					\
-static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
-					      u##sz old,		\
-					      u##sz new)		\
-{									\
-	return __lse_ll_sc_body(_cmpxchg_case_##name##sz,		\
-				ptr, old, new);				\
-}
-
-__CMPXCHG_CASE( , 8)
-__CMPXCHG_CASE( , 16)
-__CMPXCHG_CASE( , 32)
-__CMPXCHG_CASE( , 64)
-__CMPXCHG_CASE(acq_, 8)
-__CMPXCHG_CASE(acq_, 16)
-__CMPXCHG_CASE(acq_, 32)
-__CMPXCHG_CASE(acq_, 64)
-__CMPXCHG_CASE(rel_, 8)
-__CMPXCHG_CASE(rel_, 16)
-__CMPXCHG_CASE(rel_, 32)
-__CMPXCHG_CASE(rel_, 64)
-__CMPXCHG_CASE(mb_, 8)
-__CMPXCHG_CASE(mb_, 16)
-__CMPXCHG_CASE(mb_, 32)
-__CMPXCHG_CASE(mb_, 64)
-
-
-#define __CMPXCHG_DBL(name)						\
-static inline long __cmpxchg_double##name(unsigned long old1,		\
-					  unsigned long old2,		\
-					  unsigned long new1,		\
-					  unsigned long new2,		\
-					  volatile void *ptr)		\
-{									\
-	return __lse_ll_sc_body(_cmpxchg_double##name,			\
-				old1, old2, new1, new2, ptr);		\
-}
-
-__CMPXCHG_DBL( )
-__CMPXCHG_DBL(_mb)
-
-#endif	/* __ASM_ATOMIC_LSE_H */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index e5fff8cd4904..afaba73e0b2c 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -10,7 +10,6 @@
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
 
-#include <asm/atomic_arch.h>
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
@@ -104,6 +103,46 @@ __XCHG_GEN(_mb)
 #define arch_xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
 #define arch_xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
 
+#define __CMPXCHG_CASE(name, sz)					\
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
+					      u##sz old,		\
+					      u##sz new)		\
+{									\
+	return __lse_ll_sc_body(_cmpxchg_case_##name##sz,		\
+				ptr, old, new);				\
+}
+
+__CMPXCHG_CASE( , 8)
+__CMPXCHG_CASE( , 16)
+__CMPXCHG_CASE( , 32)
+__CMPXCHG_CASE( , 64)
+__CMPXCHG_CASE(acq_, 8)
+__CMPXCHG_CASE(acq_, 16)
+__CMPXCHG_CASE(acq_, 32)
+__CMPXCHG_CASE(acq_, 64)
+__CMPXCHG_CASE(rel_, 8)
+__CMPXCHG_CASE(rel_, 16)
+__CMPXCHG_CASE(rel_, 32)
+__CMPXCHG_CASE(rel_, 64)
+__CMPXCHG_CASE(mb_, 8)
+__CMPXCHG_CASE(mb_, 16)
+__CMPXCHG_CASE(mb_, 32)
+__CMPXCHG_CASE(mb_, 64)
+
+#define __CMPXCHG_DBL(name)						\
+static inline long __cmpxchg_double##name(unsigned long old1,		\
+					  unsigned long old2,		\
+					  unsigned long new1,		\
+					  unsigned long new2,		\
+					  volatile void *ptr)		\
+{									\
+	return __lse_ll_sc_body(_cmpxchg_double##name,			\
+				old1, old2, new1, new2, ptr);		\
+}
+
+__CMPXCHG_DBL( )
+__CMPXCHG_DBL(_mb)
+
 #define __CMPXCHG_GEN(sfx)						\
 static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
 					   unsigned long old,		\
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 08e818e53ed7..80b388278149 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -2,22 +2,46 @@
 #ifndef __ASM_LSE_H
 #define __ASM_LSE_H
 
+#include <asm/atomic_ll_sc.h>
+
 #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
 
 #include <linux/compiler_types.h>
 #include <linux/export.h>
+#include <linux/jump_label.h>
 #include <linux/stringify.h>
 #include <asm/alternative.h>
+#include <asm/atomic_lse.h>
 #include <asm/cpucaps.h>
 
 __asm__(".arch_extension	lse");
 
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
+extern struct static_key_false arm64_const_caps_ready;
+
+static inline bool system_uses_lse_atomics(void)
+{
+	return (static_branch_likely(&arm64_const_caps_ready)) &&
+		static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+}
+
+#define __lse_ll_sc_body(op, ...)					\
+({									\
+	system_uses_lse_atomics() ?					\
+		__lse_##op(__VA_ARGS__) :				\
+		__ll_sc_##op(__VA_ARGS__);				\
+})
+
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
 	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
 
 #else	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
 
+static inline bool system_uses_lse_atomics(void) { return false; }
+
+#define __lse_ll_sc_body(op, ...)	__ll_sc_##op(__VA_ARGS__)
+
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
 
 #endif	/* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
-- cgit v1.2.3
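For reference, the ATOMIC_FETCH_OP/ATOMIC_FETCH_OPS machinery this commit moves into atomic.h is pure token-pasting: ATOMIC_FETCH_OPS(op) instantiates ATOMIC_FETCH_OP once per ordering suffix. Expanding ATOMIC_FETCH_OPS(atomic_fetch_add) by hand (whitespace aside) yields the following four wrappers:

static inline int arch_atomic_fetch_add_relaxed(int i, atomic_t *v)
{
	return __lse_ll_sc_body(atomic_fetch_add_relaxed, i, v);
}

static inline int arch_atomic_fetch_add_acquire(int i, atomic_t *v)
{
	return __lse_ll_sc_body(atomic_fetch_add_acquire, i, v);
}

static inline int arch_atomic_fetch_add_release(int i, atomic_t *v)
{
	return __lse_ll_sc_body(atomic_fetch_add_release, i, v);
}

static inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
	return __lse_ll_sc_body(atomic_fetch_add, i, v);
}

Each __lse_ll_sc_body() then resolves to the matching __lse_* or __ll_sc_* inline from asm/atomic_lse.h or asm/atomic_ll_sc.h, so every ordering variant gets both implementations with a single macro invocation.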
From 5aad6cdabbf91fd330bd216fe3c93d90f78bc7e7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 29 Aug 2019 14:33:23 +0100
Subject: arm64: atomics: Undefine internal macros after use

We use a bunch of internal macros when constructing our atomic and
cmpxchg routines in order to save on boilerplate. Avoid exposing these
directly to users of the header files.
Reviewed-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/atomic.h  | 7 +++++++
 arch/arm64/include/asm/cmpxchg.h | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'arch/arm64/include/asm/atomic.h')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 7c334337674d..916e5a6d5454 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -32,6 +32,7 @@ ATOMIC_OP(atomic_add)
 ATOMIC_OP(atomic_and)
 ATOMIC_OP(atomic_sub)
 
+#undef ATOMIC_OP
 
 #define ATOMIC_FETCH_OP(name, op)					\
 static inline int arch_##op##name(int i, atomic_t *v)			\
@@ -54,6 +55,8 @@ ATOMIC_FETCH_OPS(atomic_fetch_sub)
 ATOMIC_FETCH_OPS(atomic_add_return)
 ATOMIC_FETCH_OPS(atomic_sub_return)
 
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
 
 #define ATOMIC64_OP(op)							\
 static inline void arch_##op(long i, atomic64_t *v)			\
@@ -68,6 +71,7 @@ ATOMIC64_OP(atomic64_add)
 ATOMIC64_OP(atomic64_and)
 ATOMIC64_OP(atomic64_sub)
 
+#undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, op)					\
 static inline long arch_##op##name(long i, atomic64_t *v)		\
@@ -90,6 +94,9 @@ ATOMIC64_FETCH_OPS(atomic64_fetch_sub)
 ATOMIC64_FETCH_OPS(atomic64_add_return)
 ATOMIC64_FETCH_OPS(atomic64_sub_return)
 
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
+
 static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index afaba73e0b2c..a1398f2f9994 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -129,6 +129,8 @@ __CMPXCHG_CASE(mb_, 16)
 __CMPXCHG_CASE(mb_, 32)
 __CMPXCHG_CASE(mb_, 64)
 
+#undef __CMPXCHG_CASE
+
 #define __CMPXCHG_DBL(name)						\
 static inline long __cmpxchg_double##name(unsigned long old1,		\
 					  unsigned long old2,		\
@@ -143,6 +145,8 @@ static inline long __cmpxchg_double##name(unsigned long old1,	\
 __CMPXCHG_DBL( )
 __CMPXCHG_DBL(_mb)
 
+#undef __CMPXCHG_DBL
+
 #define __CMPXCHG_GEN(sfx)						\
 static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
 					   unsigned long old,		\
-- cgit v1.2.3
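The generate-then-#undef idiom above, shown in miniature and independent of the kernel sources (names here are illustrative): a helper macro stamps out a family of functions inside the header and is then removed, so files including the header can neither expand it by accident nor collide with an identically named macro of their own.

/* mini_read.h — illustrative header, not from the kernel tree */
#define DEFINE_READ(type)						\
static inline type read_##type(const volatile type *p)			\
{									\
	return *p;							\
}

DEFINE_READ(int)	/* defines read_int()  */
DEFINE_READ(long)	/* defines read_long() */

#undef DEFINE_READ	/* internal boilerplate-saver; not part of the API */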