summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 12:41:29 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-25 12:41:29 -0700
commitc86ad14d305d2429c3da19462440bac50c183def (patch)
treebd794cd72476661faf82c440063c217bb978ce44 /arch
parenta2303849a6b4b7ba59667091e00d6bb194071d9a (diff)
parentf06628638cf6e75f179742b6c1b35076965b9fdd (diff)
downloadlinux-c86ad14d305d2429c3da19462440bac50c183def.tar.bz2
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar: "The locking tree was busier in this cycle than the usual pattern - a couple of major projects happened to coincide. The main changes are: - implement the atomic_fetch_{add,sub,and,or,xor}() API natively across all SMP architectures (Peter Zijlstra) - add atomic_fetch_{inc/dec}() as well, using the generic primitives (Davidlohr Bueso) - optimize various aspects of rwsems (Jason Low, Davidlohr Bueso, Waiman Long) - optimize smp_cond_load_acquire() on arm64 and implement LSE based atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() on arm64 (Will Deacon) - introduce smp_acquire__after_ctrl_dep() and fix various barrier mis-uses and bugs (Peter Zijlstra) - after discovering ancient spin_unlock_wait() barrier bugs in its implementation and usage, strengthen its semantics and update/fix usage sites (Peter Zijlstra) - optimize mutex_trylock() fastpath (Peter Zijlstra) - ... misc fixes and cleanups" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (67 commits) locking/atomic: Introduce inc/dec variants for the atomic_fetch_$op() API locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire() locking/static_keys: Fix non static symbol Sparse warning locking/qspinlock: Use __this_cpu_dec() instead of full-blown this_cpu_dec() locking/atomic, arch/tile: Fix tilepro build locking/atomic, arch/m68k: Remove comment locking/atomic, arch/arc: Fix build locking/Documentation: Clarify limited control-dependency scope locking/atomic, arch/rwsem: Employ atomic_long_fetch_add() locking/atomic, arch/qrwlock: Employ atomic_fetch_add_acquire() locking/atomic, arch/mips: Convert to _relaxed atomics locking/atomic, arch/alpha: Convert to _relaxed atomics locking/atomic: Remove the deprecated atomic_{set,clear}_mask() functions locking/atomic: Remove linux/atomic.h:atomic_fetch_or() locking/atomic: Implement atomic{,64,_long}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() locking/atomic: Fix atomic64_relaxed() bits locking/atomic, arch/xtensa: Implement atomic_fetch_{add,sub,and,or,xor}() locking/atomic, arch/x86: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() locking/atomic, arch/sparc: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() ...
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/atomic.h87
-rw-r--r--arch/alpha/include/asm/rwsem.h68
-rw-r--r--arch/alpha/include/asm/spinlock.h9
-rw-r--r--arch/arc/include/asm/atomic.h99
-rw-r--r--arch/arc/include/asm/spinlock.h7
-rw-r--r--arch/arm/include/asm/atomic.h106
-rw-r--r--arch/arm/include/asm/spinlock.h19
-rw-r--r--arch/arm64/include/asm/atomic.h60
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h110
-rw-r--r--arch/arm64/include/asm/atomic_lse.h278
-rw-r--r--arch/arm64/include/asm/barrier.h13
-rw-r--r--arch/arm64/include/asm/cmpxchg.h51
-rw-r--r--arch/avr32/include/asm/atomic.h54
-rw-r--r--arch/blackfin/include/asm/atomic.h8
-rw-r--r--arch/blackfin/include/asm/spinlock.h5
-rw-r--r--arch/blackfin/kernel/bfin_ksyms.c1
-rw-r--r--arch/blackfin/mach-bf561/atomic.S43
-rw-r--r--arch/frv/include/asm/atomic.h30
-rw-r--r--arch/frv/include/asm/atomic_defs.h2
-rw-r--r--arch/h8300/include/asm/atomic.h29
-rw-r--r--arch/hexagon/include/asm/atomic.h31
-rw-r--r--arch/hexagon/include/asm/spinlock.h10
-rw-r--r--arch/ia64/include/asm/atomic.h130
-rw-r--r--arch/ia64/include/asm/mutex.h2
-rw-r--r--arch/ia64/include/asm/rwsem.h31
-rw-r--r--arch/ia64/include/asm/spinlock.h4
-rw-r--r--arch/m32r/include/asm/atomic.h36
-rw-r--r--arch/m32r/include/asm/spinlock.h9
-rw-r--r--arch/m68k/include/asm/atomic.h44
-rw-r--r--arch/metag/include/asm/atomic_lnkget.h36
-rw-r--r--arch/metag/include/asm/atomic_lock1.h33
-rw-r--r--arch/metag/include/asm/spinlock.h14
-rw-r--r--arch/mips/include/asm/atomic.h154
-rw-r--r--arch/mips/include/asm/spinlock.h19
-rw-r--r--arch/mn10300/include/asm/atomic.h33
-rw-r--r--arch/mn10300/include/asm/spinlock.h8
-rw-r--r--arch/parisc/include/asm/atomic.h63
-rw-r--r--arch/parisc/include/asm/spinlock.h9
-rw-r--r--arch/powerpc/include/asm/atomic.h83
-rw-r--r--arch/powerpc/include/asm/mutex.h2
-rw-r--r--arch/s390/include/asm/atomic.h40
-rw-r--r--arch/s390/include/asm/rwsem.h37
-rw-r--r--arch/s390/include/asm/spinlock.h3
-rw-r--r--arch/sh/include/asm/atomic-grb.h34
-rw-r--r--arch/sh/include/asm/atomic-irq.h31
-rw-r--r--arch/sh/include/asm/atomic-llsc.h32
-rw-r--r--arch/sh/include/asm/spinlock.h10
-rw-r--r--arch/sparc/include/asm/atomic_32.h13
-rw-r--r--arch/sparc/include/asm/atomic_64.h16
-rw-r--r--arch/sparc/include/asm/spinlock_32.h7
-rw-r--r--arch/sparc/include/asm/spinlock_64.h10
-rw-r--r--arch/sparc/lib/atomic32.c29
-rw-r--r--arch/sparc/lib/atomic_64.S61
-rw-r--r--arch/sparc/lib/ksyms.c17
-rw-r--r--arch/tile/include/asm/atomic.h2
-rw-r--r--arch/tile/include/asm/atomic_32.h74
-rw-r--r--arch/tile/include/asm/atomic_64.h115
-rw-r--r--arch/tile/include/asm/barrier.h7
-rw-r--r--arch/tile/include/asm/bitops_32.h18
-rw-r--r--arch/tile/include/asm/futex.h14
-rw-r--r--arch/tile/lib/atomic_32.c50
-rw-r--r--arch/tile/lib/atomic_asm_32.S27
-rw-r--r--arch/tile/lib/spinlock_32.c6
-rw-r--r--arch/tile/lib/spinlock_64.c6
-rw-r--r--arch/x86/include/asm/atomic.h35
-rw-r--r--arch/x86/include/asm/atomic64_32.h25
-rw-r--r--arch/x86/include/asm/atomic64_64.h35
-rw-r--r--arch/x86/include/asm/mutex_32.h2
-rw-r--r--arch/x86/include/asm/mutex_64.h6
-rw-r--r--arch/x86/include/asm/rwsem.h18
-rw-r--r--arch/xtensa/include/asm/atomic.h52
-rw-r--r--arch/xtensa/include/asm/spinlock.h10
72 files changed, 2063 insertions, 609 deletions
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 572b228c44c7..498933a7df97 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -46,10 +46,9 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
} \
#define ATOMIC_OP_RETURN(op, asm_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
{ \
long temp, result; \
- smp_mb(); \
__asm__ __volatile__( \
"1: ldl_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -61,7 +60,23 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
- smp_mb(); \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ long temp, result; \
+ __asm__ __volatile__( \
+ "1: ldl_l %2,%1\n" \
+ " " #asm_op " %2,%3,%0\n" \
+ " stl_c %0,%1\n" \
+ " beq %0,2f\n" \
+ ".subsection 2\n" \
+ "2: br 1b\n" \
+ ".previous" \
+ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \
+ :"Ir" (i), "m" (v->counter) : "memory"); \
return result; \
}
@@ -82,10 +97,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
} \
#define ATOMIC64_OP_RETURN(op, asm_op) \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
{ \
long temp, result; \
- smp_mb(); \
__asm__ __volatile__( \
"1: ldq_l %0,%1\n" \
" " #asm_op " %0,%3,%2\n" \
@@ -97,34 +111,77 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
".previous" \
:"=&r" (temp), "=m" (v->counter), "=&r" (result) \
:"Ir" (i), "m" (v->counter) : "memory"); \
- smp_mb(); \
+ return result; \
+}
+
+#define ATOMIC64_FETCH_OP(op, asm_op) \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+{ \
+ long temp, result; \
+ __asm__ __volatile__( \
+ "1: ldq_l %2,%1\n" \
+ " " #asm_op " %2,%3,%0\n" \
+ " stq_c %0,%1\n" \
+ " beq %0,2f\n" \
+ ".subsection 2\n" \
+ "2: br 1b\n" \
+ ".previous" \
+ :"=&r" (temp), "=m" (v->counter), "=&r" (result) \
+ :"Ir" (i), "m" (v->counter) : "memory"); \
return result; \
}
#define ATOMIC_OPS(op) \
ATOMIC_OP(op, op##l) \
ATOMIC_OP_RETURN(op, op##l) \
+ ATOMIC_FETCH_OP(op, op##l) \
ATOMIC64_OP(op, op##q) \
- ATOMIC64_OP_RETURN(op, op##q)
+ ATOMIC64_OP_RETURN(op, op##q) \
+ ATOMIC64_FETCH_OP(op, op##q)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
#define atomic_andnot atomic_andnot
#define atomic64_andnot atomic64_andnot
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, bis)
-ATOMIC_OP(xor, xor)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, bis)
-ATOMIC64_OP(xor, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm) \
+ ATOMIC_OP(op, asm) \
+ ATOMIC_FETCH_OP(op, asm) \
+ ATOMIC64_OP(op, asm) \
+ ATOMIC64_FETCH_OP(op, asm)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, bis)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 0131a7058778..77873d0ad293 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long old, new, res;
- res = sem->count;
+ res = atomic_long_read(&sem->count);
do {
new = res + RWSEM_ACTIVE_READ_BIAS;
if (new <= 0)
break;
old = res;
- res = cmpxchg(&sem->count, old, new);
+ res = atomic_long_cmpxchg(&sem->count, old, new);
} while (res != old);
return res >= 0 ? 1 : 0;
}
@@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_WRITE_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
@@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem)
{
long count;
#ifndef CONFIG_SMP
- sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count;
+ sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
+ count = sem->count.counter;
#else
long temp;
__asm__ __volatile__(
@@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_WAITING_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_WAITING_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%2,%0\n"
- " stq_c %0,%1\n"
- " beq %0,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (temp), "=m" (sem->count)
- :"Ir" (val), "m" (sem->count));
-#endif
-}
-
-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
- return sem->count;
-#else
- long ret, temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " addq %0,%3,%0\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (ret), "=m" (sem->count), "=&r" (temp)
- :"Ir" (val), "m" (sem->count));
-
- return ret;
-#endif
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index fed9c6f44c19..a40b9fc0c6c3 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -3,6 +3,8 @@
#include <linux/kernel.h>
#include <asm/current.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
@@ -13,8 +15,11 @@
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
#define arch_spin_is_locked(x) ((x)->lock != 0)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while ((x)->lock)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index dd683995bc9d..4e3c1b6b0806 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -67,6 +67,33 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return val; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned int val, orig; \
+ \
+ /* \
+ * Explicit full memory barrier needed before/after as \
+ * LLOCK/SCOND thmeselves don't provide any such semantics \
+ */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ "1: llock %[orig], [%[ctr]] \n" \
+ " " #asm_op " %[val], %[orig], %[i] \n" \
+ " scond %[val], [%[ctr]] \n" \
+ " \n" \
+ : [val] "=&r" (val), \
+ [orig] "=&r" (orig) \
+ : [ctr] "r" (&v->counter), \
+ [i] "ir" (i) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return orig; \
+}
+
#else /* !CONFIG_ARC_HAS_LLSC */
#ifndef CONFIG_SMP
@@ -129,25 +156,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ unsigned long orig; \
+ \
+ /* \
+ * spin lock/unlock provides the needed smp_mb() before/after \
+ */ \
+ atomic_ops_lock(flags); \
+ orig = v->counter; \
+ v->counter c_op i; \
+ atomic_ops_unlock(flags); \
+ \
+ return orig; \
+}
+
#endif /* !CONFIG_ARC_HAS_LLSC */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#define atomic_andnot atomic_andnot
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
-#undef SCOND_FAIL_RETRY_VAR_DEF
-#undef SCOND_FAIL_RETRY_ASM
-#undef SCOND_FAIL_RETRY_VARS
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
#else /* CONFIG_ARC_PLAT_EZNPS */
@@ -208,22 +254,51 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned int temp = i; \
+ \
+ /* Explicit full memory barrier needed before/after */ \
+ smp_mb(); \
+ \
+ __asm__ __volatile__( \
+ " mov r2, %0\n" \
+ " mov r3, %1\n" \
+ " .word %2\n" \
+ " mov %0, r2" \
+ : "+r"(temp) \
+ : "r"(&v->counter), "i"(asm_op) \
+ : "r2", "r3", "memory"); \
+ \
+ smp_mb(); \
+ \
+ return temp; \
+}
+
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
#define atomic_sub(i, v) atomic_add(-(i), (v))
#define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
-ATOMIC_OP(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-ATOMIC_OP(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
-ATOMIC_OP(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
+ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
+ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
#endif /* CONFIG_ARC_PLAT_EZNPS */
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h
index cded4a9b5438..233d5ffe6ec7 100644
--- a/arch/arc/include/asm/spinlock.h
+++ b/arch/arc/include/asm/spinlock.h
@@ -15,8 +15,11 @@
#define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
#ifdef CONFIG_ARC_HAS_LLSC
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9e10c4567eb4..66d0e215a773 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -77,8 +77,36 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result, val; \
+ \
+ prefetchw(&v->counter); \
+ \
+ __asm__ __volatile__("@ atomic_fetch_" #op "\n" \
+"1: ldrex %0, [%4]\n" \
+" " #asm_op " %1, %0, %5\n" \
+" strex %2, %1, [%4]\n" \
+" teq %2, #0\n" \
+" bne 1b" \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "Ir" (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
{
@@ -159,6 +187,20 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return val; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int val; \
+ \
+ raw_local_irq_save(flags); \
+ val = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ \
+ return val; \
+}
+
static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
int ret;
@@ -187,19 +229,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
#define atomic_andnot atomic_andnot
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(andnot, &= ~, bic)
-ATOMIC_OP(or, |=, orr)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, orr)
+ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -317,24 +366,61 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \
return result; \
}
+#define ATOMIC64_FETCH_OP(op, op1, op2) \
+static inline long long \
+atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \
+{ \
+ long long result, val; \
+ unsigned long tmp; \
+ \
+ prefetchw(&v->counter); \
+ \
+ __asm__ __volatile__("@ atomic64_fetch_" #op "\n" \
+"1: ldrexd %0, %H0, [%4]\n" \
+" " #op1 " %Q1, %Q0, %Q5\n" \
+" " #op2 " %R1, %R0, %R5\n" \
+" strexd %2, %1, %H1, [%4]\n" \
+" teq %2, #0\n" \
+" bne 1b" \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Qo" (v->counter) \
+ : "r" (&v->counter), "r" (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
#define ATOMIC64_OPS(op, op1, op2) \
ATOMIC64_OP(op, op1, op2) \
- ATOMIC64_OP_RETURN(op, op1, op2)
+ ATOMIC64_OP_RETURN(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
ATOMIC64_OPS(add, adds, adc)
ATOMIC64_OPS(sub, subs, sbc)
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2) \
+ ATOMIC64_OP(op, op1, op2) \
+ ATOMIC64_FETCH_OP(op, op1, op2)
#define atomic64_andnot atomic64_andnot
-ATOMIC64_OP(and, and, and)
-ATOMIC64_OP(andnot, bic, bic)
-ATOMIC64_OP(or, orr, orr)
-ATOMIC64_OP(xor, eor, eor)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, orr, orr)
+ATOMIC64_OPS(xor, eor, eor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 0fa418463f49..4bec45442072 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -6,6 +6,8 @@
#endif
#include <linux/prefetch.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K
@@ -50,8 +52,21 @@ static inline void dsb_sev(void)
* memory.
*/
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ u16 owner = READ_ONCE(lock->tickets.owner);
+
+ for (;;) {
+ arch_spinlock_t tmp = READ_ONCE(*lock);
+
+ if (tmp.tickets.owner == tmp.tickets.next ||
+ tmp.tickets.owner != owner)
+ break;
+
+ wfe();
+ }
+ smp_acquire__after_ctrl_dep();
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index f3a3586a421c..c0235e0ff849 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -76,6 +76,36 @@
#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_add_acquire atomic_fetch_add_acquire
+#define atomic_fetch_add_release atomic_fetch_add_release
+#define atomic_fetch_add atomic_fetch_add
+
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
+#define atomic_fetch_sub_release atomic_fetch_sub_release
+#define atomic_fetch_sub atomic_fetch_sub
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_and_acquire atomic_fetch_and_acquire
+#define atomic_fetch_and_release atomic_fetch_and_release
+#define atomic_fetch_and atomic_fetch_and
+
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
+#define atomic_fetch_andnot_release atomic_fetch_andnot_release
+#define atomic_fetch_andnot atomic_fetch_andnot
+
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_or_acquire atomic_fetch_or_acquire
+#define atomic_fetch_or_release atomic_fetch_or_release
+#define atomic_fetch_or atomic_fetch_or
+
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
+#define atomic_fetch_xor_release atomic_fetch_xor_release
+#define atomic_fetch_xor atomic_fetch_xor
+
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
@@ -125,6 +155,36 @@
#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
+#define atomic64_fetch_add_release atomic64_fetch_add_release
+#define atomic64_fetch_add atomic64_fetch_add
+
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
+#define atomic64_fetch_sub_release atomic64_fetch_sub_release
+#define atomic64_fetch_sub atomic64_fetch_sub
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
+#define atomic64_fetch_and_release atomic64_fetch_and_release
+#define atomic64_fetch_and atomic64_fetch_and
+
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
+#define atomic64_fetch_andnot atomic64_fetch_andnot
+
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
+#define atomic64_fetch_or_release atomic64_fetch_or_release
+#define atomic64_fetch_or atomic64_fetch_or
+
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
+#define atomic64_fetch_xor_release atomic64_fetch_xor_release
+#define atomic64_fetch_xor atomic64_fetch_xor
+
#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f61c84f6ba02..f819fdcff1ac 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
} \
__LL_SC_EXPORT(atomic_##op##_return##name);
+#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+__LL_SC_INLINE int \
+__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \
+{ \
+ unsigned long tmp; \
+ int val, result; \
+ \
+ asm volatile("// atomic_fetch_" #op #name "\n" \
+" prfm pstl1strm, %3\n" \
+"1: ld" #acq "xr %w0, %3\n" \
+" " #asm_op " %w1, %w0, %w4\n" \
+" st" #rel "xr %w2, %w1, %3\n" \
+" cbnz %w2, 1b\n" \
+" " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : "Ir" (i) \
+ : cl); \
+ \
+ return result; \
+} \
+__LL_SC_EXPORT(atomic_fetch_##op##name);
+
#define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \
- ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
-
-#define ATOMIC_OPS_RLX(...) \
- ATOMIC_OPS(__VA_ARGS__) \
+ ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\
ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
- ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
+ ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OPS_RLX(add, add)
-ATOMIC_OPS_RLX(sub, sub)
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(...) \
+ ATOMIC_OP(__VA_ARGS__) \
+ ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\
+ ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__)
-ATOMIC_OP(and, and)
-ATOMIC_OP(andnot, bic)
-ATOMIC_OP(or, orr)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(andnot, bic)
+ATOMIC_OPS(or, orr)
+ATOMIC_OPS(xor, eor)
-#undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
} \
__LL_SC_EXPORT(atomic64_##op##_return##name);
+#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \
+__LL_SC_INLINE long \
+__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \
+{ \
+ long result, val; \
+ unsigned long tmp; \
+ \
+ asm volatile("// atomic64_fetch_" #op #name "\n" \
+" prfm pstl1strm, %3\n" \
+"1: ld" #acq "xr %0, %3\n" \
+" " #asm_op " %1, %0, %4\n" \
+" st" #rel "xr %w2, %1, %3\n" \
+" cbnz %w2, 1b\n" \
+" " #mb \
+ : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \
+ : "Ir" (i) \
+ : cl); \
+ \
+ return result; \
+} \
+__LL_SC_EXPORT(atomic64_fetch_##op##name);
+
#define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \
- ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
-
-#define ATOMIC64_OPS_RLX(...) \
- ATOMIC64_OPS(__VA_ARGS__) \
+ ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \
ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
- ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
+ ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OPS_RLX(add, add)
-ATOMIC64_OPS_RLX(sub, sub)
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(...) \
+ ATOMIC64_OP(__VA_ARGS__) \
+ ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \
+ ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(andnot, bic)
-ATOMIC64_OP(or, orr)
-ATOMIC64_OP(xor, eor)
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(andnot, bic)
+ATOMIC64_OPS(or, orr)
+ATOMIC64_OPS(xor, eor)
-#undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 39c1d340fec5..b5890be8f257 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -26,54 +26,57 @@
#endif
#define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op)
-
-static inline void atomic_andnot(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
- " stclr %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_OP(op, asm_op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \
+" " #asm_op " %w[i], %[v]\n") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS); \
}
-static inline void atomic_or(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+ATOMIC_OP(andnot, stclr)
+ATOMIC_OP(or, stset)
+ATOMIC_OP(xor, steor)
+ATOMIC_OP(add, stadd)
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
- " stset %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+#undef ATOMIC_OP
-static inline void atomic_xor(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
- " steor %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \
+static inline int atomic_fetch_##op##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ __LL_SC_ATOMIC(fetch_##op##name), \
+ /* LSE atomics */ \
+" " #asm_op #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
}
-static inline void atomic_add(int i, atomic_t *v)
-{
- register int w0 asm ("w0") = i;
- register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_FETCH_OPS(op, asm_op) \
+ ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC_FETCH_OP( , al, op, asm_op, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
- " stadd %w[i], %[v]\n")
- : [i] "+r" (w0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+ATOMIC_FETCH_OPS(andnot, ldclr)
+ATOMIC_FETCH_OPS(or, ldset)
+ATOMIC_FETCH_OPS(xor, ldeor)
+ATOMIC_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
static inline int atomic_add_return##name(int i, atomic_t *v) \
@@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v)
: __LL_SC_CLOBBERS);
}
+#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \
+static inline int atomic_fetch_and##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(fetch_and##name), \
+ /* LSE atomics */ \
+ " mvn %w[i], %w[i]\n" \
+ " ldclr" #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
+}
+
+ATOMIC_FETCH_OP_AND(_relaxed, )
+ATOMIC_FETCH_OP_AND(_acquire, a, "memory")
+ATOMIC_FETCH_OP_AND(_release, l, "memory")
+ATOMIC_FETCH_OP_AND( , al, "memory")
+
+#undef ATOMIC_FETCH_OP_AND
+
static inline void atomic_sub(int i, atomic_t *v)
{
register int w0 asm ("w0") = i;
@@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory")
ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN
-#undef __LL_SC_ATOMIC
-
-#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
-
-static inline void atomic64_andnot(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
- " stclr %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \
+static inline int atomic_fetch_sub##name(int i, atomic_t *v) \
+{ \
+ register int w0 asm ("w0") = i; \
+ register atomic_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC(fetch_sub##name), \
+ /* LSE atomics */ \
+ " neg %w[i], %w[i]\n" \
+ " ldadd" #mb " %w[i], %w[i], %[v]") \
+ : [i] "+r" (w0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return w0; \
}
-static inline void atomic64_or(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+ATOMIC_FETCH_OP_SUB(_relaxed, )
+ATOMIC_FETCH_OP_SUB(_acquire, a, "memory")
+ATOMIC_FETCH_OP_SUB(_release, l, "memory")
+ATOMIC_FETCH_OP_SUB( , al, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
- " stset %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#undef ATOMIC_FETCH_OP_SUB
+#undef __LL_SC_ATOMIC
+
+#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
+#define ATOMIC64_OP(op, asm_op) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \
+" " #asm_op " %[i], %[v]\n") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS); \
}
-static inline void atomic64_xor(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+ATOMIC64_OP(andnot, stclr)
+ATOMIC64_OP(or, stset)
+ATOMIC64_OP(xor, steor)
+ATOMIC64_OP(add, stadd)
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
- " steor %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
+#undef ATOMIC64_OP
+
+#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \
+static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("x0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ __LL_SC_ATOMIC64(fetch_##op##name), \
+ /* LSE atomics */ \
+" " #asm_op #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
}
-static inline void atomic64_add(long i, atomic64_t *v)
-{
- register long x0 asm ("x0") = i;
- register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_FETCH_OPS(op, asm_op) \
+ ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \
+ ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \
+ ATOMIC64_FETCH_OP( , al, op, asm_op, "memory")
- asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add),
- " stadd %[i], %[v]\n")
- : [i] "+r" (x0), [v] "+Q" (v->counter)
- : "r" (x1)
- : __LL_SC_CLOBBERS);
-}
+ATOMIC64_FETCH_OPS(andnot, ldclr)
+ATOMIC64_FETCH_OPS(or, ldset)
+ATOMIC64_FETCH_OPS(xor, ldeor)
+ATOMIC64_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_FETCH_OPS
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
static inline long atomic64_add_return##name(long i, atomic64_t *v) \
@@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v)
: __LL_SC_CLOBBERS);
}
+#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \
+static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("w0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(fetch_and##name), \
+ /* LSE atomics */ \
+ " mvn %[i], %[i]\n" \
+ " ldclr" #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
+}
+
+ATOMIC64_FETCH_OP_AND(_relaxed, )
+ATOMIC64_FETCH_OP_AND(_acquire, a, "memory")
+ATOMIC64_FETCH_OP_AND(_release, l, "memory")
+ATOMIC64_FETCH_OP_AND( , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_AND
+
static inline void atomic64_sub(long i, atomic64_t *v)
{
register long x0 asm ("x0") = i;
@@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC64_OP_SUB_RETURN
+#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \
+static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \
+{ \
+ register long x0 asm ("w0") = i; \
+ register atomic64_t *x1 asm ("x1") = v; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " nop\n" \
+ __LL_SC_ATOMIC64(fetch_sub##name), \
+ /* LSE atomics */ \
+ " neg %[i], %[i]\n" \
+ " ldadd" #mb " %[i], %[i], %[v]") \
+ : [i] "+r" (x0), [v] "+Q" (v->counter) \
+ : "r" (x1) \
+ : __LL_SC_CLOBBERS, ##cl); \
+ \
+ return x0; \
+}
+
+ATOMIC64_FETCH_OP_SUB(_relaxed, )
+ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory")
+ATOMIC64_FETCH_OP_SUB(_release, l, "memory")
+ATOMIC64_FETCH_OP_SUB( , al, "memory")
+
+#undef ATOMIC64_FETCH_OP_SUB
+
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
register long x0 asm ("x0") = (long)v;
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index dae5c49618db..4eea7f618dce 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -91,6 +91,19 @@ do { \
__u.__val; \
})
+#define smp_cond_load_acquire(ptr, cond_expr) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ } \
+ VAL; \
+})
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 510c7b404454..bd86a79491bc 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
+#define __CMPWAIT_CASE(w, sz, name) \
+static inline void __cmpwait_case_##name(volatile void *ptr, \
+ unsigned long val) \
+{ \
+ unsigned long tmp; \
+ \
+ asm volatile( \
+ " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " cbnz %" #w "[tmp], 1f\n" \
+ " wfe\n" \
+ "1:" \
+ : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \
+ : [val] "r" (val)); \
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w, , 4);
+__CMPWAIT_CASE( , , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx) \
+static inline void __cmpwait##sfx(volatile void *ptr, \
+ unsigned long val, \
+ int size) \
+{ \
+ switch (size) { \
+ case 1: \
+ return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ case 2: \
+ return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ case 4: \
+ return __cmpwait_case##sfx##_4(ptr, val); \
+ case 8: \
+ return __cmpwait_case##sfx##_8(ptr, val); \
+ default: \
+ BUILD_BUG(); \
+ } \
+ \
+ unreachable(); \
+}
+
+__CMPWAIT_GEN()
+
+#undef __CMPWAIT_GEN
+
+#define __cmpwait_relaxed(ptr, val) \
+ __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+
#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index d74fd8ce980a..3d5ce38a6f0b 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -41,21 +41,49 @@ static inline int __atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op, asm_op, asm_con) \
+static inline int __atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int result, val; \
+ \
+ asm volatile( \
+ "/* atomic_fetch_" #op " */\n" \
+ "1: ssrf 5\n" \
+ " ld.w %0, %3\n" \
+ " mov %1, %0\n" \
+ " " #asm_op " %1, %4\n" \
+ " stcond %2, %1\n" \
+ " brne 1b" \
+ : "=&r" (result), "=&r" (val), "=o" (v->counter) \
+ : "m" (v->counter), #asm_con (i) \
+ : "cc"); \
+ \
+ return result; \
+}
+
ATOMIC_OP_RETURN(sub, sub, rKs21)
ATOMIC_OP_RETURN(add, add, r)
+ATOMIC_FETCH_OP (sub, sub, rKs21)
+ATOMIC_FETCH_OP (add, add, r)
-#define ATOMIC_OP(op, asm_op) \
+#define ATOMIC_OPS(op, asm_op) \
ATOMIC_OP_RETURN(op, asm_op, r) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
(void)__atomic_##op##_return(i, v); \
+} \
+ATOMIC_FETCH_OP(op, asm_op, r) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic_fetch_##op(i, v); \
}
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, eor)
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, eor)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
/*
@@ -87,6 +115,14 @@ static inline int atomic_add_return(int i, atomic_t *v)
return __atomic_add_return(i, v);
}
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ if (IS_21BIT_CONST(i))
+ return __atomic_fetch_sub(-i, v);
+
+ return __atomic_fetch_add(i, v);
+}
+
/*
* atomic_sub_return - subtract the atomic variable
* @i: integer value to subtract
@@ -102,6 +138,14 @@ static inline int atomic_sub_return(int i, atomic_t *v)
return __atomic_add_return(-i, v);
}
+static inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+ if (IS_21BIT_CONST(i))
+ return __atomic_fetch_sub(i, v);
+
+ return __atomic_fetch_add(-i, v);
+}
+
/*
* __atomic_add_unless - add unless the number is a given value
* @v: pointer of type atomic_t
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index 1c1c42330c99..63c7deceeeb6 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -17,6 +17,7 @@
asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_xadd_asm(volatile int *ptr, int value);
asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value);
asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value);
@@ -28,10 +29,17 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
#define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
#define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
+#define atomic_fetch_add(i, v) __raw_atomic_xadd_asm(&(v)->counter, i)
+#define atomic_fetch_sub(i, v) __raw_atomic_xadd_asm(&(v)->counter, -(i))
+
#define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i)
#define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
#define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
+#define atomic_fetch_or(i, v) __raw_atomic_or_asm(&(v)->counter, i)
+#define atomic_fetch_and(i, v) __raw_atomic_and_asm(&(v)->counter, i)
+#define atomic_fetch_xor(i, v) __raw_atomic_xor_asm(&(v)->counter, i)
+
#endif
#include <asm-generic/atomic.h>
diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h
index 490c7caa02d9..c58f4a83ed6f 100644
--- a/arch/blackfin/include/asm/spinlock.h
+++ b/arch/blackfin/include/asm/spinlock.h
@@ -12,6 +12,8 @@
#else
#include <linux/atomic.h>
+#include <asm/processor.h>
+#include <asm/barrier.h>
asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr);
asmlinkage void __raw_spin_lock_asm(volatile int *ptr);
@@ -48,8 +50,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
- while (arch_spin_is_locked(lock))
- cpu_relax();
+ smp_cond_load_acquire(&lock->lock, !VAL);
}
static inline int arch_read_can_lock(arch_rwlock_t *rw)
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index a401c27b69b4..68096e8f787f 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(insl_16);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(__raw_atomic_add_asm);
+EXPORT_SYMBOL(__raw_atomic_xadd_asm);
EXPORT_SYMBOL(__raw_atomic_and_asm);
EXPORT_SYMBOL(__raw_atomic_or_asm);
EXPORT_SYMBOL(__raw_atomic_xor_asm);
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
index 26fccb5568b9..1e2989c5d6b2 100644
--- a/arch/blackfin/mach-bf561/atomic.S
+++ b/arch/blackfin/mach-bf561/atomic.S
@@ -607,6 +607,28 @@ ENDPROC(___raw_atomic_add_asm)
/*
* r0 = ptr
+ * r1 = value
+ *
+ * ADD a signed value to a 32bit word and return the old value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_xadd_asm)
+ p1 = r0;
+ r3 = r1;
+ [--sp] = rets;
+ call _get_core_lock;
+ r3 = [p1];
+ r2 = r3 + r2;
+ [p1] = r2;
+ r1 = p1;
+ call _put_core_lock;
+ r0 = r3;
+ rets = [sp++];
+ rts;
+ENDPROC(___raw_atomic_add_asm)
+
+/*
+ * r0 = ptr
* r1 = mask
*
* AND the mask bits from a 32bit word and return the old 32bit value
@@ -618,10 +640,9 @@ ENTRY(___raw_atomic_and_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 & r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 & r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
@@ -642,10 +663,9 @@ ENTRY(___raw_atomic_or_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 | r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 | r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
@@ -666,10 +686,9 @@ ENTRY(___raw_atomic_xor_asm)
r3 = r1;
[--sp] = rets;
call _get_core_lock;
- r2 = [p1];
- r3 = r2 ^ r3;
- [p1] = r3;
- r3 = r2;
+ r3 = [p1];
+ r2 = r2 ^ r3;
+ [p1] = r2;
r1 = p1;
call _put_core_lock;
r0 = r3;
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 64f02d451aa8..1c2a5e264fc7 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -60,16 +60,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
return atomic_add_return(i, v) < 0;
}
-static inline void atomic_add(int i, atomic_t *v)
-{
- atomic_add_return(i, v);
-}
-
-static inline void atomic_sub(int i, atomic_t *v)
-{
- atomic_sub_return(i, v);
-}
-
static inline void atomic_inc(atomic_t *v)
{
atomic_inc_return(v);
@@ -136,16 +126,6 @@ static inline long long atomic64_add_negative(long long i, atomic64_t *v)
return atomic64_add_return(i, v) < 0;
}
-static inline void atomic64_add(long long i, atomic64_t *v)
-{
- atomic64_add_return(i, v);
-}
-
-static inline void atomic64_sub(long long i, atomic64_t *v)
-{
- atomic64_sub_return(i, v);
-}
-
static inline void atomic64_inc(atomic64_t *v)
{
atomic64_inc_return(v);
@@ -182,11 +162,19 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
}
#define ATOMIC_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __atomic32_fetch_##op(i, &v->counter); \
+} \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
(void)__atomic32_fetch_##op(i, &v->counter); \
} \
\
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ return __atomic64_fetch_##op(i, &v->counter); \
+} \
static inline void atomic64_##op(long long i, atomic64_t *v) \
{ \
(void)__atomic64_fetch_##op(i, &v->counter); \
@@ -195,6 +183,8 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
ATOMIC_OP(or)
ATOMIC_OP(and)
ATOMIC_OP(xor)
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
#undef ATOMIC_OP
diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h
index 36e126d2f801..d4912c88b829 100644
--- a/arch/frv/include/asm/atomic_defs.h
+++ b/arch/frv/include/asm/atomic_defs.h
@@ -162,6 +162,8 @@ ATOMIC_EXPORT(__atomic64_fetch_##op);
ATOMIC_FETCH_OP(or)
ATOMIC_FETCH_OP(and)
ATOMIC_FETCH_OP(xor)
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
ATOMIC_OP_RETURN(add)
ATOMIC_OP_RETURN(sub)
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 4435a445ae7e..349a47a918db 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -28,6 +28,19 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return ret; \
}
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ h8300flags flags; \
+ int ret; \
+ \
+ flags = arch_local_irq_save(); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ arch_local_irq_restore(flags); \
+ return ret; \
+}
+
#define ATOMIC_OP(op, c_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
@@ -41,17 +54,21 @@ static inline void atomic_##op(int i, atomic_t *v) \
ATOMIC_OP_RETURN(add, +=)
ATOMIC_OP_RETURN(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+ATOMIC_OPS(add, +=)
+ATOMIC_OPS(sub, -=)
+#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
-#define atomic_add(i, v) (void)atomic_add_return(i, v)
#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
-
-#define atomic_sub(i, v) (void)atomic_sub_return(i, v)
#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
#define atomic_inc_return(v) atomic_add_return(1, v)
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 55696c4100d4..a62ba368b27d 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -110,7 +110,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
); \
} \
-#define ATOMIC_OP_RETURN(op) \
+#define ATOMIC_OP_RETURN(op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
int output; \
@@ -127,16 +127,37 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return output; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int output, val; \
+ \
+ __asm__ __volatile__ ( \
+ "1: %0 = memw_locked(%2);\n" \
+ " %1 = "#op "(%0,%3);\n" \
+ " memw_locked(%2,P3)=%1;\n" \
+ " if !P3 jump 1b;\n" \
+ : "=&r" (output), "=&r" (val) \
+ : "r" (&v->counter), "r" (i) \
+ : "memory", "p3" \
+ ); \
+ return output; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index 12ca4ebc0338..a1c55788c5d6 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -23,6 +23,8 @@
#define _ASM_SPINLOCK_H
#include <asm/irqflags.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* This file is pulled in for SMP builds.
@@ -176,8 +178,12 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
* SMP spinlocks are intended to allow only a single CPU at the lock
*/
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(lock) \
- do {while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
+
#define arch_spin_is_locked(x) ((x)->lock != 0)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 8dfb5f6f6c35..f565ad376142 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -42,8 +42,27 @@ ia64_atomic_##op (int i, atomic_t *v) \
return new; \
}
-ATOMIC_OP(add, +)
-ATOMIC_OP(sub, -)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static __inline__ int \
+ia64_atomic_fetch_##op (int i, atomic_t *v) \
+{ \
+ __s32 old, new; \
+ CMPXCHG_BUGCHECK_DECL \
+ \
+ do { \
+ CMPXCHG_BUGCHECK(v); \
+ old = atomic_read(v); \
+ new = old c_op i; \
+ } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \
+ return old; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
#define atomic_add_return(i,v) \
({ \
@@ -69,14 +88,44 @@ ATOMIC_OP(sub, -)
: ia64_atomic_sub(__ia64_asr_i, v); \
})
-ATOMIC_OP(and, &)
-ATOMIC_OP(or, |)
-ATOMIC_OP(xor, ^)
+#define atomic_fetch_add(i,v) \
+({ \
+ int __ia64_aar_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
+ || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
+ || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
+ || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
+ : ia64_atomic_fetch_add(__ia64_aar_i, v); \
+})
+
+#define atomic_fetch_sub(i,v) \
+({ \
+ int __ia64_asr_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
+ || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
+ || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
+ || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
+ : ia64_atomic_fetch_sub(__ia64_asr_i, v); \
+})
+
+ATOMIC_FETCH_OP(and, &)
+ATOMIC_FETCH_OP(or, |)
+ATOMIC_FETCH_OP(xor, ^)
+
+#define atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v)
+#define atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v)
+#define atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v)
-#define atomic_and(i,v) (void)ia64_atomic_and(i,v)
-#define atomic_or(i,v) (void)ia64_atomic_or(i,v)
-#define atomic_xor(i,v) (void)ia64_atomic_xor(i,v)
+#define atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v)
+#define atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v)
+#define atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v)
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP
#define ATOMIC64_OP(op, c_op) \
@@ -94,8 +143,27 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \
return new; \
}
-ATOMIC64_OP(add, +)
-ATOMIC64_OP(sub, -)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static __inline__ long \
+ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \
+{ \
+ __s64 old, new; \
+ CMPXCHG_BUGCHECK_DECL \
+ \
+ do { \
+ CMPXCHG_BUGCHECK(v); \
+ old = atomic64_read(v); \
+ new = old c_op i; \
+ } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \
+ return old; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(add, +)
+ATOMIC64_OPS(sub, -)
#define atomic64_add_return(i,v) \
({ \
@@ -121,14 +189,44 @@ ATOMIC64_OP(sub, -)
: ia64_atomic64_sub(__ia64_asr_i, v); \
})
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define atomic64_fetch_add(i,v) \
+({ \
+ long __ia64_aar_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \
+ || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \
+ || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \
+ || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \
+ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \
+ : ia64_atomic64_fetch_add(__ia64_aar_i, v); \
+})
+
+#define atomic64_fetch_sub(i,v) \
+({ \
+ long __ia64_asr_i = (i); \
+ (__builtin_constant_p(i) \
+ && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \
+ || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \
+ || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \
+ || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \
+ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \
+ : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \
+})
+
+ATOMIC64_FETCH_OP(and, &)
+ATOMIC64_FETCH_OP(or, |)
+ATOMIC64_FETCH_OP(xor, ^)
+
+#define atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v)
+#define atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v)
+#define atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v)
-#define atomic64_and(i,v) (void)ia64_atomic64_and(i,v)
-#define atomic64_or(i,v) (void)ia64_atomic64_or(i,v)
-#define atomic64_xor(i,v) (void)ia64_atomic64_xor(i,v)
+#define atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v)
+#define atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v)
+#define atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d65e31..28cb819e0ff9 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 8b23e070b844..8fa98dd303b4 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -40,7 +40,7 @@
static inline void
__down_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+ long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
if (result < 0)
rwsem_down_read_failed(sem);
@@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_acq(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
return old;
}
@@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem)
static inline void
__up_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+ long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -115,8 +115,8 @@ static inline int
__down_read_trylock (struct rw_semaphore *sem)
{
long tmp;
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+ while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+ if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
return 1;
}
}
@@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem)
static inline int
__down_write_trylock (struct rw_semaphore *sem)
{
- long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long tmp = atomic_long_cmpxchg_acquire(&sem->count,
+ RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
@@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_WAITING_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (old < 0)
rwsem_downgrade_wake(sem);
}
-/*
- * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1
- * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
- */
-#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
-#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 45698cd15b7b..ca9e76149a4a 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -15,6 +15,8 @@
#include <linux/atomic.h>
#include <asm/intrinsics.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#define arch_spin_lock_init(x) ((x)->lock = 0)
@@ -86,6 +88,8 @@ static __always_inline void __ticket_spin_unlock_wait(arch_spinlock_t *lock)
return;
cpu_relax();
}
+
+ smp_acquire__after_ctrl_dep();
}
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index ea35160d632b..640cc1c7099f 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -89,16 +89,44 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int result, val; \
+ \
+ local_irq_save(flags); \
+ __asm__ __volatile__ ( \
+ "# atomic_fetch_" #op " \n\t" \
+ DCACHE_CLEAR("%0", "r4", "%2") \
+ M32R_LOCK" %1, @%2; \n\t" \
+ "mv %0, %1 \n\t" \
+ #op " %1, %3; \n\t" \
+ M32R_UNLOCK" %1, @%2; \n\t" \
+ : "=&r" (result), "=&r" (val) \
+ : "r" (&v->counter), "r" (i) \
+ : "memory" \
+ __ATOMIC_CLOBBER \
+ ); \
+ local_irq_restore(flags); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h
index fa13694eaae3..323c7fc953cd 100644
--- a/arch/m32r/include/asm/spinlock.h
+++ b/arch/m32r/include/asm/spinlock.h
@@ -13,6 +13,8 @@
#include <linux/atomic.h>
#include <asm/dcache_clear.h>
#include <asm/page.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -27,8 +29,11 @@
#define arch_spin_is_locked(x) (*(volatile int *)(&(x)->slock) <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, VAL > 0);
+}
/**
* arch_spin_trylock - Try spin lock and return a result
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 4858178260f9..cf4c3a7b1a45 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -53,6 +53,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return t; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int t, tmp; \
+ \
+ __asm__ __volatile__( \
+ "1: movel %2,%1\n" \
+ " " #asm_op "l %3,%1\n" \
+ " casl %2,%1,%0\n" \
+ " jne 1b" \
+ : "+m" (*v), "=&d" (t), "=&d" (tmp) \
+ : "g" (i), "2" (atomic_read(v))); \
+ return tmp; \
+}
+
#else
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
@@ -68,20 +83,41 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return t; \
}
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned long flags; \
+ int t; \
+ \
+ local_irq_save(flags); \
+ t = v->counter; \
+ v->counter c_op i; \
+ local_irq_restore(flags); \
+ \
+ return t; \
+}
+
#endif /* CONFIG_RMW_INSNS */
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, add)
ATOMIC_OPS(sub, -=, sub)
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, eor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, eor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 88fa25fae8bd..def2c642f053 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -69,16 +69,44 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int result, temp; \
+ \
+ smp_mb(); \
+ \
+ asm volatile ( \
+ "1: LNKGETD %1, [%2]\n" \
+ " " #op " %0, %1, %3\n" \
+ " LNKSETD [%2], %0\n" \
+ " DEFR %0, TXSTAT\n" \
+ " ANDT %0, %0, #HI(0x3f000000)\n" \
+ " CMPT %0, #HI(0x02000000)\n" \
+ " BNZ 1b\n" \
+ : "=&d" (temp), "=&d" (result) \
+ : "da" (&v->counter), "bd" (i) \
+ : "cc"); \
+ \
+ smp_mb(); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 0295d9b8d5bf..6c1380a8a0d4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -64,15 +64,40 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return result; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long result; \
+ unsigned long flags; \
+ \
+ __global_lock1(flags); \
+ result = v->counter; \
+ fence(); \
+ v->counter c_op i; \
+ __global_unlock1(flags); \
+ \
+ return result; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
index 86a7cf3d1386..c0c7a22be1ae 100644
--- a/arch/metag/include/asm/spinlock.h
+++ b/arch/metag/include/asm/spinlock.h
@@ -1,14 +1,24 @@
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
#ifdef CONFIG_METAG_ATOMICITY_LOCK1
#include <asm/spinlock_lock1.h>
#else
#include <asm/spinlock_lnkget.h>
#endif
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+/*
+ * both lock1 and lnkget are test-and-set spinlocks with 0 unlocked and 1
+ * locked.
+ */
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 835b402e4574..0ab176bdb8e8 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -66,7 +66,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
" " #asm_op " %0, %2 \n" \
" sc %0, %1 \n" \
" .set mips0 \n" \
- : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
: "Ir" (i)); \
} while (unlikely(!temp)); \
} else { \
@@ -79,12 +79,10 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
}
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
-static __inline__ int atomic_##op##_return(int i, atomic_t * v) \
+static __inline__ int atomic_##op##_return_relaxed(int i, atomic_t * v) \
{ \
int result; \
\
- smp_mb__before_llsc(); \
- \
if (kernel_uses_llsc && R10000_LLSC_WAR) { \
int temp; \
\
@@ -125,23 +123,84 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \
raw_local_irq_restore(flags); \
} \
\
- smp_llsc_mb(); \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
+static __inline__ int atomic_fetch_##op##_relaxed(int i, atomic_t * v) \
+{ \
+ int result; \
+ \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ int temp; \
+ \
+ __asm__ __volatile__( \
+ " .set arch=r4000 \n" \
+ "1: ll %1, %2 # atomic_fetch_" #op " \n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " sc %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+ " move %0, %1 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ int temp; \
+ \
+ do { \
+ __asm__ __volatile__( \
+ " .set "MIPS_ISA_LEVEL" \n" \
+ " ll %1, %2 # atomic_fetch_" #op " \n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " sc %0, %2 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } while (unlikely(!result)); \
+ \
+ result = temp; \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ } \
\
return result; \
}
#define ATOMIC_OPS(op, c_op, asm_op) \
ATOMIC_OP(op, c_op, asm_op) \
- ATOMIC_OP_RETURN(op, c_op, asm_op)
+ ATOMIC_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
ATOMIC_OPS(add, +=, addu)
ATOMIC_OPS(sub, -=, subu)
-ATOMIC_OP(and, &=, and)
-ATOMIC_OP(or, |=, or)
-ATOMIC_OP(xor, ^=, xor)
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op) \
+ ATOMIC_OP(op, c_op, asm_op) \
+ ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -362,12 +421,10 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
}
#define ATOMIC64_OP_RETURN(op, c_op, asm_op) \
-static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
+static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \
{ \
long result; \
\
- smp_mb__before_llsc(); \
- \
if (kernel_uses_llsc && R10000_LLSC_WAR) { \
long temp; \
\
@@ -409,22 +466,85 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \
raw_local_irq_restore(flags); \
} \
\
- smp_llsc_mb(); \
+ return result; \
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op, asm_op) \
+static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \
+{ \
+ long result; \
+ \
+ if (kernel_uses_llsc && R10000_LLSC_WAR) { \
+ long temp; \
+ \
+ __asm__ __volatile__( \
+ " .set arch=r4000 \n" \
+ "1: lld %1, %2 # atomic64_fetch_" #op "\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " scd %0, %2 \n" \
+ " beqzl %0, 1b \n" \
+ " move %0, %1 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "+" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i)); \
+ } else if (kernel_uses_llsc) { \
+ long temp; \
+ \
+ do { \
+ __asm__ __volatile__( \
+ " .set "MIPS_ISA_LEVEL" \n" \
+ " lld %1, %2 # atomic64_fetch_" #op "\n" \
+ " " #asm_op " %0, %1, %3 \n" \
+ " scd %0, %2 \n" \
+ " .set mips0 \n" \
+ : "=&r" (result), "=&r" (temp), \
+ "=" GCC_OFF_SMALL_ASM() (v->counter) \
+ : "Ir" (i), GCC_OFF_SMALL_ASM() (v->counter) \
+ : "memory"); \
+ } while (unlikely(!result)); \
+ \
+ result = temp; \
+ } else { \
+ unsigned long flags; \
+ \
+ raw_local_irq_save(flags); \
+ result = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ } \
\
return result; \
}
#define ATOMIC64_OPS(op, c_op, asm_op) \
ATOMIC64_OP(op, c_op, asm_op) \
- ATOMIC64_OP_RETURN(op, c_op, asm_op)
+ ATOMIC64_OP_RETURN(op, c_op, asm_op) \
+ ATOMIC64_FETCH_OP(op, c_op, asm_op)
ATOMIC64_OPS(add, +=, daddu)
ATOMIC64_OPS(sub, -=, dsubu)
-ATOMIC64_OP(and, &=, and)
-ATOMIC64_OP(or, |=, or)
-ATOMIC64_OP(xor, ^=, xor)
+
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op, asm_op) \
+ ATOMIC64_OP(op, c_op, asm_op) \
+ ATOMIC64_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC64_OPS(and, &=, and)
+ATOMIC64_OPS(or, |=, or)
+ATOMIC64_OPS(xor, ^=, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 40196bebe849..f485afe51514 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -12,6 +12,7 @@
#include <linux/compiler.h>
#include <asm/barrier.h>
+#include <asm/processor.h>
#include <asm/compiler.h>
#include <asm/war.h>
@@ -48,8 +49,22 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- while (arch_spin_is_locked(x)) { cpu_relax(); }
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ u16 owner = READ_ONCE(lock->h.serving_now);
+ smp_rmb();
+ for (;;) {
+ arch_spinlock_t tmp = READ_ONCE(*lock);
+
+ if (tmp.h.serving_now == tmp.h.ticket ||
+ tmp.h.serving_now != owner)
+ break;
+
+ cpu_relax();
+ }
+ smp_acquire__after_ctrl_dep();
+}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index ce318d5ab23b..36389efd45e8 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -84,16 +84,41 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return retval; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int retval, status; \
+ \
+ asm volatile( \
+ "1: mov %4,(_AAR,%3) \n" \
+ " mov (_ADR,%3),%1 \n" \
+ " mov %1,%0 \n" \
+ " " #op " %5,%0 \n" \
+ " mov %0,(_ADR,%3) \n" \
+ " mov (_ADR,%3),%0 \n" /* flush */ \
+ " mov (_ASR,%3),%0 \n" \
+ " or %0,%0 \n" \
+ " bne 1b \n" \
+ : "=&r"(status), "=&r"(retval), "=m"(v->counter) \
+ : "a"(ATOMIC_OPS_BASE_ADDR), "r"(&v->counter), "r"(i) \
+ : "memory", "cc"); \
+ return retval; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h
index 1ae580f38933..9c7b8f7942d8 100644
--- a/arch/mn10300/include/asm/spinlock.h
+++ b/arch/mn10300/include/asm/spinlock.h
@@ -12,6 +12,8 @@
#define _ASM_SPINLOCK_H
#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#include <asm/rwlock.h>
#include <asm/page.h>
@@ -23,7 +25,11 @@
*/
#define arch_spin_is_locked(x) (*(volatile signed char *)(&(x)->slock) != 0)
-#define arch_spin_unlock_wait(x) do { barrier(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 1d109990a022..5394b9c5f914 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -121,16 +121,39 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \
return ret; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static __inline__ int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long flags; \
+ int ret; \
+ \
+ _atomic_spin_lock_irqsave(v, flags); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ _atomic_spin_unlock_irqrestore(v, flags); \
+ \
+ return ret; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -185,15 +208,39 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v) \
return ret; \
}
-#define ATOMIC64_OPS(op, c_op) ATOMIC64_OP(op, c_op) ATOMIC64_OP_RETURN(op, c_op)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ s64 ret; \
+ \
+ _atomic_spin_lock_irqsave(v, flags); \
+ ret = v->counter; \
+ v->counter c_op i; \
+ _atomic_spin_unlock_irqrestore(v, flags); \
+ \
+ return ret; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
ATOMIC64_OPS(add, +=)
ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 64f2992e439f..e32936cd7f10 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -13,8 +13,13 @@ static inline int arch_spin_is_locked(arch_spinlock_t *x)
}
#define arch_spin_lock(lock) arch_spin_lock_flags(lock, 0)
-#define arch_spin_unlock_wait(x) \
- do { cpu_relax(); } while (arch_spin_is_locked(x))
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *x)
+{
+ volatile unsigned int *a = __ldcw_align(x);
+
+ smp_cond_load_acquire(a, VAL);
+}
static inline void arch_spin_lock_flags(arch_spinlock_t *x,
unsigned long flags)
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index ae0751ef8788..f08d567e0ca4 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -78,21 +78,53 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
return t; \
}
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
+static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+{ \
+ int res, t; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
+ #asm_op " %1,%3,%0\n" \
+ PPC405_ERR77(0, %4) \
+" stwcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return res; \
+}
+
#define ATOMIC_OPS(op, asm_op) \
ATOMIC_OP(op, asm_op) \
- ATOMIC_OP_RETURN_RELAXED(op, asm_op)
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op)
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
-ATOMIC_OP(and, and)
-ATOMIC_OP(or, or)
-ATOMIC_OP(xor, xor)
-
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op) \
+ ATOMIC_OP(op, asm_op) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC_OPS(and, and)
+ATOMIC_OPS(or, or)
+ATOMIC_OPS(xor, xor)
+
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
@@ -329,20 +361,53 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \
return t; \
}
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
+static inline long \
+atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \
+{ \
+ long res, t; \
+ \
+ __asm__ __volatile__( \
+"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
+ #asm_op " %1,%3,%0\n" \
+" stdcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return res; \
+}
+
#define ATOMIC64_OPS(op, asm_op) \
ATOMIC64_OP(op, asm_op) \
- ATOMIC64_OP_RETURN_RELAXED(op, asm_op)
+ ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
-ATOMIC64_OP(and, and)
-ATOMIC64_OP(or, or)
-ATOMIC64_OP(xor, xor)
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
+
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
+
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+
#undef ATOPIC64_OPS
+#undef ATOMIC64_FETCH_OP_RELAXED
#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23e1f6c..078155fa1189 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 911064aa59b2..d28cc2f5b7b2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -93,6 +93,11 @@ static inline int atomic_add_return(int i, atomic_t *v)
return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
}
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER);
+}
+
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -114,22 +119,27 @@ static inline void atomic_add(int i, atomic_t *v)
#define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0)
#define atomic_sub(_i, _v) atomic_add(-(int)(_i), _v)
#define atomic_sub_return(_i, _v) atomic_add_return(-(int)(_i), _v)
+#define atomic_fetch_sub(_i, _v) atomic_fetch_add(-(int)(_i), _v)
#define atomic_sub_and_test(_i, _v) (atomic_sub_return(_i, _v) == 0)
#define atomic_dec(_v) atomic_sub(1, _v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
-#define ATOMIC_OP(op, OP) \
+#define ATOMIC_OPS(op, OP) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
__ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \
+} \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \
}
-ATOMIC_OP(and, AND)
-ATOMIC_OP(or, OR)
-ATOMIC_OP(xor, XOR)
+ATOMIC_OPS(and, AND)
+ATOMIC_OPS(or, OR)
+ATOMIC_OPS(xor, XOR)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
@@ -236,6 +246,11 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
}
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER);
+}
+
static inline void atomic64_add(long long i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -264,17 +279,21 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
return old;
}
-#define ATOMIC64_OP(op, OP) \
+#define ATOMIC64_OPS(op, OP) \
static inline void atomic64_##op(long i, atomic64_t *v) \
{ \
__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \
+} \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \
}
-ATOMIC64_OP(and, AND)
-ATOMIC64_OP(or, OR)
-ATOMIC64_OP(xor, XOR)
+ATOMIC64_OPS(and, AND)
+ATOMIC64_OPS(or, OR)
+ATOMIC64_OPS(xor, XOR)
-#undef ATOMIC64_OP
+#undef ATOMIC64_OPS
#undef __ATOMIC64_LOOP
static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
@@ -315,6 +334,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v)
#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v)
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
#define atomic64_dec(_v) atomic64_sub(1, _v)
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index c75e4471e618..597e7e96b59e 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
- return new;
-}
-
#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 63ebf37d3143..7e9e09f600fa 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -10,6 +10,8 @@
#define __ASM_SPINLOCK_H
#include <linux/smp.h>
+#include <asm/barrier.h>
+#include <asm/processor.h>
#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
@@ -97,6 +99,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
while (arch_spin_is_locked(lock))
arch_spin_relax(lock);
+ smp_acquire__after_ctrl_dep();
}
/*
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index b94df40e5f2d..d755e96c3064 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -43,16 +43,42 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return tmp; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int res, tmp; \
+ \
+ __asm__ __volatile__ ( \
+ " .align 2 \n\t" \
+ " mova 1f, r0 \n\t" /* r0 = end point */ \
+ " mov r15, r1 \n\t" /* r1 = saved sp */ \
+ " mov #-6, r15 \n\t" /* LOGIN: r15 = size */ \
+ " mov.l @%2, %0 \n\t" /* load old value */ \
+ " mov %0, %1 \n\t" /* save old value */ \
+ " " #op " %3, %0 \n\t" /* $op */ \
+ " mov.l %0, @%2 \n\t" /* store new value */ \
+ "1: mov r1, r15 \n\t" /* LOGOUT */ \
+ : "=&r" (tmp), "=&r" (res), "+r" (v) \
+ : "r" (i) \
+ : "memory" , "r0", "r1"); \
+ \
+ return res; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index 23fcdad5773e..8e2da5fa0178 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -33,15 +33,38 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long temp, flags; \
+ \
+ raw_local_irq_save(flags); \
+ temp = v->counter; \
+ v->counter c_op i; \
+ raw_local_irq_restore(flags); \
+ \
+ return temp; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_OP_RETURN(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
ATOMIC_OPS(add, +=)
ATOMIC_OPS(sub, -=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op, c_op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &=)
+ATOMIC_OPS(or, |=)
+ATOMIC_OPS(xor, ^=)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 33d34b16d4d6..caea2c45f6c2 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -48,15 +48,39 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
return temp; \
}
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long res, temp; \
+ \
+ __asm__ __volatile__ ( \
+"1: movli.l @%3, %0 ! atomic_fetch_" #op " \n" \
+" mov %0, %1 \n" \
+" " #op " %2, %0 \n" \
+" movco.l %0, @%3 \n" \
+" bf 1b \n" \
+" synco \n" \
+ : "=&z" (temp), "=&z" (res) \
+ : "r" (i), "r" (&v->counter) \
+ : "t"); \
+ \
+ return res; \
+}
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sh/include/asm/spinlock.h b/arch/sh/include/asm/spinlock.h
index bdc0f3b6c56a..416834b60ad0 100644
--- a/arch/sh/include/asm/spinlock.h
+++ b/arch/sh/include/asm/spinlock.h
@@ -19,14 +19,20 @@
#error "Need movli.l/movco.l for spinlocks"
#endif
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
/*
* Your basic SMP spinlocks, allowing only a single CPU anywhere
*/
#define arch_spin_is_locked(x) ((x)->lock <= 0)
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
-#define arch_spin_unlock_wait(x) \
- do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, VAL > 0);
+}
/*
* Simple spin lock operations. There are two variants, one clears IRQ's
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 7dcbebbcaec6..ee3f11c43cda 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -20,9 +20,10 @@
#define ATOMIC_INIT(i) { (i) }
int atomic_add_return(int, atomic_t *);
-void atomic_and(int, atomic_t *);
-void atomic_or(int, atomic_t *);
-void atomic_xor(int, atomic_t *);
+int atomic_fetch_add(int, atomic_t *);
+int atomic_fetch_and(int, atomic_t *);
+int atomic_fetch_or(int, atomic_t *);
+int atomic_fetch_xor(int, atomic_t *);
int atomic_cmpxchg(atomic_t *, int, int);
int atomic_xchg(atomic_t *, int);
int __atomic_add_unless(atomic_t *, int, int);
@@ -35,7 +36,13 @@ void atomic_set(atomic_t *, int);
#define atomic_inc(v) ((void)atomic_add_return( 1, (v)))
#define atomic_dec(v) ((void)atomic_add_return( -1, (v)))
+#define atomic_and(i, v) ((void)atomic_fetch_and((i), (v)))
+#define atomic_or(i, v) ((void)atomic_fetch_or((i), (v)))
+#define atomic_xor(i, v) ((void)atomic_fetch_xor((i), (v)))
+
#define atomic_sub_return(i, v) (atomic_add_return(-(int)(i), (v)))
+#define atomic_fetch_sub(i, v) (atomic_fetch_add (-(int)(i), (v)))
+
#define atomic_inc_return(v) (atomic_add_return( 1, (v)))
#define atomic_dec_return(v) (atomic_add_return( -1, (v)))
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index f2fbf9e16faf..24827a3f733a 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -28,16 +28,24 @@ void atomic64_##op(long, atomic64_t *);
int atomic_##op##_return(int, atomic_t *); \
long atomic64_##op##_return(long, atomic64_t *);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+int atomic_fetch_##op(int, atomic_t *); \
+long atomic64_fetch_##op(long, atomic64_t *);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
index bcc98fc35281..d9c5876c6121 100644
--- a/arch/sparc/include/asm/spinlock_32.h
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -9,12 +9,15 @@
#ifndef __ASSEMBLY__
#include <asm/psr.h>
+#include <asm/barrier.h>
#include <asm/processor.h> /* for cpu_relax */
#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
index 968917694978..87990b7c6b0d 100644
--- a/arch/sparc/include/asm/spinlock_64.h
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -8,6 +8,9 @@
#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/barrier.h>
+
/* To get debugging spinlocks which detect and catch
* deadlock situations, set CONFIG_DEBUG_SPINLOCK
* and rebuild your kernel.
@@ -23,9 +26,10 @@
#define arch_spin_is_locked(lp) ((lp)->lock != 0)
-#define arch_spin_unlock_wait(lp) \
- do { rmb(); \
- } while((lp)->lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->lock, !VAL);
+}
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index b9d63c0a7aab..2c373329d5cb 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -27,39 +27,44 @@ static DEFINE_SPINLOCK(dummy);
#endif /* SMP */
-#define ATOMIC_OP_RETURN(op, c_op) \
-int atomic_##op##_return(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP(op, c_op) \
+int atomic_fetch_##op(int i, atomic_t *v) \
{ \
int ret; \
unsigned long flags; \
spin_lock_irqsave(ATOMIC_HASH(v), flags); \
\
- ret = (v->counter c_op i); \
+ ret = v->counter; \
+ v->counter c_op i; \
\
spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \
return ret; \
} \
-EXPORT_SYMBOL(atomic_##op##_return);
+EXPORT_SYMBOL(atomic_fetch_##op);
-#define ATOMIC_OP(op, c_op) \
-void atomic_##op(int i, atomic_t *v) \
+#define ATOMIC_OP_RETURN(op, c_op) \
+int atomic_##op##_return(int i, atomic_t *v) \
{ \
+ int ret; \
unsigned long flags; \
spin_lock_irqsave(ATOMIC_HASH(v), flags); \
\
- v->counter c_op i; \
+ ret = (v->counter c_op i); \
\
spin_unlock_irqrestore(ATOMIC_HASH(v), flags); \
+ return ret; \
} \
-EXPORT_SYMBOL(atomic_##op);
+EXPORT_SYMBOL(atomic_##op##_return);
ATOMIC_OP_RETURN(add, +=)
-ATOMIC_OP(and, &=)
-ATOMIC_OP(or, |=)
-ATOMIC_OP(xor, ^=)
+ATOMIC_FETCH_OP(add, +=)
+ATOMIC_FETCH_OP(and, &=)
+ATOMIC_FETCH_OP(or, |=)
+ATOMIC_FETCH_OP(xor, ^=)
+
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
int atomic_xchg(atomic_t *v, int new)
{
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index d6b0363f345b..a5c5a0279ccc 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -9,10 +9,11 @@
.text
- /* Two versions of the atomic routines, one that
+ /* Three versions of the atomic routines, one that
* does not return a value and does not perform
- * memory barriers, and a second which returns
- * a value and does the barriers.
+ * memory barriers, and a two which return
+ * a value, the new and old value resp. and does the
+ * barriers.
*/
#define ATOMIC_OP(op) \
@@ -43,15 +44,34 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
2: BACKOFF_SPIN(%o2, %o3, 1b); \
ENDPROC(atomic_##op##_return);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
+ BACKOFF_SETUP(%o2); \
+1: lduw [%o1], %g1; \
+ op %g1, %o0, %g7; \
+ cas [%o1], %g1, %g7; \
+ cmp %g1, %g7; \
+ bne,pn %icc, BACKOFF_LABEL(2f, 1b); \
+ nop; \
+ retl; \
+ sra %g1, 0, %o0; \
+2: BACKOFF_SPIN(%o2, %o3, 1b); \
+ENDPROC(atomic_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
@@ -83,15 +103,34 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */ \
2: BACKOFF_SPIN(%o2, %o3, 1b); \
ENDPROC(atomic64_##op##_return);
-#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op)
+#define ATOMIC64_FETCH_OP(op) \
+ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */ \
+ BACKOFF_SETUP(%o2); \
+1: ldx [%o1], %g1; \
+ op %g1, %o0, %g7; \
+ casx [%o1], %g1, %g7; \
+ cmp %g1, %g7; \
+ bne,pn %xcc, BACKOFF_LABEL(2f, 1b); \
+ nop; \
+ retl; \
+ mov %g1, %o0; \
+2: BACKOFF_SPIN(%o2, %o3, 1b); \
+ENDPROC(atomic64_fetch_##op);
+
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
ATOMIC64_OPS(add)
ATOMIC64_OPS(sub)
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_FETCH_OP(op)
+
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 8eb454cfe05c..de5e97817bdb 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -107,15 +107,24 @@ EXPORT_SYMBOL(atomic64_##op);
EXPORT_SYMBOL(atomic_##op##_return); \
EXPORT_SYMBOL(atomic64_##op##_return);
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op) \
+EXPORT_SYMBOL(atomic_fetch_##op); \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 9fc0107a9c5e..8dda3c8ff5ab 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -46,6 +46,8 @@ static inline int atomic_read(const atomic_t *v)
*/
#define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v))
+#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v))
+
/**
* atomic_sub - subtract integer from atomic variable
* @i: integer value to subtract
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index d320ce253d86..a93774255136 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v)
_atomic_xchg_add(&v->counter, i);
}
-#define ATOMIC_OP(op) \
-unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+#define ATOMIC_OPS(op) \
+unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
- _atomic_##op((unsigned long *)&v->counter, i); \
+ _atomic_fetch_##op((unsigned long *)&v->counter, i); \
+} \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ smp_mb(); \
+ return _atomic_fetch_##op((unsigned long *)&v->counter, i); \
}
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
-#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ smp_mb();
+ return _atomic_xchg_add(&v->counter, i);
+}
/**
* atomic_add_return - add integer and return
@@ -126,16 +137,29 @@ static inline void atomic64_add(long long i, atomic64_t *v)
_atomic64_xchg_add(&v->counter, i);
}
-#define ATOMIC64_OP(op) \
-long long _atomic64_##op(long long *v, long long n); \
+#define ATOMIC64_OPS(op) \
+long long _atomic64_fetch_##op(long long *v, long long n); \
static inline void atomic64_##op(long long i, atomic64_t *v) \
{ \
- _atomic64_##op(&v->counter, i); \
+ _atomic64_fetch_##op(&v->counter, i); \
+} \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ smp_mb(); \
+ return _atomic64_fetch_##op(&v->counter, i); \
}
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
+
+#undef ATOMIC64_OPS
+
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ smp_mb();
+ return _atomic64_xchg_add(&v->counter, i);
+}
/**
* atomic64_add_return - add integer and return
@@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n)
#define atomic64_inc_return(v) atomic64_add_return(1LL, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
#define atomic64_sub(i, v) atomic64_add(-(i), (v))
#define atomic64_dec(v) atomic64_sub(1LL, (v))
@@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
-
#endif /* !__ASSEMBLY__ */
/*
@@ -242,16 +266,16 @@ struct __get_user {
unsigned long val;
int err;
};
-extern struct __get_user __atomic_cmpxchg(volatile int *p,
+extern struct __get_user __atomic32_cmpxchg(volatile int *p,
int *lock, int o, int n);
-extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_xchg_add_unless(volatile int *p,
int *lock, int o, int n);
-extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
-extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n);
extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
long long o, long long n);
extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
@@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
long long n);
extern long long __atomic64_xchg_add_unless(volatile long long *p,
int *lock, long long o, long long n);
-extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
-extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n);
/* Return failure from the atomic wrappers. */
struct __get_user __atomic_bad_address(int __user *addr);
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index b0531a623653..4cefa0c9fd81 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -32,11 +32,6 @@
* on any routine which updates memory and returns a value.
*/
-static inline void atomic_add(int i, atomic_t *v)
-{
- __insn_fetchadd4((void *)&v->counter, i);
-}
-
/*
* Note a subtlety of the locking here. We are required to provide a
* full memory barrier before and after the operation. However, we
@@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v)
return val;
}
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#define ATOMIC_OPS(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int val; \
+ smp_mb(); \
+ val = __insn_fetch##op##4((void *)&v->counter, i); \
+ smp_mb(); \
+ return val; \
+} \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ __insn_fetch##op##4((void *)&v->counter, i); \
+}
+
+ATOMIC_OPS(add)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+
+#undef ATOMIC_OPS
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
{
int guess, oldval = v->counter;
+ smp_mb();
do {
- if (oldval == u)
- break;
guess = oldval;
- oldval = cmpxchg(&v->counter, guess, guess + a);
+ __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+ oldval = __insn_cmpexch4(&v->counter, guess ^ i);
} while (guess != oldval);
+ smp_mb();
return oldval;
}
-static inline void atomic_and(int i, atomic_t *v)
-{
- __insn_fetchand4((void *)&v->counter, i);
-}
-
-static inline void atomic_or(int i, atomic_t *v)
-{
- __insn_fetchor4((void *)&v->counter, i);
-}
-
static inline void atomic_xor(int i, atomic_t *v)
{
int guess, oldval = v->counter;
@@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v)
} while (guess != oldval);
}
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int guess, oldval = v->counter;
+ do {
+ if (oldval == u)
+ break;
+ guess = oldval;
+ oldval = cmpxchg(&v->counter, guess, guess + a);
+ } while (guess != oldval);
+ return oldval;
+}
+
/* Now the true 64-bit operations. */
#define ATOMIC64_INIT(i) { (i) }
@@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v)
#define atomic64_read(v) READ_ONCE((v)->counter)
#define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i))
-static inline void atomic64_add(long i, atomic64_t *v)
-{
- __insn_fetchadd((void *)&v->counter, i);
-}
-
static inline long atomic64_add_return(long i, atomic64_t *v)
{
int val;
@@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v)
return val;
}
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+#define ATOMIC64_OPS(op) \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ long val; \
+ smp_mb(); \
+ val = __insn_fetch##op((void *)&v->counter, i); \
+ smp_mb(); \
+ return val; \
+} \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+ __insn_fetch##op((void *)&v->counter, i); \
+}
+
+ATOMIC64_OPS(add)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+
+#undef ATOMIC64_OPS
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
{
long guess, oldval = v->counter;
+ smp_mb();
do {
- if (oldval == u)
- break;
guess = oldval;
- oldval = cmpxchg(&v->counter, guess, guess + a);
+ __insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+ oldval = __insn_cmpexch(&v->counter, guess ^ i);
} while (guess != oldval);
- return oldval != u;
-}
-
-static inline void atomic64_and(long i, atomic64_t *v)
-{
- __insn_fetchand((void *)&v->counter, i);
-}
-
-static inline void atomic64_or(long i, atomic64_t *v)
-{
- __insn_fetchor((void *)&v->counter, i);
+ smp_mb();
+ return oldval;
}
static inline void atomic64_xor(long i, atomic64_t *v)
@@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v)
} while (guess != oldval);
}
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+ long guess, oldval = v->counter;
+ do {
+ if (oldval == u)
+ break;
+ guess = oldval;
+ oldval = cmpxchg(&v->counter, guess, guess + a);
+ } while (guess != oldval);
+ return oldval != u;
+}
+
#define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v))
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#define atomic64_sub(i, v) atomic64_add(-(i), (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index d55222806c2f..4c419ab95ab7 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -87,6 +87,13 @@ mb_incoherent(void)
#define __smp_mb__after_atomic() __smp_mb()
#endif
+/*
+ * The TILE architecture does not do speculative reads; this ensures
+ * that a control dependency also orders against loads and already provides
+ * a LOAD->{LOAD,STORE} order and can forgo the additional RMB.
+ */
+#define smp_acquire__after_ctrl_dep() barrier()
+
#include <asm-generic/barrier.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index bbf7b666f21d..d1406a95f6b7 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -19,9 +19,9 @@
#include <asm/barrier.h>
/* Tile-specific routines to support <asm/bitops.h>. */
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask);
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask);
/**
* set_bit - Atomically set a bit in memory
@@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask);
*/
static inline void set_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
*/
static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
*/
static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
- _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
+ _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr));
}
/**
@@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr)
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_or(addr, mask) & mask) != 0;
+ return (_atomic_fetch_or(addr, mask) & mask) != 0;
}
/**
@@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr)
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_andn(addr, mask) & mask) != 0;
+ return (_atomic_fetch_andn(addr, mask) & mask) != 0;
}
/**
@@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr,
unsigned long mask = BIT_MASK(nr);
addr += BIT_WORD(nr);
smp_mb(); /* barrier for proper semantics */
- return (_atomic_xor(addr, mask) & mask) != 0;
+ return (_atomic_fetch_xor(addr, mask) & mask) != 0;
}
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index 1a6ef1b69cb1..e64a1b75fc38 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -80,16 +80,16 @@
ret = gu.err; \
}
-#define __futex_set() __futex_call(__atomic_xchg)
-#define __futex_add() __futex_call(__atomic_xchg_add)
-#define __futex_or() __futex_call(__atomic_or)
-#define __futex_andn() __futex_call(__atomic_andn)
-#define __futex_xor() __futex_call(__atomic_xor)
+#define __futex_set() __futex_call(__atomic32_xchg)
+#define __futex_add() __futex_call(__atomic32_xchg_add)
+#define __futex_or() __futex_call(__atomic32_fetch_or)
+#define __futex_andn() __futex_call(__atomic32_fetch_andn)
+#define __futex_xor() __futex_call(__atomic32_fetch_xor)
#define __futex_cmpxchg() \
{ \
- struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
- lock, oldval, oparg); \
+ struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \
+ lock, oldval, oparg); \
val = gu.val; \
ret = gu.err; \
}
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 298df1e9912a..f8128800dbf5 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -61,13 +61,13 @@ static inline int *__atomic_setup(volatile void *v)
int _atomic_xchg(int *v, int n)
{
- return __atomic_xchg(v, __atomic_setup(v), n).val;
+ return __atomic32_xchg(v, __atomic_setup(v), n).val;
}
EXPORT_SYMBOL(_atomic_xchg);
int _atomic_xchg_add(int *v, int i)
{
- return __atomic_xchg_add(v, __atomic_setup(v), i).val;
+ return __atomic32_xchg_add(v, __atomic_setup(v), i).val;
}
EXPORT_SYMBOL(_atomic_xchg_add);
@@ -78,39 +78,39 @@ int _atomic_xchg_add_unless(int *v, int a, int u)
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
+ return __atomic32_xchg_add_unless(v, __atomic_setup(v), u, a).val;
}
EXPORT_SYMBOL(_atomic_xchg_add_unless);
int _atomic_cmpxchg(int *v, int o, int n)
{
- return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
+ return __atomic32_cmpxchg(v, __atomic_setup(v), o, n).val;
}
EXPORT_SYMBOL(_atomic_cmpxchg);
-unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_or((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_or);
+EXPORT_SYMBOL(_atomic_fetch_or);
-unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_and(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_and((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_and);
+EXPORT_SYMBOL(_atomic_fetch_and);
-unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_andn((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_andn);
+EXPORT_SYMBOL(_atomic_fetch_andn);
-unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask)
{
- return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+ return __atomic32_fetch_xor((int *)p, __atomic_setup(p), mask).val;
}
-EXPORT_SYMBOL(_atomic_xor);
+EXPORT_SYMBOL(_atomic_fetch_xor);
long long _atomic64_xchg(long long *v, long long n)
@@ -142,23 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
}
EXPORT_SYMBOL(_atomic64_cmpxchg);
-long long _atomic64_and(long long *v, long long n)
+long long _atomic64_fetch_and(long long *v, long long n)
{
- return __atomic64_and(v, __atomic_setup(v), n);
+ return __atomic64_fetch_and(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_and);
+EXPORT_SYMBOL(_atomic64_fetch_and);
-long long _atomic64_or(long long *v, long long n)
+long long _atomic64_fetch_or(long long *v, long long n)
{
- return __atomic64_or(v, __atomic_setup(v), n);
+ return __atomic64_fetch_or(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_or);
+EXPORT_SYMBOL(_atomic64_fetch_or);
-long long _atomic64_xor(long long *v, long long n)
+long long _atomic64_fetch_xor(long long *v, long long n)
{
- return __atomic64_xor(v, __atomic_setup(v), n);
+ return __atomic64_fetch_xor(v, __atomic_setup(v), n);
}
-EXPORT_SYMBOL(_atomic64_xor);
+EXPORT_SYMBOL(_atomic64_fetch_xor);
/*
* If any of the atomic or futex routines hit a bad address (not in
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index f611265633d6..1a70e6c0f259 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -172,15 +172,20 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
.endif
.endm
-atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
-atomic_op _xchg, 32, "move r24, r2"
-atomic_op _xchg_add, 32, "add r24, r22, r2"
-atomic_op _xchg_add_unless, 32, \
+
+/*
+ * Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
+ */
+
+atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op 32_xchg, 32, "move r24, r2"
+atomic_op 32_xchg_add, 32, "add r24, r22, r2"
+atomic_op 32_xchg_add_unless, 32, \
"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
-atomic_op _or, 32, "or r24, r22, r2"
-atomic_op _and, 32, "and r24, r22, r2"
-atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
-atomic_op _xor, 32, "xor r24, r22, r2"
+atomic_op 32_fetch_or, 32, "or r24, r22, r2"
+atomic_op 32_fetch_and, 32, "and r24, r22, r2"
+atomic_op 32_fetch_andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
@@ -192,9 +197,9 @@ atomic_op 64_xchg_add_unless, 64, \
{ bbns r26, 3f; add r24, r22, r4 }; \
{ bbns r27, 3f; add r25, r23, r5 }; \
slt_u r26, r24, r22; add r25, r25, r26"
-atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
-atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
-atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
+atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
jrp lr /* happy backtracer */
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 88c2a53362e7..076c6cc43113 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
do {
delay_backoff(iterations++);
} while (READ_ONCE(lock->current_ticket) == curr);
+
+ /*
+ * The TILE architecture doesn't do read speculation; therefore
+ * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+ */
+ barrier();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index c8d1f94ff1fe..a4b5b2cbce93 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -76,6 +76,12 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
do {
delay_backoff(iterations++);
} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
+
+ /*
+ * The TILE architecture doesn't do read speculation; therefore
+ * a control dependency guarantees a LOAD->{LOAD,STORE} order.
+ */
+ barrier();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 3e8674288198..a58b99811105 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -171,6 +171,16 @@ static __always_inline int atomic_sub_return(int i, atomic_t *v)
#define atomic_inc_return(v) (atomic_add_return(1, v))
#define atomic_dec_return(v) (atomic_sub_return(1, v))
+static __always_inline int atomic_fetch_add(int i, atomic_t *v)
+{
+ return xadd(&v->counter, i);
+}
+
+static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
+{
+ return xadd(&v->counter, -i);
+}
+
static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
return cmpxchg(&v->counter, old, new);
@@ -190,10 +200,29 @@ static inline void atomic_##op(int i, atomic_t *v) \
: "memory"); \
}
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#define ATOMIC_FETCH_OP(op, c_op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ int old, val = atomic_read(v); \
+ for (;;) { \
+ old = atomic_cmpxchg(v, val, val c_op i); \
+ if (old == val) \
+ break; \
+ val = old; \
+ } \
+ return old; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+ ATOMIC_OP(op) \
+ ATOMIC_FETCH_OP(op, c_op)
+
+ATOMIC_OPS(and, &)
+ATOMIC_OPS(or , |)
+ATOMIC_OPS(xor, ^)
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP
/**
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a984111135b1..71d7705fb303 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -320,10 +320,29 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \
c = old; \
}
-ATOMIC64_OP(and, &)
-ATOMIC64_OP(or, |)
-ATOMIC64_OP(xor, ^)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
+{ \
+ long long old, c = 0; \
+ while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
+ c = old; \
+ return old; \
+}
+
+ATOMIC64_FETCH_OP(add, +)
+
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 037351022f54..70eed0e14553 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -158,6 +158,16 @@ static inline long atomic64_sub_return(long i, atomic64_t *v)
return atomic64_add_return(-i, v);
}
+static inline long atomic64_fetch_add(long i, atomic64_t *v)
+{
+ return xadd(&v->counter, i);
+}
+
+static inline long atomic64_fetch_sub(long i, atomic64_t *v)
+{
+ return xadd(&v->counter, -i);
+}
+
#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
@@ -229,10 +239,29 @@ static inline void atomic64_##op(long i, atomic64_t *v) \
: "memory"); \
}
-ATOMIC64_OP(and)
-ATOMIC64_OP(or)
-ATOMIC64_OP(xor)
+#define ATOMIC64_FETCH_OP(op, c_op) \
+static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
+{ \
+ long old, val = atomic64_read(v); \
+ for (;;) { \
+ old = atomic64_cmpxchg(v, val, val c_op i); \
+ if (old == val) \
+ break; \
+ val = old; \
+ } \
+ return old; \
+}
+
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda45a02..e9355a84fc67 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a44216e..d9850758464e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 453744c1d347..089ced4edbbc 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
: "memory", "cc");
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
- : "+m" (sem->count)
- : "er" (delta));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- return delta + xadd(&sem->count, delta);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index fd8017ce298a..e7a23f2a519a 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -98,6 +98,26 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return result; \
}
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32i %1, %3, 0\n" \
+ " wsr %1, scompare1\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32c1i %0, %3, 0\n" \
+ " bne %0, %1, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return result; \
+}
+
#else /* XCHAL_HAVE_S32C1I */
#define ATOMIC_OP(op) \
@@ -138,18 +158,42 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \
return vval; \
}
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t * v) \
+{ \
+ unsigned int tmp, vval; \
+ \
+ __asm__ __volatile__( \
+ " rsil a15,"__stringify(TOPLEVEL)"\n" \
+ " l32i %0, %3, 0\n" \
+ " " #op " %1, %0, %2\n" \
+ " s32i %1, %3, 0\n" \
+ " wsr a15, ps\n" \
+ " rsync\n" \
+ : "=&a" (vval), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "a15", "memory" \
+ ); \
+ \
+ return vval; \
+}
+
#endif /* XCHAL_HAVE_S32C1I */
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op)
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op) ATOMIC_OP_RETURN(op)
ATOMIC_OPS(add)
ATOMIC_OPS(sub)
-ATOMIC_OP(and)
-ATOMIC_OP(or)
-ATOMIC_OP(xor)
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
diff --git a/arch/xtensa/include/asm/spinlock.h b/arch/xtensa/include/asm/spinlock.h
index 1d95fa5dcd10..a36221cf6363 100644
--- a/arch/xtensa/include/asm/spinlock.h
+++ b/arch/xtensa/include/asm/spinlock.h
@@ -11,6 +11,9 @@
#ifndef _XTENSA_SPINLOCK_H
#define _XTENSA_SPINLOCK_H
+#include <asm/barrier.h>
+#include <asm/processor.h>
+
/*
* spinlock
*
@@ -29,8 +32,11 @@
*/
#define arch_spin_is_locked(x) ((x)->slock != 0)
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+ smp_cond_load_acquire(&lock->slock, !VAL);
+}
#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)