From 4e14a4d17a8cd66ccab180d32c977091922cfbed Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Feb 2010 00:57:28 +0000 Subject: powerpc: Use lwarx hint in spinlocks Recent versions of the PowerPC architecture added a hint bit to the larx instructions to differentiate between an atomic operation and a lock operation: > 0 Other programs might attempt to modify the word in storage addressed by EA > even if the subsequent Store Conditional succeeds. > > 1 Other programs will not attempt to modify the word in storage addressed by > EA until the program that has acquired the lock performs a subsequent store > releasing the lock. To avoid a binutils dependency this patch creates macros for the extended lwarx format and uses them in the spinlock code. To test this change I used a simple test case that acquires and releases a global pthread mutex: pthread_mutex_lock(&mutex); pthread_mutex_unlock(&mutex); On a 32 core POWER6, running 32 test threads we spend almost all our time in the futex spinlock code: 94.37% perf [kernel] [k] ._raw_spin_lock | |--99.95%-- ._raw_spin_lock | | | |--63.29%-- .futex_wake | | | |--36.64%-- .futex_wait_setup Which is a good test for this patch. 
The results (in lock/unlock operations per second) are: before: 1538203 ops/sec after: 2189219 ops/sec An improvement of 42% A 32 core POWER7 improves even more: before: 1279529 ops/sec after: 2282076 ops/sec An improvement of 78% Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-opcode.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc/include/asm/ppc-opcode.h') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ef9aa84cac5a..ecec76051184 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -24,6 +24,7 @@ #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a +#define PPC_INST_LWARX 0x7c000029 #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_LXVD2X 0x7c000698 #define PPC_INST_MCRXR 0x7c000400 @@ -55,15 +56,28 @@ #define __PPC_RA(a) (((a) & 0x1f) << 16) #define __PPC_RB(b) (((b) & 0x1f) << 11) #define __PPC_RS(s) (((s) & 0x1f) << 21) +#define __PPC_RT(s) __PPC_RS(s) #define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5)) #define __PPC_T_TLB(t) (((t) & 0x3) << 21) #define __PPC_WC(w) (((w) & 0x3) << 21) +/* + * Only use the larx hint bit on 64bit CPUs. Once we verify it doesn't have + * any side effects on all 32bit processors, we can do this all the time. 
+ */ +#ifdef CONFIG_PPC64 +#define __PPC_EH(eh) (((eh) & 0x1) << 0) +#else +#define __PPC_EH(eh) 0 +#endif /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ + __PPC_RT(t) | __PPC_RA(a) | \ + __PPC_RB(b) | __PPC_EH(eh)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ __PPC_RB(b)) #define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI) -- cgit v1.2.3 From 864b9e6fd76489aab422bac62162f57c52e06ed8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Feb 2010 01:02:36 +0000 Subject: powerpc: Use lwarx/ldarx hint in bit locks This patch implements the lwarx/ldarx hint bit for bit locks. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/asm-compat.h | 5 ++-- arch/powerpc/include/asm/bitops.h | 48 +++++++++++++++++------------------ arch/powerpc/include/asm/local.h | 12 ++++----- arch/powerpc/include/asm/ppc-opcode.h | 4 +++ 4 files changed, 37 insertions(+), 32 deletions(-) (limited to 'arch/powerpc/include/asm/ppc-opcode.h') diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 8f0fe7971949..c1b475a941eb 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -2,6 +2,7 @@ #define _ASM_POWERPC_ASM_COMPAT_H #include +#include #ifdef __ASSEMBLY__ # define stringify_in_c(...) __VA_ARGS__ @@ -24,7 +25,7 @@ #define PPC_LONG stringify_in_c(.llong) #define PPC_LONG_ALIGN stringify_in_c(.balign 8) #define PPC_TLNEI stringify_in_c(tdnei) -#define PPC_LLARX stringify_in_c(ldarx) +#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh) #define PPC_STLCX stringify_in_c(stdcx.) 
#define PPC_CNTLZL stringify_in_c(cntlzd) @@ -46,7 +47,7 @@ #define PPC_LONG stringify_in_c(.long) #define PPC_LONG_ALIGN stringify_in_c(.balign 4) #define PPC_TLNEI stringify_in_c(twnei) -#define PPC_LLARX stringify_in_c(lwarx) +#define PPC_LLARX(t, a, b, eh) PPC_LWARX(t, a, b, eh) #define PPC_STLCX stringify_in_c(stwcx.) #define PPC_CNTLZL stringify_in_c(cntlzw) #define PPC_MTOCRF stringify_in_c(mtcrf) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 56f2f2ea5631..3c7c37bd92e3 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -65,7 +65,7 @@ static __inline__ void fn(unsigned long mask, \ unsigned long *p = (unsigned long *)_p; \ __asm__ __volatile__ ( \ prefix \ -"1:" PPC_LLARX "%0,0,%3\n" \ +"1:" PPC_LLARX(%0,0,%3,0) "\n" \ stringify_in_c(op) "%0,%0,%2\n" \ PPC405_ERR77(0,%3) \ PPC_STLCX "%0,0,%3\n" \ @@ -103,31 +103,31 @@ static __inline__ void change_bit(int nr, volatile unsigned long *addr) /* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output * operands. 
*/ -#define DEFINE_TESTOP(fn, op, prefix, postfix) \ -static __inline__ unsigned long fn( \ - unsigned long mask, \ - volatile unsigned long *_p) \ -{ \ - unsigned long old, t; \ - unsigned long *p = (unsigned long *)_p; \ - __asm__ __volatile__ ( \ - prefix \ -"1:" PPC_LLARX "%0,0,%3\n" \ - stringify_in_c(op) "%1,%0,%2\n" \ - PPC405_ERR77(0,%3) \ - PPC_STLCX "%1,0,%3\n" \ - "bne- 1b\n" \ - postfix \ - : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p) \ - : "cc", "memory"); \ - return (old & mask); \ +#define DEFINE_TESTOP(fn, op, prefix, postfix, eh) \ +static __inline__ unsigned long fn( \ + unsigned long mask, \ + volatile unsigned long *_p) \ +{ \ + unsigned long old, t; \ + unsigned long *p = (unsigned long *)_p; \ + __asm__ __volatile__ ( \ + prefix \ +"1:" PPC_LLARX(%0,0,%3,eh) "\n" \ + stringify_in_c(op) "%1,%0,%2\n" \ + PPC405_ERR77(0,%3) \ + PPC_STLCX "%1,0,%3\n" \ + "bne- 1b\n" \ + postfix \ + : "=&r" (old), "=&r" (t) \ + : "r" (mask), "r" (p) \ + : "cc", "memory"); \ + return (old & mask); \ } -DEFINE_TESTOP(test_and_set_bits, or, LWSYNC_ON_SMP, ISYNC_ON_SMP) -DEFINE_TESTOP(test_and_set_bits_lock, or, "", ISYNC_ON_SMP) -DEFINE_TESTOP(test_and_clear_bits, andc, LWSYNC_ON_SMP, ISYNC_ON_SMP) -DEFINE_TESTOP(test_and_change_bits, xor, LWSYNC_ON_SMP, ISYNC_ON_SMP) +DEFINE_TESTOP(test_and_set_bits, or, LWSYNC_ON_SMP, ISYNC_ON_SMP, 0) +DEFINE_TESTOP(test_and_set_bits_lock, or, "", ISYNC_ON_SMP, 1) +DEFINE_TESTOP(test_and_clear_bits, andc, LWSYNC_ON_SMP, ISYNC_ON_SMP, 0) +DEFINE_TESTOP(test_and_change_bits, xor, LWSYNC_ON_SMP, ISYNC_ON_SMP, 0) static __inline__ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr) diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 84b457a3c1bc..ce58c80e1bcf 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -24,7 +24,7 @@ static __inline__ long local_add_return(long a, local_t *l) long t; __asm__ __volatile__( -"1:" PPC_LLARX "%0,0,%2 # 
local_add_return\n\ +"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\ add %0,%1,%0\n" PPC405_ERR77(0,%2) PPC_STLCX "%0,0,%2 \n\ @@ -43,7 +43,7 @@ static __inline__ long local_sub_return(long a, local_t *l) long t; __asm__ __volatile__( -"1:" PPC_LLARX "%0,0,%2 # local_sub_return\n\ +"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\ subf %0,%1,%0\n" PPC405_ERR77(0,%2) PPC_STLCX "%0,0,%2 \n\ @@ -60,7 +60,7 @@ static __inline__ long local_inc_return(local_t *l) long t; __asm__ __volatile__( -"1:" PPC_LLARX "%0,0,%1 # local_inc_return\n\ +"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\ addic %0,%0,1\n" PPC405_ERR77(0,%1) PPC_STLCX "%0,0,%1 \n\ @@ -87,7 +87,7 @@ static __inline__ long local_dec_return(local_t *l) long t; __asm__ __volatile__( -"1:" PPC_LLARX "%0,0,%1 # local_dec_return\n\ +"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\ addic %0,%0,-1\n" PPC405_ERR77(0,%1) PPC_STLCX "%0,0,%1\n\ @@ -117,7 +117,7 @@ static __inline__ int local_add_unless(local_t *l, long a, long u) long t; __asm__ __volatile__ ( -"1:" PPC_LLARX "%0,0,%1 # local_add_unless\n\ +"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\ cmpw 0,%0,%3 \n\ beq- 2f \n\ add %0,%2,%0 \n" @@ -147,7 +147,7 @@ static __inline__ long local_dec_if_positive(local_t *l) long t; __asm__ __volatile__( -"1:" PPC_LLARX "%0,0,%1 # local_dec_if_positive\n\ +"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\ cmpwi %0,1\n\ addi %0,%0,-1\n\ blt- 2f\n" diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ecec76051184..aea714797590 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -22,6 +22,7 @@ #define PPC_INST_DCBZL 0x7c2007ec #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e +#define PPC_INST_LDARX 0x7c0000a8 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000029 @@ -75,6 +76,9 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long 
PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ + __PPC_RT(t) | __PPC_RA(a) | \ + __PPC_RB(b) | __PPC_EH(eh)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ __PPC_RT(t) | __PPC_RA(a) | \ __PPC_RB(b) | __PPC_EH(eh)) -- cgit v1.2.3