summaryrefslogtreecommitdiffstats
path: root/kernel/locking
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2018-04-05 11:05:35 +0200
committerIngo Molnar <mingo@kernel.org>2018-05-04 10:02:39 +0200
commitc427f69564e2a844c5fcf2804042609342513da0 (patch)
tree2db714fd8600eb96734dd88c2c6f9bb4beff7b3e /kernel/locking
parentde5b55c1d4e30740009864eb35ce4ed856aac01d (diff)
downloadlinux-c427f69564e2a844c5fcf2804042609342513da0.tar.bz2
locking/mutex: Optimize __mutex_trylock_fast()
Use try_cmpxchg to avoid the pointless TEST instruction.. And add the (missing) atomic_long_try_cmpxchg*() wrappery. On x86_64 this gives: 0000000000000710 <mutex_lock>: 0000000000000710 <mutex_lock>: 710: 65 48 8b 14 25 00 00 mov %gs:0x0,%rdx 710: 65 48 8b 14 25 00 00 mov %gs:0x0,%rdx 717: 00 00 717: 00 00 715: R_X86_64_32S current_task 715: R_X86_64_32S current_task 719: 31 c0 xor %eax,%eax 719: 31 c0 xor %eax,%eax 71b: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi) 71b: f0 48 0f b1 17 lock cmpxchg %rdx,(%rdi) 720: 48 85 c0 test %rax,%rax 720: 75 02 jne 724 <mutex_lock+0x14> 723: 75 02 jne 727 <mutex_lock+0x17> 722: f3 c3 repz retq 725: f3 c3 repz retq 724: eb da jmp 700 <__mutex_lock_slowpath> 727: eb d7 jmp 700 <__mutex_lock_slowpath> 726: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1) 729: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 72d: 00 00 00 On ARM64 this gives: 000000000000638 <mutex_lock>: 0000000000000638 <mutex_lock>: 638: d5384101 mrs x1, sp_el0 638: d5384101 mrs x1, sp_el0 63c: d2800002 mov x2, #0x0 63c: d2800002 mov x2, #0x0 640: f9800011 prfm pstl1strm, [x0] 640: f9800011 prfm pstl1strm, [x0] 644: c85ffc03 ldaxr x3, [x0] 644: c85ffc03 ldaxr x3, [x0] 648: ca020064 eor x4, x3, x2 648: ca020064 eor x4, x3, x2 64c: b5000064 cbnz x4, 658 <mutex_lock+0x20> 64c: b5000064 cbnz x4, 658 <mutex_lock+0x20> 650: c8047c01 stxr w4, x1, [x0] 650: c8047c01 stxr w4, x1, [x0] 654: 35ffff84 cbnz w4, 644 <mutex_lock+0xc> 654: 35ffff84 cbnz w4, 644 <mutex_lock+0xc> 658: b40000c3 cbz x3, 670 <mutex_lock+0x38> 658: b5000043 cbnz x3, 660 <mutex_lock+0x28> 65c: a9bf7bfd stp x29, x30, [sp,#-16]! 65c: d65f03c0 ret 660: 910003fd mov x29, sp 660: a9bf7bfd stp x29, x30, [sp,#-16]! 664: 97ffffef bl 620 <__mutex_lock_slowpath> 664: 910003fd mov x29, sp 668: a8c17bfd ldp x29, x30, [sp],#16 668: 97ffffee bl 620 <__mutex_lock_slowpath> 66c: d65f03c0 ret 66c: a8c17bfd ldp x29, x30, [sp],#16 670: d65f03c0 ret 670: d65f03c0 ret Reported-by: Matthew Wilcox <mawilcox@microsoft.com> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/mutex.c3
1 files changed, 2 insertions, 1 deletions
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..f44f658ae629 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
{
unsigned long curr = (unsigned long)current;
+ unsigned long zero = 0UL;
- if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+ if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
return true;
return false;