From 8fe8f545c6d753ead15e1f4919d39e8f9bb49629 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Sun, 6 Mar 2011 18:07:50 -0800 Subject: futex: Update futex_wait_setup comments about locking Reviving a cleanup I had done about a year ago as part of a larger futex_set_wait proposal. Over the years, the locking of the hashed futex queue got improved, so that some of the "rare but normal" race conditions described in comments can't actually happen anymore. Signed-off-by: Michel Lespinasse Cc: Linus Torvalds Cc: Darren Hart Cc: Peter Zijlstra LKML-Reference: <20110307020750.GA31188@google.com> Signed-off-by: Thomas Gleixner --- kernel/futex.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index b766d28accd6..3184d3b9cadf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1781,13 +1781,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, * * The basic logical guarantee of a futex is that it blocks ONLY * if cond(var) is known to be true at the time of blocking, for - * any cond. If we queued after testing *uaddr, that would open - * a race condition where we could block indefinitely with + * any cond. If we locked the hash-bucket after testing *uaddr, that + * would open a race condition where we could block indefinitely with * cond(var) false, which would violate the guarantee. * - * A consequence is that futex_wait() can return zero and absorb - * a wakeup when *uaddr != val on entry to the syscall. This is - * rare, but normal. + * On the other hand, we insert q and release the hash-bucket only + * after testing *uaddr. This guarantees that futex_wait() will NOT + * absorb a wakeup if *uaddr does not match the desired values + * while the syscall executes. */ retry: ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key); -- cgit v1.2.3 From c0c9ed15042ceac7c485813012a0a97316101b57 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Mar 2011 11:51:22 +0100 Subject: futex: Avoid redudant evaluation of task_pid_vnr() The result is not going to change under us, so no need to reevaluate this over and over. Seems to be a leftover from the mechanical mass conversion of task->pid to task_pid_vnr(tsk). Signed-off-by: Thomas Gleixner --- kernel/futex.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 3184d3b9cadf..773815465bac 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -674,7 +674,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, struct task_struct *task, int set_waiters) { int lock_taken, ret, ownerdied = 0; - u32 uval, newval, curval; + u32 uval, newval, curval, vpid = task_pid_vnr(task); retry: ret = lock_taken = 0; @@ -684,7 +684,7 @@ retry: * (by doing a 0 -> TID atomic cmpxchg), while holding all * the locks. It will most likely not succeed. */ - newval = task_pid_vnr(task); + newval = vpid; if (set_waiters) newval |= FUTEX_WAITERS; @@ -696,7 +696,7 @@ retry: /* * Detect deadlocks. */ - if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) + if ((unlikely((curval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; /* @@ -723,7 +723,7 @@ retry: */ if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { /* Keep the OWNER_DIED bit */ - newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); + newval = (curval & ~FUTEX_TID_MASK) | vpid; ownerdied = 0; lock_taken = 1; } @@ -2047,9 +2047,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { struct futex_hash_bucket *hb; struct futex_q *this, *next; - u32 uval; struct plist_head *head; union futex_key key = FUTEX_KEY_INIT; + u32 uval, vpid = task_pid_vnr(current); int ret; retry: @@ -2058,7 +2058,7 @@ retry: /* * We release only a lock we actually own: */ - if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) + if ((uval & FUTEX_TID_MASK) != vpid) return -EPERM; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key); @@ -2074,7 +2074,7 @@ retry: * anyone else up: */ if (!(uval & FUTEX_OWNER_DIED)) - uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); + uval = cmpxchg_futex_value_locked(uaddr, vpid, 0); if (unlikely(uval == -EFAULT)) @@ -2083,7 +2083,7 @@ retry: * Rare case: we managed to release the lock atomically, * no need to wake anyone else up: */ - if (unlikely(uval == task_pid_vnr(current))) + if (unlikely(uval == vpid)) goto out_unlock; /* -- cgit v1.2.3 From 37a9d912b24f96a0591773e6e6c3642991ae5a70 Mon Sep 17 00:00:00 2001 From: Michel Lespinasse Date: Thu, 10 Mar 2011 18:48:51 -0800 Subject: futex: Sanitize cmpxchg_futex_value_locked API The cmpxchg_futex_value_locked API was funny in that it returned either the original, user-exposed futex value OR an error code such as -EFAULT. This was confusing at best, and could be a source of livelocks in places that retry the cmpxchg_futex_value_locked after trying to fix the issue by running fault_in_user_writeable(). This change makes the cmpxchg_futex_value_locked API more similar to the get_futex_value_locked one, returning an error code and updating the original value through a reference argument. Signed-off-by: Michel Lespinasse Acked-by: Chris Metcalf [tile] Acked-by: Tony Luck [ia64] Acked-by: Thomas Gleixner Tested-by: Michal Simek [microblaze] Acked-by: David Howells [frv] Cc: Darren Hart Cc: Peter Zijlstra Cc: Matt Turner Cc: Russell King Cc: Ralf Baechle Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Paul Mundt Cc: "David S. Miller" Cc: Linus Torvalds LKML-Reference: <20110311024851.GC26122@google.com> Signed-off-by: Thomas Gleixner --- arch/alpha/include/asm/futex.h | 22 +++++++++--------- arch/arm/include/asm/futex.h | 18 ++++++++------- arch/frv/include/asm/futex.h | 3 ++- arch/ia64/include/asm/futex.h | 9 +++++--- arch/microblaze/include/asm/futex.h | 24 +++++++++++--------- arch/mips/include/asm/futex.h | 32 +++++++++++++------------- arch/parisc/include/asm/futex.h | 18 +++++++-------- arch/powerpc/include/asm/futex.h | 20 +++++++++-------- arch/s390/include/asm/futex.h | 4 ++-- arch/s390/include/asm/uaccess.h | 2 +- arch/s390/lib/uaccess.h | 4 ++-- arch/s390/lib/uaccess_pt.c | 13 ++++++----- arch/s390/lib/uaccess_std.c | 6 +++-- arch/sh/include/asm/futex-irq.h | 9 ++++---- arch/sh/include/asm/futex.h | 5 +++-- arch/sparc/include/asm/futex_64.h | 16 ++++++++----- arch/tile/include/asm/futex.h | 7 +++--- arch/x86/include/asm/futex.h | 16 +++++++------ include/asm-generic/futex.h | 3 ++- kernel/futex.c | 45 +++++++++++++------------------------ 20 files changed, 144 insertions(+), 132 deletions(-) (limited to 'kernel') diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h index 945de222ab91..c4e5c2850cce 100644 --- a/arch/alpha/include/asm/futex.h +++ b/arch/alpha/include/asm/futex.h @@ -81,21 +81,22 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int prev, cmp; + int ret = 0, prev, cmp; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; __asm__ __volatile__ ( __ASM_SMP_MB - "1: ldl_l %0,0(%2)\n" - " cmpeq %0,%3,%1\n" - " beq %1,3f\n" - " mov %4,%1\n" - "2: stl_c %1,0(%2)\n" - " beq %1,4f\n" + "1: ldl_l %1,0(%3)\n" + " cmpeq %1,%4,%2\n" + " beq %2,3f\n" + " mov %5,%2\n" + "2: stl_c %2,0(%3)\n" + " beq %2,4f\n" "3: .subsection 2\n" "4: br 1b\n" " .previous\n" @@ -105,11 +106,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .long 2b-.\n" " lda $31,3b-2b(%0)\n" " .previous\n" - : "=&r"(prev), "=&r"(cmp) + : "+r"(ret), "=&r"(prev), "=&r"(cmp) : "r"(uaddr), "r"((long)oldval), "r"(newval) : "memory"); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 7133a8620830..d20b78fce758 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -88,9 +88,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int val; + int ret = 0, val; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; @@ -99,24 +100,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) * call sites. */ __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %0, [%3]\n" - " teq %0, %1\n" + "1: " T(ldr) " %1, [%4]\n" + " teq %1, %2\n" " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %2, [%3]\n" + "2: " T(streq) " %3, [%4]\n" "3:\n" " .pushsection __ex_table,\"a\"\n" " .align 3\n" " .long 1b, 4f, 2b, 4f\n" " .popsection\n" " .pushsection .fixup,\"ax\"\n" - "4: mov %0, %4\n" + "4: mov %0, %5\n" " b 3b\n" " .popsection" - : "=&r" (val) + : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); - return val; + *uval = val; + return ret; } #endif /* !SMP */ diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h index 08b3d1da3583..0548f8e4d11e 100644 --- a/arch/frv/include/asm/futex.h +++ b/arch/frv/include/asm/futex.h @@ -10,7 +10,8 @@ extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { return -ENOSYS; } diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h index c7f0f062239c..b0728404dad0 100644 --- a/arch/ia64/include/asm/futex.h +++ b/arch/ia64/include/asm/futex.h @@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; { - register unsigned long r8 __asm ("r8"); + register unsigned long r8 __asm ("r8") = 0; + unsigned long prev; __asm__ __volatile__( " mf;; \n" " mov ar.ccv=%3;; \n" "[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n" " .xdata4 \"__ex_table\", 1b-., 2f-. \n" "[2:]" - : "=r" (r8) + : "=r" (prev) : "r" (uaddr), "r" (newval), "rO" ((long) (unsigned) oldval) : "memory"); + *uval = prev; return r8; } } diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h index ad3fd61b2fe7..fa019ed65dfb 100644 --- a/arch/microblaze/include/asm/futex.h +++ b/arch/microblaze/include/asm/futex.h @@ -94,31 +94,33 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int prev, cmp; + int ret = 0, prev, cmp; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - __asm__ __volatile__ ("1: lwx %0, %2, r0; \ - cmp %1, %0, %3; \ - beqi %1, 3f; \ - 2: swx %4, %2, r0; \ - addic %1, r0, 0; \ - bnei %1, 1b; \ + __asm__ __volatile__ ("1: lwx %1, %3, r0; \ + cmp %2, %1, %4; \ + beqi %2, 3f; \ + 2: swx %5, %3, r0; \ + addic %2, r0, 0; \ + bnei %2, 1b; \ 3: \ .section .fixup,\"ax\"; \ 4: brid 3b; \ - addik %0, r0, %5; \ + addik %0, r0, %6; \ .previous; \ .section __ex_table,\"a\"; \ .word 1b,4b,2b,4b; \ .previous;" \ - : "=&r" (prev), "=&r"(cmp) \ + : "+r" (ret), "=&r" (prev), "=&r"(cmp) \ : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h index b9cce90346cf..692a24bd83b7 100644 --- a/arch/mips/include/asm/futex.h +++ b/arch/mips/include/asm/futex.h @@ -132,9 +132,10 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int retval; + int ret = 0, val; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; @@ -145,25 +146,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .set push \n" " .set noat \n" " .set mips3 \n" - "1: ll %0, %2 \n" - " bne %0, %z3, 3f \n" + "1: ll %1, %3 \n" + " bne %1, %z4, 3f \n" " .set mips0 \n" - " move $1, %z4 \n" + " move $1, %z5 \n" " .set mips3 \n" - "2: sc $1, %1 \n" + "2: sc $1, %2 \n" " beqzl $1, 1b \n" __WEAK_LLSC_MB "3: \n" " .set pop \n" " .section .fixup,\"ax\" \n" - "4: li %0, %5 \n" + "4: li %0, %6 \n" " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" " "__UA_ADDR "\t1b, 4b \n" " "__UA_ADDR "\t2b, 4b \n" " .previous \n" - : "=&r" (retval), "=R" (*uaddr) + : "+r" (ret), "=&r" (val), "=R" (*uaddr) : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); } else if (cpu_has_llsc) { @@ -172,31 +173,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) " .set push \n" " .set noat \n" " .set mips3 \n" - "1: ll %0, %2 \n" - " bne %0, %z3, 3f \n" + "1: ll %1, %3 \n" + " bne %1, %z4, 3f \n" " .set mips0 \n" - " move $1, %z4 \n" + " move $1, %z5 \n" " .set mips3 \n" - "2: sc $1, %1 \n" + "2: sc $1, %2 \n" " beqz $1, 1b \n" __WEAK_LLSC_MB "3: \n" " .set pop \n" " .section .fixup,\"ax\" \n" - "4: li %0, %5 \n" + "4: li %0, %6 \n" " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" " "__UA_ADDR "\t1b, 4b \n" " "__UA_ADDR "\t2b, 4b \n" " .previous \n" - : "=&r" (retval), "=R" (*uaddr) + : "+r" (ret), "=&r" (val), "=R" (*uaddr) : "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT) : "memory"); } else return -ENOSYS; - return retval; + *uval = val; + return ret; } #endif diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index 0c705c3a55ef..4c6d8672325b 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) /* Non-atomic version */ static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int err = 0; - int uval; + int val; /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is * our gateway page, and causes no end of trouble... @@ -65,12 +65,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - err = get_user(uval, uaddr); - if (err) return -EFAULT; - if (uval == oldval) - err = put_user(newval, uaddr); - if (err) return -EFAULT; - return uval; + if (get_user(val, uaddr)) + return -EFAULT; + if (val == oldval && put_user(newval, uaddr)) + return -EFAULT; + *uval = val; + return 0; } #endif /*__KERNEL__*/ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 7c589ef81fb0..631e8da60064 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -82,35 +82,37 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { - int prev; + int ret = 0, prev; if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; __asm__ __volatile__ ( PPC_RELEASE_BARRIER -"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\ - cmpw 0,%0,%3\n\ +"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ + cmpw 0,%1,%4\n\ bne- 3f\n" - PPC405_ERR77(0,%2) -"2: stwcx. %4,0,%2\n\ + PPC405_ERR77(0,%3) +"2: stwcx. %5,0,%3\n\ bne- 1b\n" PPC_ACQUIRE_BARRIER "3: .section .fixup,\"ax\"\n\ -4: li %0,%5\n\ +4: li %0,%6\n\ b 3b\n\ .previous\n\ .section __ex_table,\"a\"\n\ .align 3\n\ " PPC_LONG "1b,4b,2b,4b\n\ .previous" \ - : "=&r" (prev), "+m" (*uaddr) + : "+r" (ret), "=&r" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); - return prev; + *uval = prev; + return ret; } #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 5c5d02de49e9..27ac515ef59c 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, +static inline int futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, int oldval, int newval) { if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval); + return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d6b1ed0ec52b..549adf6a9b8b 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -84,7 +84,7 @@ struct uaccess_ops { size_t (*strnlen_user)(size_t, const char __user *); size_t (*strncpy_from_user)(size_t, const char __user *, char *); int (*futex_atomic_op)(int op, int __user *, int oparg, int *old); - int (*futex_atomic_cmpxchg)(int __user *, int old, int new); + int (*futex_atomic_cmpxchg)(int *, int __user *, int old, int new); }; extern struct uaccess_ops uaccess; diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index 126011df14f1..89a80674e44b 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *); extern size_t copy_to_user_std(size_t, void __user *, const void *); extern size_t strnlen_user_std(size_t, const char __user *); extern size_t strncpy_from_user_std(size_t, const char __user *, char *); -extern int futex_atomic_cmpxchg_std(int __user *, int, int); +extern int futex_atomic_cmpxchg_std(int *, int __user *, int, int); extern int futex_atomic_op_std(int, int __user *, int, int *); extern size_t copy_from_user_pt(size_t, const void __user *, void *); extern size_t copy_to_user_pt(size_t, void __user *, const void *); extern int futex_atomic_op_pt(int, int __user *, int, int *); -extern int futex_atomic_cmpxchg_pt(int __user *, int, int); +extern int futex_atomic_cmpxchg_pt(int *, int __user *, int, int); #endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 404f2de296dc..b3cebcd52f5c 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old) return ret; } -static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +static int __futex_atomic_cmpxchg_pt(int *uval, int __user *uaddr, + int oldval, int newval) { int ret; asm volatile("0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2:\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } -int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_pt(int *uval, int __user *uaddr, + int oldval, int newval) { int ret; if (segment_eq(get_fs(), KERNEL_DS)) - return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); if (!uaddr) { @@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval) } get_page(virt_to_page(uaddr)); spin_unlock(¤t->mm->page_table_lock); - ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval); + ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); put_page(virt_to_page(uaddr)); return ret; } diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index a6c4f7ed24a4..1d6643c0b95f 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval) +int futex_atomic_cmpxchg_std(int *uval, int __user *uaddr, + int oldval, int newval) { int ret; asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" - "1: lr %0,%1\n" + "1: la %0,0\n" "2: sacf 0\n" EX_TABLE(0b,2b) EX_TABLE(1b,2b) : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory" ); + *uval = oldval; return ret; } diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h index a9f16a7f9aea..7b701cbd1e84 100644 --- a/arch/sh/include/asm/futex-irq.h +++ b/arch/sh/include/asm/futex-irq.h @@ -88,7 +88,8 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, return ret; } -static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, +static inline int atomic_futex_op_cmpxchg_inatomic(int *uval, + int __user *uaddr, int oldval, int newval) { unsigned long flags; @@ -102,10 +103,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr, local_irq_restore(flags); - if (ret) - return ret; - - return prev; + *uval = prev; + return ret; } #endif /* __ASM_SH_FUTEX_IRQ_H */ diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h index 68256ec5fa35..a8a5125dc9b4 100644 --- a/arch/sh/include/asm/futex.h +++ b/arch/sh/include/asm/futex.h @@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval); + return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval); } #endif /* __KERNEL__ */ diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h index 47f95839dc69..e0862200d6a1 100644 --- a/arch/sparc/include/asm/futex_64.h +++ b/arch/sparc/include/asm/futex_64.h @@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { + int ret = 0; + __asm__ __volatile__( - "\n1: casa [%3] %%asi, %2, %0\n" + "\n1: casa [%4] %%asi, %3, %1\n" "2:\n" " .section .fixup,#alloc,#execinstr\n" " .align 4\n" "3: sethi %%hi(2b), %0\n" " jmpl %0 + %%lo(2b), %%g0\n" - " mov %4, %0\n" + " mov %5, %0\n" " .previous\n" " .section __ex_table,\"a\"\n" " .align 4\n" " .word 1b, 3b\n" " .previous\n" - : "=r" (newval) - : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) + : "+r" (ret), "=r" (newval) + : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) : "memory"); - return newval; + *uval = newval; + return ret; } #endif /* !(_SPARC64_FUTEX_H) */ diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index fe0d10dcae57..664b20aa2584 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -119,8 +119,8 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, - int newval) +static inline int futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { struct __get_user asm_ret; @@ -128,7 +128,8 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return -EFAULT; asm_ret = futex_cmpxchg(uaddr, oldval, newval); - return asm_ret.err ? asm_ret.err : asm_ret.val; + *uval = asm_ret.val; + return asm_ret.err; } #ifndef __tilegx__ diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce44e956..884c0b5676f4 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, - int newval) +static inline int futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { + int ret = 0; #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) /* Real i386 machines have no cmpxchg instruction */ @@ -122,18 +123,19 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" "2:\t.section .fixup, \"ax\"\n" - "3:\tmov %2, %0\n" + "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE(1b, 3b) - : "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "0" (oldval) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "1" (oldval) : "memory" ); - return oldval; + *uval = oldval; + return ret; } #endif diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 3c2344f48136..132bf5227b44 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int *uval, int __user *uaddr, + int oldval, int newval) { return -ENOSYS; } diff --git a/kernel/futex.c b/kernel/futex.c index 773815465bac..237f14bfc022 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, return NULL; } -static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) +static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr, + u32 uval, u32 newval) { - u32 curval; + int ret; pagefault_disable(); - curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); + ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval); pagefault_enable(); - return curval; + return ret; } static int get_futex_value_locked(u32 *dest, u32 __user *from) @@ -688,9 +689,7 @@ retry: if (set_waiters) newval |= FUTEX_WAITERS; - curval = cmpxchg_futex_value_locked(uaddr, 0, newval); - - if (unlikely(curval == -EFAULT)) + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval))) return -EFAULT; /* @@ -728,9 +727,7 @@ retry: lock_taken = 1; } - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (unlikely(curval == -EFAULT)) + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT; if (unlikely(curval != uval)) goto retry; @@ -843,9 +840,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (curval == -EFAULT) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) ret = -EFAULT; else if (curval != uval) ret = -EINVAL; @@ -880,10 +875,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) * There is no waiter, so we unlock the futex. The owner died * bit has not to be preserved here. We are the owner: */ - oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); - - if (oldval == -EFAULT) - return oldval; + if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0)) + return -EFAULT; if (oldval != uval) return -EAGAIN; @@ -1578,9 +1571,7 @@ retry: while (1) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (curval == -EFAULT) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) goto handle_fault; if (curval == uval) break; @@ -2073,11 +2064,8 @@ retry: * again. If it succeeds then we can return without waking * anyone else up: */ - if (!(uval & FUTEX_OWNER_DIED)) - uval = cmpxchg_futex_value_locked(uaddr, vpid, 0); - - - if (unlikely(uval == -EFAULT)) + if (!(uval & FUTEX_OWNER_DIED) && + cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; /* * Rare case: we managed to release the lock atomically, @@ -2464,9 +2452,7 @@ retry: * userspace. */ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); - - if (nval == -EFAULT) + if (futex_atomic_cmpxchg_inatomic(&nval, uaddr, uval, mval)) return -1; if (nval != uval) @@ -2679,8 +2665,7 @@ static int __init futex_init(void) * implementation, the non-functional ones will return * -ENOSYS. */ - curval = cmpxchg_futex_value_locked(NULL, 0, 0); - if (curval == -EFAULT) + if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) futex_cmpxchg_enabled = 1; for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { -- cgit v1.2.3 From 2e12978a9f7a7abd54e8eb9ce70a7718767b8b2c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 22 Dec 2010 14:18:50 +0800 Subject: futex,plist: Pass the real head of the priority list to plist_del() Some plist_del()s in kernel/futex.c are passed a faked head of the priority list. It does not fail because the current code does not require the real head in plist_del(). The current code of plist_del() just uses the head for checking, so it will not cause a bad result even when we use a faked head. But it is undocumented usage: /** * plist_del - Remove a @node from plist. * * @node: &struct plist_node pointer - entry to be removed * @head: &struct plist_head pointer - list head */ The document says that the @head is the "list head" head of the priority list. In futex code, several places use "plist_del(&q->list, &q->list.plist);", they pass a fake head. We need to fix them all. Thanks to Darren Hart for many suggestions. Acked-by: Darren Hart Signed-off-by: Lai Jiangshan LKML-Reference: <4D11984A.5030203@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/futex.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index b766d28accd6..6feeea4f8f15 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -775,6 +775,24 @@ retry: return ret; } +/** + * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket + * @q: The futex_q to unqueue + * + * The q->lock_ptr must not be NULL and must be held by the caller. + */ +static void __unqueue_futex(struct futex_q *q) +{ + struct futex_hash_bucket *hb; + + if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr) + || plist_node_empty(&q->list))) + return; + + hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); + plist_del(&q->list, &hb->chain); +} + /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. @@ -792,7 +810,7 @@ static void wake_futex(struct futex_q *q) */ get_task_struct(p); - plist_del(&q->list, &q->list.plist); + __unqueue_futex(q); /* * The waiting task can free the futex_q as soon as * q->lock_ptr = NULL is written, without taking any locks. A @@ -1100,8 +1118,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, get_futex_key_refs(key); q->key = *key; - WARN_ON(plist_node_empty(&q->list)); - plist_del(&q->list, &q->list.plist); + __unqueue_futex(q); WARN_ON(!q->rt_waiter); q->rt_waiter = NULL; @@ -1504,8 +1521,7 @@ retry: spin_unlock(lock_ptr); goto retry; } - WARN_ON(plist_node_empty(&q->list)); - plist_del(&q->list, &q->list.plist); + __unqueue_futex(q); BUG_ON(q->pi_state); @@ -1525,8 +1541,7 @@ retry: static void unqueue_me_pi(struct futex_q *q) __releases(q->lock_ptr) { - WARN_ON(plist_node_empty(&q->list)); - plist_del(&q->list, &q->list.plist); + __unqueue_futex(q); BUG_ON(!q->pi_state); free_pi_state(q->pi_state); @@ -2167,7 +2182,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * We were woken prior to requeue by a timeout or a signal. * Unqueue the futex_q and determine which it was. */ - plist_del(&q->list, &q->list.plist); + plist_del(&q->list, &hb->chain); /* Handle spurious wakeups gracefully */ ret = -EWOULDBLOCK; -- cgit v1.2.3 From 017f2b239dabb2740b91df162e004371b861f371 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 21 Dec 2010 17:55:10 +0800 Subject: futex,plist: Remove debug lock assignment from plist_node The original code uses &plist_node->plist as the fake head of the priority list for plist_del(), these debug locks in the fake head are needed for CONFIG_DEBUG_PI_LIST. But now we always pass the real head to plist_del(), the debug locks in plist_node will not be used, so we remove these assignments. Acked-by: Darren Hart Signed-off-by: Lai Jiangshan LKML-Reference: <4D10797E.7040803@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/futex.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 6feeea4f8f15..9fe913141ec9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1089,9 +1089,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, plist_del(&q->list, &hb1->chain); plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; -#ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.spinlock = &hb2->lock; -#endif } get_futex_key_refs(key2); q->key = *key2; @@ -1124,9 +1121,6 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, q->rt_waiter = NULL; q->lock_ptr = &hb->lock; -#ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.spinlock = &hb->lock; -#endif wake_up_state(q->task, TASK_NORMAL); } @@ -1474,9 +1468,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) prio = min(current->normal_prio, MAX_RT_PRIO); plist_node_init(&q->list, prio); -#ifdef CONFIG_DEBUG_PI_LIST - q->list.plist.spinlock = &hb->lock; -#endif plist_add(&q->list, &hb->chain); q->task = current; spin_unlock(&hb->lock); -- cgit v1.2.3 From 6e0aa9f8a8190e0879a29bd67aa606b51734a122 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Mar 2011 10:34:35 +0100 Subject: futex: Deobfuscate handle_futex_death() handle_futex_death() uses futex_atomic_cmpxchg_inatomic() without disabling page faults. That's ok, but totally non obvious. We don't hold locks so we actually can and want to fault here, because the get_user() before futex_atomic_cmpxchg_inatomic() does not guarantee a R/W mapping. We could just add a big fat comment to explain this, but actually changing the code so that the functionality is entirely clear is better. Use the helper function which disables page faults around the futex_atomic_cmpxchg_inatomic() and handle a fault with a call to fault_in_user_writeable() as all other places in the futex code do as well. Pointed-out-by: Linus Torvalds Signed-off-by: Thomas Gleixner Acked-by: Darren Hart Cc: Michel Lespinasse Cc: Peter Zijlstra Cc: Matt Turner Cc: Russell King Cc: David Howells Cc: Tony Luck Cc: Michal Simek Cc: Ralf Baechle Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Paul Mundt Cc: "David S. Miller" Cc: Chris Metcalf LKML-Reference: Signed-off-by: Thomas Gleixner --- kernel/futex.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index c6bef6e404fe..e9251d934f7d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2458,9 +2458,20 @@ retry: * userspace. */ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - if (futex_atomic_cmpxchg_inatomic(&nval, uaddr, uval, mval)) - return -1; - + /* + * We are not holding a lock here, but we want to have + * the pagefault_disable/enable() protection because + * we want to handle the fault gracefully. If the + * access fails we try to fault in the futex with R/W + * verification via get_user_pages. get_user() above + * does not guarantee R/W access. If that fails we + * give up and leave the futex locked. + */ + if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { + if (fault_in_user_writeable(uaddr)) + return -1; + goto retry; + } if (nval != uval) goto retry; -- cgit v1.2.3