| author | Matthew Wilcox <matthew@wil.cx> | 2008-03-07 21:55:58 -0500 | 
|---|---|---|
| committer | Matthew Wilcox <willy@linux.intel.com> | 2008-04-17 10:42:34 -0400 | 
| commit | 64ac24e738823161693bf791f87adc802cf529ff (patch) | |
| tree | 19c0b0cf314d4394ca580c05b86cdf874ce0a167 /kernel | |
| parent | e48b3deee475134585eed03e7afebe4bf9e0dba9 (diff) | |
| download | linux-64ac24e738823161693bf791f87adc802cf529ff.tar.bz2 | |
Generic semaphore implementation
Semaphores are no longer performance-critical, so a generic C
implementation is better for maintainability, debuggability and
extensibility.  Thanks to Peter Zijlstra for fixing the lockdep
warning.  Thanks to Harvey Harrison for pointing out that the
unlikely() was unnecessary.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | kernel/Makefile | 2 |
| -rw-r--r-- | kernel/semaphore.c | 187 |

2 files changed, 188 insertions, 1 deletion
```diff
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c584c55a6e9..f45c69e69688 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o nsproxy.o srcu.o \
+	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o pm_qos_params.o
 
 obj-$(CONFIG_SYSCTL) += sysctl_check.o
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
new file mode 100644
index 000000000000..d5a72702f261
--- /dev/null
+++ b/kernel/semaphore.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ * Author: Matthew Wilcox <willy@linux.intel.com>
+ *
+ * Distributed under the terms of the GNU GPL, version 2
+ */
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+
+/*
+ * Some notes on the implementation:
+ *
+ * down_trylock() and up() can be called from interrupt context.
+ * So we have to disable interrupts when taking the lock.
+ *
+ * The ->count variable, if positive, defines how many more tasks can
+ * acquire the semaphore.  If negative, it represents how many tasks are
+ * waiting on the semaphore (*).  If zero, no tasks are waiting, and no more
+ * tasks can acquire the semaphore.
+ *
+ * (*) Except for the window between one task calling up() and the task
+ * sleeping in a __down_common() waking up.  In order to avoid a third task
+ * coming in and stealing the second task's wakeup, we leave the ->count
+ * negative.  If we have a more complex situation, the ->count may become
+ * zero or negative (eg a semaphore with count = 2, three tasks attempt to
+ * acquire it, one sleeps, two finish and call up(), the second task to call
+ * up() notices that the list is empty and just increments count).
+ */
+
+static noinline void __down(struct semaphore *sem);
+static noinline int __down_interruptible(struct semaphore *sem);
+static noinline void __up(struct semaphore *sem);
+
+void down(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (unlikely(sem->count-- <= 0))
+		__down(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(down);
+
+int down_interruptible(struct semaphore *sem)
+{
+	unsigned long flags;
+	int result = 0;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (unlikely(sem->count-- <= 0))
+		result = __down_interruptible(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(down_interruptible);
+
+/**
+ * down_trylock - try to acquire the semaphore, without waiting
+ * @sem: the semaphore to be acquired
+ *
+ * Try to acquire the semaphore atomically.  Returns 0 if the mutex has
+ * been acquired successfully and 1 if it is contended.
+ *
+ * NOTE: This return value is inverted from both spin_trylock and
+ * mutex_trylock!  Be careful about this when converting code.
+ *
+ * Unlike mutex_trylock, this function can be used from interrupt context,
+ * and the semaphore can be released by any task or interrupt.
+ */
+int down_trylock(struct semaphore *sem)
+{
+	unsigned long flags;
+	int count;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	count = sem->count - 1;
+	if (likely(count >= 0))
+		sem->count = count;
+	spin_unlock_irqrestore(&sem->lock, flags);
+
+	return (count < 0);
+}
+EXPORT_SYMBOL(down_trylock);
+
+void up(struct semaphore *sem)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sem->lock, flags);
+	if (likely(sem->count >= 0))
+		sem->count++;
+	else
+		__up(sem);
+	spin_unlock_irqrestore(&sem->lock, flags);
+}
+EXPORT_SYMBOL(up);
+
+/* Functions for the contended case */
+
+struct semaphore_waiter {
+	struct list_head list;
+	struct task_struct *task;
+	int up;
+};
+
+/*
+ * Wake up a process waiting on a semaphore.  We need to call this from both
+ * __up and __down_common as it's possible to race a task into the semaphore
+ * if it comes in at just the right time between two tasks calling up() and
+ * a third task waking up.  This function assumes the wait_list is already
+ * checked for being non-empty.
+ */
+static noinline void __sched __up_down_common(struct semaphore *sem)
+{
+	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+						struct semaphore_waiter, list);
+	list_del(&waiter->list);
+	waiter->up = 1;
+	wake_up_process(waiter->task);
+}
+
+/*
+ * Because this function is inlined, the 'state' parameter will be constant,
+ * and thus optimised away by the compiler.
+ */
+static inline int __sched __down_common(struct semaphore *sem, long state)
+{
+	int result = 0;
+	struct task_struct *task = current;
+	struct semaphore_waiter waiter;
+
+	list_add_tail(&waiter.list, &sem->wait_list);
+	waiter.task = task;
+	waiter.up = 0;
+
+	for (;;) {
+		if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+			goto interrupted;
+		__set_task_state(task, state);
+		spin_unlock_irq(&sem->lock);
+		schedule();
+		spin_lock_irq(&sem->lock);
+		if (waiter.up)
+			goto woken;
+	}
+
+ interrupted:
+	list_del(&waiter.list);
+	result = -EINTR;
+ woken:
+	/*
+	 * Account for the process which woke us up.  For the case where
+	 * we're interrupted, we need to increment the count on our own
+	 * behalf.  I don't believe we can hit the case where the
+	 * sem->count hits zero, *and* there's a second task sleeping,
+	 * but it doesn't hurt, that's not a commonly exercised path and
+	 * it's not a performance path either.
+	 */
+	if (unlikely((++sem->count >= 0) && !list_empty(&sem->wait_list)))
+		__up_down_common(sem);
+	return result;
+}
+
+static noinline void __sched __down(struct semaphore *sem)
+{
+	__down_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+static noinline int __sched __down_interruptible(struct semaphore *sem)
+{
+	return __down_common(sem, TASK_INTERRUPTIBLE);
+}
+
+static noinline void __sched __up(struct semaphore *sem)
+{
+	if (unlikely(list_empty(&sem->wait_list)))
+		sem->count++;
+	else
+		__up_down_common(sem);
+}
```
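For orientation, here is a minimal, hypothetical driver-style sketch of how the four entry points added by this patch are meant to be called. The foo_* names are invented for illustration, and sema_init() is assumed to come from <linux/semaphore.h>, which this commit relies on but does not itself add.

```c
#include <linux/errno.h>
#include <linux/semaphore.h>

/* Hypothetical resource guarded by a counting semaphore with 2 slots. */
static struct semaphore foo_sem;

static int foo_init(void)
{
	/* sema_init() is assumed to be provided by <linux/semaphore.h>. */
	sema_init(&foo_sem, 2);
	return 0;
}

/* Sleeping path: wait for a slot, back out cleanly on a signal. */
static int foo_do_work(void)
{
	if (down_interruptible(&foo_sem))
		return -EINTR;

	/* ... touch the shared resource ... */

	up(&foo_sem);
	return 0;
}

/* Non-sleeping path, usable from contexts that must not block. */
static int foo_try_work(void)
{
	/*
	 * NOTE: 0 = acquired, 1 = contended -- inverted relative to
	 * spin_trylock() and mutex_trylock(), as the kerneldoc warns.
	 */
	if (down_trylock(&foo_sem))
		return -EBUSY;

	/* ... touch the shared resource ... */

	up(&foo_sem);
	return 0;
}
```

The inverted down_trylock() return value called out in the kerneldoc shows up directly here: success is 0, so the result feeds the if() the opposite way from mutex_trylock().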