diff options
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/lockdep.c | 81 | ||||
-rw-r--r-- | kernel/locking/mcs_spinlock.h | 6 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 51 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 14 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 3 | ||||
-rw-r--r-- | kernel/locking/rwsem-spinlock.c | 7 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 98 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 22 | ||||
-rw-r--r-- | kernel/locking/rwsem.h | 20 |
9 files changed, 174 insertions, 128 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..ba77ab5f64dd 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class) if (!new_class->name) return 0; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) { if (new_class->key - new_class->subclass == class->key) return class->name_version; if (class->name && !strcmp(class->name, new_class->name)) @@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) hash_head = classhashentry(key); /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: + * We do an RCU walk of the hash, see lockdep_free_key_range(). */ - list_for_each_entry(class, hash_head, hash_entry) { + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return NULL; + + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct list_head *hash_head; struct lock_class *class; - unsigned long flags; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) key = lock->key->subkeys + subclass; hash_head = classhashentry(key); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } /* * We have to do the hash-walk again, to avoid races * with another CPU: */ - list_for_each_entry(class, hash_head, hash_entry) + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) goto out_unlock_set; + } + /* * Allocate a new key from the static array, and add it to * the hash: */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); return NULL; } - raw_local_irq_restore(flags); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); @@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); - raw_local_irq_restore(flags); printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) @@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\n"); dump_stack(); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } } out_unlock_set: graph_unlock(); - raw_local_irq_restore(flags); out_set_class_cache: if (!subclass || force) @@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, entry->distance = distance; entry->trace = *trace; /* - * Since we never remove from the dependency list, the list can - * be walked lockless by other CPUs, it's only allocation - * that must be protected by the spinlock. But this also means - * we must make new entries visible only once writes to the - * entry become visible - hence the RCU op: + * Both allocation and removal are done under the graph lock; but + * iteration is under RCU-sched; see look_up_lock_class() and + * lockdep_free_key_range(). */ list_add_tail_rcu(&entry->entry, head); @@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry, else head = &lock->class->locks_before; - list_for_each_entry(entry, head, entry) { + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + + list_for_each_entry_rcu(entry, head, entry) { if (!lock_accessed(entry)) { unsigned int cq_depth; mark_lock_accessed(entry, lock); @@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, * We can walk it lock-free, because entries only get added * to the hash: */ - list_for_each_entry(chain, hash_head, entry) { + list_for_each_entry_rcu(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); @@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (unlikely(!debug_locks)) return; - if (subclass) + if (subclass) { + unsigned long flags; + + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; register_lock_class(lock, subclass, 1); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + } } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size) return addr >= start && addr < start + size; } +/* + * Used in module.c to remove lock classes from memory that is going to be + * freed; and possibly re-used by other modules. + * + * We will have had one sync_sched() before getting here, so we're guaranteed + * nobody will look up these exact classes -- they're properly dead but still + * allocated. + */ void lockdep_free_key_range(void *start, unsigned long size) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i; @@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); else if (within(class->name, start, size)) @@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size) if (locked) graph_unlock(); raw_local_irq_restore(flags); + + /* + * Wait for any possible iterators from look_up_lock_class() to pass + * before continuing to free the memory they refer to. + * + * sync_sched() is sufficient because the read-side is IRQ disable. + */ + synchronize_sched(); + + /* + * XXX at this point we could return the resources to the pool; + * instead we leak them. We would need to change to bitmap allocators + * instead of the linear allocators we have now. + */ } void lockdep_reset_lock(struct lockdep_map *lock) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i, j; @@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { int match = 0; for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index d1fe2ba5bac9..75e114bdf3f2 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) */ return; } - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* Wait until the lock holder passes the lock down. */ arch_mcs_spin_lock_contended(&node->locked); @@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { - struct mcs_spinlock *next = ACCESS_ONCE(node->next); + struct mcs_spinlock *next = READ_ONCE(node->next); if (likely(!next)) { /* @@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) + while (!(next = READ_ONCE(node->next))) cpu_relax_lowlatency(); } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 94674e5919cb..4cccea6b8934 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,7 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> -#include "mcs_spinlock.h" +#include <linux/osq_lock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock, } #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -static inline bool owner_running(struct mutex *lock, struct task_struct *owner) -{ - if (lock->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * lock->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); - - return owner->on_cpu; -} - /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. */ static noinline -int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) { + bool ret = true; + rcu_read_lock(); - while (owner_running(lock, owner)) { - if (need_resched()) + while (lock->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking lock->owner still matches owner. If that fails, + * owner might point to freed memory. If it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + if (!owner->on_cpu || need_resched()) { + ret = false; break; + } cpu_relax_lowlatency(); } rcu_read_unlock(); - /* - * We break out the loop above on need_resched() and when the - * owner changed, which is a sign for heavy contention. Return - * success only when lock->owner is NULL. - */ - return lock->owner == NULL; + return ret; } /* @@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner) retval = owner->on_cpu; rcu_read_unlock(); @@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * As such, when deadlock detection needs to be * performed the optimistic spinning cannot be done. */ - if (ACCESS_ONCE(ww->ctx)) + if (READ_ONCE(ww->ctx)) break; } @@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) break; @@ -490,7 +481,7 @@ static inline int __sched __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); if (!hold_ctx) return 0; diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index c112d00341b0..dc85ee23a26f 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) prev = decode_cpu(old); node->prev = prev; - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* * Normally @prev is untouchable after the above store; because at that @@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */ - while (!ACCESS_ONCE(node->locked)) { + while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. */ @@ -148,7 +148,7 @@ unqueue: * Or we race against a concurrent unqueue()'s step-B, in which * case its step-C will write us a new @node->prev pointer. */ - prev = ACCESS_ONCE(node->prev); + prev = READ_ONCE(node->prev); } /* @@ -170,8 +170,8 @@ unqueue: * it will wait in Step-A. */ - ACCESS_ONCE(next->prev) = prev; - ACCESS_ONCE(prev->next) = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); return false; } @@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock) node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); return; } next = osq_wait_next(lock, node, NULL); if (next) - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index e16e5542bf13..b73279367087 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) * * @task: the task owning the mutex (owner) for which a chain walk is * probably needed - * @deadlock_detect: do we have to carry out deadlock detection? + * @chwalk: do we have to carry out deadlock detection? * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck * things for a task that has just got its priority adjusted, and * is waiting on a mutex) @@ -1193,6 +1193,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); if (unlikely(ret)) { + __set_current_state(TASK_RUNNING); if (rt_mutex_has_waiters(lock)) remove_waiter(lock, &waiter); rt_mutex_handle_deadlock(ret, chwalk, &waiter); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 2555ae15ec14..3a5048572065 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) list_del(&waiter->list); tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2f7cc4076f50..3417d0172a5d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -14,8 +14,9 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/sched/rt.h> +#include <linux/osq_lock.h> -#include "mcs_spinlock.h" +#include "rwsem.h" /* * Guide to the rw_semaphore's count field for common values. @@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); @@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { if (!list_is_singular(&sem->wait_list)) rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + rwsem_set_owner(sem); return true; } @@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = ACCESS_ONCE(sem->count); + long old, count = READ_ONCE(sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); - if (old == count) + if (old == count) { + rwsem_set_owner(sem); return true; + } count = old; } @@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = false; + bool ret = true; if (need_resched()) return false; rcu_read_lock(); - owner = ACCESS_ONCE(sem->owner); - if (owner) - on_cpu = owner->on_cpu; - rcu_read_unlock(); - - /* - * If sem->owner is not set, yet we have just recently entered the - * slowpath, then there is a possibility reader(s) may have the lock. - * To be safe, avoid spinning in these situations. - */ - return on_cpu; -} - -static inline bool owner_running(struct rw_semaphore *sem, - struct task_struct *owner) -{ - if (sem->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * sem->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); + owner = READ_ONCE(sem->owner); + if (!owner) { + long count = READ_ONCE(sem->count); + /* + * If sem->owner is not set, yet we have just recently entered the + * slowpath with the lock being active, then there is a possibility + * reader(s) may have the lock. To be safe, bail spinning in these + * situations. + */ + if (count & RWSEM_ACTIVE_MASK) + ret = false; + goto done; + } - return owner->on_cpu; + ret = owner->on_cpu; +done: + rcu_read_unlock(); + return ret; } static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) { + long count; + rcu_read_lock(); - while (owner_running(sem, owner)) { - if (need_resched()) - break; + while (sem->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking sem->owner still matches owner, if that fails, + * owner might point to free()d memory, if it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + /* abort spinning when need_resched or owner is not running */ + if (!owner->on_cpu || need_resched()) { + rcu_read_unlock(); + return false; + } cpu_relax_lowlatency(); } rcu_read_unlock(); + if (READ_ONCE(sem->owner)) + return true; /* new owner, continue spinning */ + /* - * We break out the loop above on need_resched() or when the - * owner changed, which is a sign for heavy contention. Return - * success only when sem->owner is NULL. + * When the owner is not set, the lock could be free or + * held by readers. Check the counter to verify the + * state. */ - return sem->owner == NULL; + count = READ_ONCE(sem->count); + return (count == 0 || count == RWSEM_WAITING_BIAS); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) @@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) goto done; while (true) { - owner = ACCESS_ONCE(sem->owner); + owner = READ_ONCE(sem->owner); if (owner && !rwsem_spin_on_owner(sem, owner)) break; @@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = ACCESS_ONCE(sem->count); + count = READ_ONCE(sem->count); /* * If there were already threads queued before us and there are diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e2d3bc7f03b4..205be0ce34de 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -9,29 +9,9 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/rwsem.h> - #include <linux/atomic.h> -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ - sem->owner = current; -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ - sem->owner = NULL; -} - -#else -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ -} -#endif +#include "rwsem.h" /* * lock for reading diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h new file mode 100644 index 000000000000..870ed9a5b426 --- /dev/null +++ b/kernel/locking/rwsem.h @@ -0,0 +1,20 @@ +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ + sem->owner = current; +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ + sem->owner = NULL; +} + +#else +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ +} +#endif |