summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/atomic_bitops.txt66
-rw-r--r--Documentation/atomic_t.txt200
-rw-r--r--Documentation/locking/crossrelease.txt874
-rw-r--r--Documentation/memory-barriers.txt101
-rw-r--r--Documentation/static-keys.txt20
-rw-r--r--Documentation/translations/ko_KR/memory-barriers.txt5
-rw-r--r--arch/arc/include/asm/atomic.h2
-rw-r--r--arch/arm64/include/asm/spinlock.h11
-rw-r--r--arch/hexagon/include/asm/atomic.h2
-rw-r--r--arch/metag/include/asm/atomic_lock1.h2
-rw-r--r--arch/parisc/include/asm/atomic.h2
-rw-r--r--arch/powerpc/include/asm/barrier.h7
-rw-r--r--arch/powerpc/include/asm/spinlock.h3
-rw-r--r--arch/sparc/include/asm/atomic_32.h2
-rw-r--r--arch/tile/include/asm/atomic_32.h2
-rw-r--r--arch/x86/include/asm/atomic.h69
-rw-r--r--arch/x86/include/asm/atomic64_32.h81
-rw-r--r--arch/x86/include/asm/atomic64_64.h73
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--drivers/clocksource/arm_arch_timer.c6
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c5
-rw-r--r--fs/overlayfs/readdir.c4
-rw-r--r--fs/userfaultfd.c25
-rw-r--r--include/asm-generic/atomic64.h2
-rw-r--r--include/linux/atomic.h3
-rw-r--r--include/linux/completion.h45
-rw-r--r--include/linux/cpuset.h6
-rw-r--r--include/linux/futex.h7
-rw-r--r--include/linux/irqflags.h24
-rw-r--r--include/linux/jump_label.h33
-rw-r--r--include/linux/kasan-checks.h10
-rw-r--r--include/linux/lockdep.h161
-rw-r--r--include/linux/mm_types.h29
-rw-r--r--include/linux/rwsem-spinlock.h1
-rw-r--r--include/linux/rwsem.h1
-rw-r--r--include/linux/sched.h12
-rw-r--r--include/linux/sched/mm.h8
-rw-r--r--include/linux/spinlock.h41
-rw-r--r--init/Kconfig7
-rw-r--r--kernel/cgroup/cpuset.c7
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c22
-rw-r--r--kernel/jump_label.c104
-rw-r--r--kernel/locking/lockdep.c991
-rw-r--r--kernel/locking/lockdep_internals.h2
-rw-r--r--kernel/locking/lockdep_proc.c4
-rw-r--r--kernel/locking/lockdep_states.h1
-rw-r--r--kernel/locking/osq_lock.c13
-rw-r--r--kernel/locking/rtmutex_common.h29
-rw-r--r--kernel/locking/rwsem-spinlock.c37
-rw-r--r--kernel/locking/rwsem-xadd.c33
-rw-r--r--kernel/sched/completion.c11
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/sched/swait.c6
-rw-r--r--kernel/workqueue.c2
-rw-r--r--lib/Kconfig.debug21
-rw-r--r--mm/huge_memory.c20
-rw-r--r--mm/kasan/kasan.c4
-rw-r--r--mm/page_alloc.c49
-rw-r--r--mm/slab.h6
-rw-r--r--mm/slob.c6
-rw-r--r--mm/vmscan.c13
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/udp.c3
65 files changed, 2827 insertions, 523 deletions
diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
new file mode 100644
index 000000000000..5550bfdcce5f
--- /dev/null
+++ b/Documentation/atomic_bitops.txt
@@ -0,0 +1,66 @@
+
+On atomic bitops.
+
+
+While our bitmap_{}() functions are non-atomic, we have a number of operations
+operating on single bits in a bitmap that are atomic.
+
+
+API
+---
+
+The single bit operations are:
+
+Non-RMW ops:
+
+ test_bit()
+
+RMW atomic operations without return value:
+
+ {set,clear,change}_bit()
+ clear_bit_unlock()
+
+RMW atomic operations with return value:
+
+ test_and_{set,clear,change}_bit()
+ test_and_set_bit_lock()
+
+Barriers:
+
+ smp_mb__{before,after}_atomic()
+
+
+All RMW atomic operations have a '__' prefixed variant which is non-atomic.
+
+
+SEMANTICS
+---------
+
+Non-atomic ops:
+
+In particular __clear_bit_unlock() suffers the same issue as atomic_set(),
+which is why the generic version maps to clear_bit_unlock(), see atomic_t.txt.
+
+
+RMW ops:
+
+The test_and_{}_bit() operations return the original value of the bit.
+
+
+ORDERING
+--------
+
+Like with atomic_t, the rule of thumb is:
+
+ - non-RMW operations are unordered;
+
+ - RMW operations that have no return value are unordered;
+
+ - RMW operations that have a return value are fully ordered.
+
+Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+clear_bit_unlock() which has RELEASE semantics.
+
+Since a platform only has a single means of achieving atomic operations
+the same barriers as for atomic_t are used, see atomic_t.txt.
+
diff --git a/Documentation/atomic_t.txt b/Documentation/atomic_t.txt
new file mode 100644
index 000000000000..eee127115277
--- /dev/null
+++ b/Documentation/atomic_t.txt
@@ -0,0 +1,200 @@
+
+On atomic types (atomic_t atomic64_t and atomic_long_t).
+
+The atomic type provides an interface to the architecture's means of atomic
+RMW operations between CPUs (atomic operations on MMIO are not supported and
+can lead to fatal traps on some platforms).
+
+API
+---
+
+The 'full' API consists of (atomic64_ and atomic_long_ prefixes omitted for
+brevity):
+
+Non-RMW ops:
+
+ atomic_read(), atomic_set()
+ atomic_read_acquire(), atomic_set_release()
+
+
+RMW atomic operations:
+
+Arithmetic:
+
+ atomic_{add,sub,inc,dec}()
+ atomic_{add,sub,inc,dec}_return{,_relaxed,_acquire,_release}()
+ atomic_fetch_{add,sub,inc,dec}{,_relaxed,_acquire,_release}()
+
+
+Bitwise:
+
+ atomic_{and,or,xor,andnot}()
+ atomic_fetch_{and,or,xor,andnot}{,_relaxed,_acquire,_release}()
+
+
+Swap:
+
+ atomic_xchg{,_relaxed,_acquire,_release}()
+ atomic_cmpxchg{,_relaxed,_acquire,_release}()
+ atomic_try_cmpxchg{,_relaxed,_acquire,_release}()
+
+
+Reference count (but please see refcount_t):
+
+ atomic_add_unless(), atomic_inc_not_zero()
+ atomic_sub_and_test(), atomic_dec_and_test()
+
+
+Misc:
+
+ atomic_inc_and_test(), atomic_add_negative()
+ atomic_dec_unless_positive(), atomic_inc_unless_negative()
+
+
+Barriers:
+
+ smp_mb__{before,after}_atomic()
+
+
+
+SEMANTICS
+---------
+
+Non-RMW ops:
+
+The non-RMW ops are (typically) regular LOADs and STOREs and are canonically
+implemented using READ_ONCE(), WRITE_ONCE(), smp_load_acquire() and
+smp_store_release() respectively.
+
+The one detail to this is that atomic_set{}() should be observable to the RMW
+ops. That is:
+
+ C atomic-set
+
+ {
+ atomic_set(v, 1);
+ }
+
+ P1(atomic_t *v)
+ {
+ atomic_add_unless(v, 1, 0);
+ }
+
+ P2(atomic_t *v)
+ {
+ atomic_set(v, 0);
+ }
+
+ exists
+ (v=2)
+
+In this case we would expect the atomic_set() from CPU1 to either happen
+before the atomic_add_unless(), in which case that latter one would no-op, or
+_after_ in which case we'd overwrite its result. In no case is "2" a valid
+outcome.
+
+This is typically true on 'normal' platforms, where a regular competing STORE
+will invalidate a LL/SC or fail a CMPXCHG.
+
+The obvious case where this is not so is when we need to implement atomic ops
+with a lock:
+
+ CPU0 CPU1
+
+ atomic_add_unless(v, 1, 0);
+ lock();
+ ret = READ_ONCE(v->counter); // == 1
+ atomic_set(v, 0);
+ if (ret != u) WRITE_ONCE(v->counter, 0);
+ WRITE_ONCE(v->counter, ret + 1);
+ unlock();
+
+the typical solution is to then implement atomic_set{}() with atomic_xchg().
+
+
+RMW ops:
+
+These come in various forms:
+
+ - plain operations without return value: atomic_{}()
+
+ - operations which return the modified value: atomic_{}_return()
+
+ these are limited to the arithmetic operations because those are
+ reversible. Bitops are irreversible and therefore the modified value
+ is of dubious utility.
+
+ - operations which return the original value: atomic_fetch_{}()
+
+ - swap operations: xchg(), cmpxchg() and try_cmpxchg()
+
+ - misc; the special purpose operations that are commonly used and would,
+ given the interface, normally be implemented using (try_)cmpxchg loops but
+ are time critical and can, (typically) on LL/SC architectures, be more
+ efficiently implemented.
+
+All these operations are SMP atomic; that is, the operations (for a single
+atomic variable) can be fully ordered and no intermediate state is lost or
+visible.
+
+
+ORDERING (go read memory-barriers.txt first)
+--------
+
+The rule of thumb:
+
+ - non-RMW operations are unordered;
+
+ - RMW operations that have no return value are unordered;
+
+ - RMW operations that have a return value are fully ordered;
+
+ - RMW operations that are conditional are unordered on FAILURE,
+ otherwise the above rules apply.
+
+Except of course when an operation has an explicit ordering like:
+
+ {}_relaxed: unordered
+ {}_acquire: the R of the RMW (or atomic_read) is an ACQUIRE
+ {}_release: the W of the RMW (or atomic_set) is a RELEASE
+
+Where 'unordered' is against other memory locations. Address dependencies are
+not defeated.
+
+Fully ordered primitives are ordered against everything prior and everything
+subsequent. Therefore a fully ordered primitive is like having an smp_mb()
+before and an smp_mb() after the primitive.
+
+
+The barriers:
+
+ smp_mb__{before,after}_atomic()
+
+only apply to the RMW ops and can be used to augment/upgrade the ordering
+inherent to the used atomic op. These barriers provide a full smp_mb().
+
+These helper barriers exist because architectures have varying implicit
+ordering on their SMP atomic primitives. For example our TSO architectures
+provide full ordered atomics and these barriers are no-ops.
+
+Thus:
+
+ atomic_fetch_add();
+
+is equivalent to:
+
+ smp_mb__before_atomic();
+ atomic_fetch_add_relaxed();
+ smp_mb__after_atomic();
+
+However the atomic_fetch_add() might be implemented more efficiently.
+
+Further, while something like:
+
+ smp_mb__before_atomic();
+ atomic_dec(&X);
+
+is a 'typical' RELEASE pattern, the barrier is strictly stronger than
+a RELEASE. Similarly for something like:
+
+
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
new file mode 100644
index 000000000000..bdf1423d5f99
--- /dev/null
+++ b/Documentation/locking/crossrelease.txt
@@ -0,0 +1,874 @@
+Crossrelease
+============
+
+Started by Byungchul Park <byungchul.park@lge.com>
+
+Contents:
+
+ (*) Background
+
+ - What causes deadlock
+ - How lockdep works
+
+ (*) Limitation
+
+ - Limit lockdep
+ - Pros from the limitation
+ - Cons from the limitation
+ - Relax the limitation
+
+ (*) Crossrelease
+
+ - Introduce crossrelease
+ - Introduce commit
+
+ (*) Implementation
+
+ - Data structures
+ - How crossrelease works
+
+ (*) Optimizations
+
+ - Avoid duplication
+ - Lockless for hot paths
+
+ (*) APPENDIX A: What lockdep does to work aggresively
+
+ (*) APPENDIX B: How to avoid adding false dependencies
+
+
+==========
+Background
+==========
+
+What causes deadlock
+--------------------
+
+A deadlock occurs when a context is waiting for an event to happen,
+which is impossible because another (or the) context who can trigger the
+event is also waiting for another (or the) event to happen, which is
+also impossible due to the same reason.
+
+For example:
+
+ A context going to trigger event C is waiting for event A to happen.
+ A context going to trigger event A is waiting for event B to happen.
+ A context going to trigger event B is waiting for event C to happen.
+
+A deadlock occurs when these three wait operations run at the same time,
+because event C cannot be triggered if event A does not happen, which in
+turn cannot be triggered if event B does not happen, which in turn
+cannot be triggered if event C does not happen. After all, no event can
+be triggered since any of them never meets its condition to wake up.
+
+A dependency might exist between two waiters and a deadlock might happen
+due to an incorrect releationship between dependencies. Thus, we must
+define what a dependency is first. A dependency exists between them if:
+
+ 1. There are two waiters waiting for each event at a given time.
+ 2. The only way to wake up each waiter is to trigger its event.
+ 3. Whether one can be woken up depends on whether the other can.
+
+Each wait in the example creates its dependency like:
+
+ Event C depends on event A.
+ Event A depends on event B.
+ Event B depends on event C.
+
+ NOTE: Precisely speaking, a dependency is one between whether a
+ waiter for an event can be woken up and whether another waiter for
+ another event can be woken up. However from now on, we will describe
+ a dependency as if it's one between an event and another event for
+ simplicity.
+
+And they form circular dependencies like:
+
+ -> C -> A -> B -
+ / \
+ \ /
+ ----------------
+
+ where 'A -> B' means that event A depends on event B.
+
+Such circular dependencies lead to a deadlock since no waiter can meet
+its condition to wake up as described.
+
+CONCLUSION
+
+Circular dependencies cause a deadlock.
+
+
+How lockdep works
+-----------------
+
+Lockdep tries to detect a deadlock by checking dependencies created by
+lock operations, acquire and release. Waiting for a lock corresponds to
+waiting for an event, and releasing a lock corresponds to triggering an
+event in the previous section.
+
+In short, lockdep does:
+
+ 1. Detect a new dependency.
+ 2. Add the dependency into a global graph.
+ 3. Check if that makes dependencies circular.
+ 4. Report a deadlock or its possibility if so.
+
+For example, consider a graph built by lockdep that looks like:
+
+ A -> B -
+ \
+ -> E
+ /
+ C -> D -
+
+ where A, B,..., E are different lock classes.
+
+Lockdep will add a dependency into the graph on detection of a new
+dependency. For example, it will add a dependency 'E -> C' when a new
+dependency between lock E and lock C is detected. Then the graph will be:
+
+ A -> B -
+ \
+ -> E -
+ / \
+ -> C -> D - \
+ / /
+ \ /
+ ------------------
+
+ where A, B,..., E are different lock classes.
+
+This graph contains a subgraph which demonstrates circular dependencies:
+
+ -> E -
+ / \
+ -> C -> D - \
+ / /
+ \ /
+ ------------------
+
+ where C, D and E are different lock classes.
+
+This is the condition under which a deadlock might occur. Lockdep
+reports it on detection after adding a new dependency. This is the way
+how lockdep works.
+
+CONCLUSION
+
+Lockdep detects a deadlock or its possibility by checking if circular
+dependencies were created after adding each new dependency.
+
+
+==========
+Limitation
+==========
+
+Limit lockdep
+-------------
+
+Limiting lockdep to work on only typical locks e.g. spin locks and
+mutexes, which are released within the acquire context, the
+implementation becomes simple but its capacity for detection becomes
+limited. Let's check pros and cons in next section.
+
+
+Pros from the limitation
+------------------------
+
+Given the limitation, when acquiring a lock, locks in a held_locks
+cannot be released if the context cannot acquire it so has to wait to
+acquire it, which means all waiters for the locks in the held_locks are
+stuck. It's an exact case to create dependencies between each lock in
+the held_locks and the lock to acquire.
+
+For example:
+
+ CONTEXT X
+ ---------
+ acquire A
+ acquire B /* Add a dependency 'A -> B' */
+ release B
+ release A
+
+ where A and B are different lock classes.
+
+When acquiring lock A, the held_locks of CONTEXT X is empty thus no
+dependency is added. But when acquiring lock B, lockdep detects and adds
+a new dependency 'A -> B' between lock A in the held_locks and lock B.
+They can be simply added whenever acquiring each lock.
+
+And data required by lockdep exists in a local structure, held_locks
+embedded in task_struct. Forcing to access the data within the context,
+lockdep can avoid racy problems without explicit locks while handling
+the local data.
+
+Lastly, lockdep only needs to keep locks currently being held, to build
+a dependency graph. However, relaxing the limitation, it needs to keep
+even locks already released, because a decision whether they created
+dependencies might be long-deferred.
+
+To sum up, we can expect several advantages from the limitation:
+
+ 1. Lockdep can easily identify a dependency when acquiring a lock.
+ 2. Races are avoidable while accessing local locks in a held_locks.
+ 3. Lockdep only needs to keep locks currently being held.
+
+CONCLUSION
+
+Given the limitation, the implementation becomes simple and efficient.
+
+
+Cons from the limitation
+------------------------
+
+Given the limitation, lockdep is applicable only to typical locks. For
+example, page locks for page access or completions for synchronization
+cannot work with lockdep.
+
+Can we detect deadlocks below, under the limitation?
+
+Example 1:
+
+ CONTEXT X CONTEXT Y CONTEXT Z
+ --------- --------- ----------
+ mutex_lock A
+ lock_page B
+ lock_page B
+ mutex_lock A /* DEADLOCK */
+ unlock_page B held by X
+ unlock_page B
+ mutex_unlock A
+ mutex_unlock A
+
+ where A and B are different lock classes.
+
+No, we cannot.
+
+Example 2:
+
+ CONTEXT X CONTEXT Y
+ --------- ---------
+ mutex_lock A
+ mutex_lock A
+ wait_for_complete B /* DEADLOCK */
+ complete B
+ mutex_unlock A
+ mutex_unlock A
+
+ where A is a lock class and B is a completion variable.
+
+No, we cannot.
+
+CONCLUSION
+
+Given the limitation, lockdep cannot detect a deadlock or its
+possibility caused by page locks or completions.
+
+
+Relax the limitation
+--------------------
+
+Under the limitation, things to create dependencies are limited to
+typical locks. However, synchronization primitives like page locks and
+completions, which are allowed to be released in any context, also
+create dependencies and can cause a deadlock. So lockdep should track
+these locks to do a better job. We have to relax the limitation for
+these locks to work with lockdep.
+
+Detecting dependencies is very important for lockdep to work because
+adding a dependency means adding an opportunity to check whether it
+causes a deadlock. The more lockdep adds dependencies, the more it
+thoroughly works. Thus Lockdep has to do its best to detect and add as
+many true dependencies into a graph as possible.
+
+For example, considering only typical locks, lockdep builds a graph like:
+
+ A -> B -
+ \
+ -> E
+ /
+ C -> D -
+
+ where A, B,..., E are different lock classes.
+
+On the other hand, under the relaxation, additional dependencies might
+be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
+added thanks to the relaxation, the graph will be:
+
+ A -> B -
+ \
+ -> E -> GX
+ /
+ FX -> C -> D -
+
+ where A, B,..., E, FX and GX are different lock classes, and a suffix
+ 'X' is added on non-typical locks.
+
+The latter graph gives us more chances to check circular dependencies
+than the former. However, it might suffer performance degradation since
+relaxing the limitation, with which design and implementation of lockdep
+can be efficient, might introduce inefficiency inevitably. So lockdep
+should provide two options, strong detection and efficient detection.
+
+Choosing efficient detection:
+
+ Lockdep works with only locks restricted to be released within the
+ acquire context. However, lockdep works efficiently.
+
+Choosing strong detection:
+
+ Lockdep works with all synchronization primitives. However, lockdep
+ suffers performance degradation.
+
+CONCLUSION
+
+Relaxing the limitation, lockdep can add additional dependencies giving
+additional opportunities to check circular dependencies.
+
+
+============
+Crossrelease
+============
+
+Introduce crossrelease
+----------------------
+
+In order to allow lockdep to handle additional dependencies by what
+might be released in any context, namely 'crosslock', we have to be able
+to identify those created by crosslocks. The proposed 'crossrelease'
+feature provoides a way to do that.
+
+Crossrelease feature has to do:
+
+ 1. Identify dependencies created by crosslocks.
+ 2. Add the dependencies into a dependency graph.
+
+That's all. Once a meaningful dependency is added into graph, then
+lockdep would work with the graph as it did. The most important thing
+crossrelease feature has to do is to correctly identify and add true
+dependencies into the global graph.
+
+A dependency e.g. 'A -> B' can be identified only in the A's release
+context because a decision required to identify the dependency can be
+made only in the release context. That is to decide whether A can be
+released so that a waiter for A can be woken up. It cannot be made in
+other than the A's release context.
+
+It's no matter for typical locks because each acquire context is same as
+its release context, thus lockdep can decide whether a lock can be
+released in the acquire context. However for crosslocks, lockdep cannot
+make the decision in the acquire context but has to wait until the
+release context is identified.
+
+Therefore, deadlocks by crosslocks cannot be detected just when it
+happens, because those cannot be identified until the crosslocks are
+released. However, deadlock possibilities can be detected and it's very
+worth. See 'APPENDIX A' section to check why.
+
+CONCLUSION
+
+Using crossrelease feature, lockdep can work with what might be released
+in any context, namely crosslock.
+
+
+Introduce commit
+----------------
+
+Since crossrelease defers the work adding true dependencies of
+crosslocks until they are actually released, crossrelease has to queue
+all acquisitions which might create dependencies with the crosslocks.
+Then it identifies dependencies using the queued data in batches at a
+proper time. We call it 'commit'.
+
+There are four types of dependencies:
+
+1. TT type: 'typical lock A -> typical lock B'
+
+ Just when acquiring B, lockdep can see it's in the A's release
+ context. So the dependency between A and B can be identified
+ immediately. Commit is unnecessary.
+
+2. TC type: 'typical lock A -> crosslock BX'
+
+ Just when acquiring BX, lockdep can see it's in the A's release
+ context. So the dependency between A and BX can be identified
+ immediately. Commit is unnecessary, too.
+
+3. CT type: 'crosslock AX -> typical lock B'
+
+ When acquiring B, lockdep cannot identify the dependency because
+ there's no way to know if it's in the AX's release context. It has
+ to wait until the decision can be made. Commit is necessary.
+
+4. CC type: 'crosslock AX -> crosslock BX'
+
+ When acquiring BX, lockdep cannot identify the dependency because
+ there's no way to know if it's in the AX's release context. It has
+ to wait until the decision can be made. Commit is necessary.
+ But, handling CC type is not implemented yet. It's a future work.
+
+Lockdep can work without commit for typical locks, but commit step is
+necessary once crosslocks are involved. Introducing commit, lockdep
+performs three steps. What lockdep does in each step is:
+
+1. Acquisition: For typical locks, lockdep does what it originally did
+ and queues the lock so that CT type dependencies can be checked using
+ it at the commit step. For crosslocks, it saves data which will be
+ used at the commit step and increases a reference count for it.
+
+2. Commit: No action is reauired for typical locks. For crosslocks,
+ lockdep adds CT type dependencies using the data saved at the
+ acquisition step.
+
+3. Release: No changes are required for typical locks. When a crosslock
+ is released, it decreases a reference count for it.
+
+CONCLUSION
+
+Crossrelease introduces commit step to handle dependencies of crosslocks
+in batches at a proper time.
+
+
+==============
+Implementation
+==============
+
+Data structures
+---------------
+
+Crossrelease introduces two main data structures.
+
+1. hist_lock
+
+ This is an array embedded in task_struct, for keeping lock history so
+ that dependencies can be added using them at the commit step. Since
+ it's local data, it can be accessed locklessly in the owner context.
+ The array is filled at the acquisition step and consumed at the
+ commit step. And it's managed in circular manner.
+
+2. cross_lock
+
+ One per lockdep_map exists. This is for keeping data of crosslocks
+ and used at the commit step.
+
+
+How crossrelease works
+----------------------
+
+It's the key of how crossrelease works, to defer necessary works to an
+appropriate point in time and perform in at once at the commit step.
+Let's take a look with examples step by step, starting from how lockdep
+works without crossrelease for typical locks.
+
+ acquire A /* Push A onto held_locks */
+ acquire B /* Push B onto held_locks and add 'A -> B' */
+ acquire C /* Push C onto held_locks and add 'B -> C' */
+ release C /* Pop C from held_locks */
+ release B /* Pop B from held_locks */
+ release A /* Pop A from held_locks */
+
+ where A, B and C are different lock classes.
+
+ NOTE: This document assumes that readers already understand how
+ lockdep works without crossrelease thus omits details. But there's
+ one thing to note. Lockdep pretends to pop a lock from held_locks
+ when releasing it. But it's subtly different from the original pop
+ operation because lockdep allows other than the top to be poped.
+
+In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
+dependency every time acquiring a lock.
+
+After adding 'A -> B', a dependency graph will be:
+
+ A -> B
+
+ where A and B are different lock classes.
+
+And after adding 'B -> C', the graph will be:
+
+ A -> B -> C
+
+ where A, B and C are different lock classes.
+
+Let's performs commit step even for typical locks to add dependencies.
+Of course, commit step is not necessary for them, however, it would work
+well because this is a more general way.
+
+ acquire A
+ /*
+ * Queue A into hist_locks
+ *
+ * In hist_locks: A
+ * In graph: Empty
+ */
+
+ acquire B
+ /*
+ * Queue B into hist_locks
+ *
+ * In hist_locks: A, B
+ * In graph: Empty
+ */
+
+ acquire C
+ /*
+ * Queue C into hist_locks
+ *
+ * In hist_locks: A, B, C
+ * In graph: Empty
+ */
+
+ commit C
+ /*
+ * Add 'C -> ?'
+ * Answer the following to decide '?'
+ * What has been queued since acquire C: Nothing
+ *
+ * In hist_locks: A, B, C
+ * In graph: Empty
+ */
+
+ release C
+
+ commit B
+ /*
+ * Add 'B -> ?'
+ * Answer the following to decide '?'
+ * What has been queued since acquire B: C
+ *
+ * In hist_locks: A, B, C
+ * In graph: 'B -> C'
+ */
+
+ release B
+
+ commit A
+ /*
+ * Add 'A -> ?'
+ * Answer the following to decide '?'
+ * What has been queued since acquire A: B, C
+ *
+ * In hist_locks: A, B, C
+ * In graph: 'B -> C', 'A -> B', 'A -> C'
+ */
+
+ release A
+
+ where A, B and C are different lock classes.
+
+In this case, dependencies are added at the commit step as described.
+
+After commits for A, B and C, the graph will be:
+
+ A -> B -> C
+
+ where A, B and C are different lock classes.
+
+ NOTE: A dependency 'A -> C' is optimized out.
+
+We can see the former graph built without commit step is same as the
+latter graph built using commit steps. Of course the former way leads to
+earlier finish for building the graph, which means we can detect a
+deadlock or its possibility sooner. So the former way would be prefered
+when possible. But we cannot avoid using the latter way for crosslocks.
+
+Let's look at how commit steps work for crosslocks. In this case, the
+commit step is performed only on crosslock AX as real. And it assumes
+that the AX release context is different from the AX acquire context.
+
+ BX RELEASE CONTEXT BX ACQUIRE CONTEXT
+ ------------------ ------------------
+ acquire A
+ /*
+ * Push A onto held_locks
+ * Queue A into hist_locks
+ *
+ * In held_locks: A
+ * In hist_locks: A
+ * In graph: Empty
+ */
+
+ acquire BX
+ /*
+ * Add 'the top of held_locks -> BX'
+ *
+ * In held_locks: A
+ * In hist_locks: A
+ * In graph: 'A -> BX'
+ */
+
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ It must be guaranteed that the following operations are seen after
+ acquiring BX globally. It can be done by things like barrier.
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ acquire C
+ /*
+ * Push C onto held_locks
+ * Queue C into hist_locks
+ *
+ * In held_locks: C
+ * In hist_locks: C
+ * In graph: 'A -> BX'
+ */
+
+ release C
+ /*
+ * Pop C from held_locks
+ *
+ * In held_locks: Empty
+ * In hist_locks: C
+ * In graph: 'A -> BX'
+ */
+ acquire D
+ /*
+ * Push D onto held_locks
+ * Queue D into hist_locks
+ * Add 'the top of held_locks -> D'
+ *
+ * In held_locks: A, D
+ * In hist_locks: A, D
+ * In graph: 'A -> BX', 'A -> D'
+ */
+ acquire E
+ /*
+ * Push E onto held_locks
+ * Queue E into hist_locks
+ *
+ * In held_locks: E
+ * In hist_locks: C, E
+ * In graph: 'A -> BX', 'A -> D'
+ */
+
+ release E
+ /*
+ * Pop E from held_locks
+ *
+ * In held_locks: Empty
+ * In hist_locks: D, E
+ * In graph: 'A -> BX', 'A -> D'
+ */
+ release D
+ /*
+ * Pop D from held_locks
+ *
+ * In held_locks: A
+ * In hist_locks: A, D
+ * In graph: 'A -> BX', 'A -> D'
+ */
+ commit BX
+ /*
+ * Add 'BX -> ?'
+ * What has been queued since acquire BX: C, E
+ *
+ * In held_locks: Empty
+ * In hist_locks: D, E
+ * In graph: 'A -> BX', 'A -> D',
+ * 'BX -> C', 'BX -> E'
+ */
+
+ release BX
+ /*
+ * In held_locks: Empty
+ * In hist_locks: D, E
+ * In graph: 'A -> BX', 'A -> D',
+ * 'BX -> C', 'BX -> E'
+ */
+ release A
+ /*
+ * Pop A from held_locks
+ *
+ * In held_locks: Empty
+ * In hist_locks: A, D
+ * In graph: 'A -> BX', 'A -> D',
+ * 'BX -> C', 'BX -> E'
+ */
+
+ where A, BX, C,..., E are different lock classes, and a suffix 'X' is
+ added on crosslocks.
+
+Crossrelease considers all acquisitions after acqiuring BX are
+candidates which might create dependencies with BX. True dependencies
+will be determined when identifying the release context of BX. Meanwhile,
+all typical locks are queued so that they can be used at the commit step.
+And then two dependencies 'BX -> C' and 'BX -> E' are added at the
+commit step when identifying the release context.
+
+The final graph will be, with crossrelease:
+
+ -> C
+ /
+ -> BX -
+ / \
+ A - -> E
+ \
+ -> D
+
+ where A, BX, C,..., E are different lock classes, and a suffix 'X' is
+ added on crosslocks.
+
+However, the final graph will be, without crossrelease:
+
+ A -> D
+
+ where A and D are different lock classes.
+
+The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
+'BX -> E' giving additional opportunities to check if they cause
+deadlocks. This way lockdep can detect a deadlock or its possibility
+caused by crosslocks.
+
+CONCLUSION
+
+We checked how crossrelease works with several examples.
+
+
+=============
+Optimizations
+=============
+
+Avoid duplication
+-----------------
+
+Crossrelease feature uses a cache like what lockdep already uses for
+dependency chains, but this time it's for caching CT type dependencies.
+Once that dependency is cached, the same will never be added again.
+
+
+Lockless for hot paths
+----------------------
+
+To keep all locks for later use at the commit step, crossrelease adopts
+a local array embedded in task_struct, which makes access to the data
+lockless by forcing it to happen only within the owner context. It's
+like how lockdep handles held_locks. Lockless implmentation is important
+since typical locks are very frequently acquired and released.
+
+
+=================================================
+APPENDIX A: What lockdep does to work aggresively
+=================================================
+
+A deadlock actually occurs when all wait operations creating circular
+dependencies run at the same time. Even though they don't, a potential
+deadlock exists if the problematic dependencies exist. Thus it's
+meaningful to detect not only an actual deadlock but also its potential
+possibility. The latter is rather valuable. When a deadlock occurs
+actually, we can identify what happens in the system by some means or
+other even without lockdep. However, there's no way to detect possiblity
+without lockdep unless the whole code is parsed in head. It's terrible.
+Lockdep does the both, and crossrelease only focuses on the latter.
+
+Whether or not a deadlock actually occurs depends on several factors.
+For example, what order contexts are switched in is a factor. Assuming
+circular dependencies exist, a deadlock would occur when contexts are
+switched so that all wait operations creating the dependencies run
+simultaneously. Thus to detect a deadlock possibility even in the case
+that it has not occured yet, lockdep should consider all possible
+combinations of dependencies, trying to:
+
+1. Use a global dependency graph.
+
+ Lockdep combines all dependencies into one global graph and uses them,
+ regardless of which context generates them or what order contexts are
+ switched in. Aggregated dependencies are only considered so they are
+ prone to be circular if a problem exists.
+
+2. Check dependencies between classes instead of instances.
+
+ What actually causes a deadlock are instances of lock. However,
+ lockdep checks dependencies between classes instead of instances.
+ This way lockdep can detect a deadlock which has not happened but
+ might happen in future by others but the same class.
+
+3. Assume all acquisitions lead to waiting.
+
+ Although locks might be acquired without waiting which is essential
+ to create dependencies, lockdep assumes all acquisitions lead to
+ waiting since it might be true some time or another.
+
+CONCLUSION
+
+Lockdep detects not only an actual deadlock but also its possibility,
+and the latter is more valuable.
+
+
+==================================================
+APPENDIX B: How to avoid adding false dependencies
+==================================================
+
+Remind what a dependency is. A dependency exists if:
+
+ 1. There are two waiters waiting for each event at a given time.
+ 2. The only way to wake up each waiter is to trigger its event.
+ 3. Whether one can be woken up depends on whether the other can.
+
+For example:
+
+ acquire A
+ acquire B /* A dependency 'A -> B' exists */
+ release B
+ release A
+
+ where A and B are different lock classes.
+
+A depedency 'A -> B' exists since:
+
+ 1. A waiter for A and a waiter for B might exist when acquiring B.
+ 2. Only way to wake up each is to release what it waits for.
+ 3. Whether the waiter for A can be woken up depends on whether the
+ other can. IOW, TASK X cannot release A if it fails to acquire B.
+
+For another example:
+
+ TASK X TASK Y
+ ------ ------
+ acquire AX
+ acquire B /* A dependency 'AX -> B' exists */
+ release B
+ release AX held by Y
+
+ where AX and B are different lock classes, and a suffix 'X' is added
+ on crosslocks.
+
+Even in this case involving crosslocks, the same rule can be applied. A
+depedency 'AX -> B' exists since:
+
+ 1. A waiter for AX and a waiter for B might exist when acquiring B.
+ 2. Only way to wake up each is to release what it waits for.
+ 3. Whether the waiter for AX can be woken up depends on whether the
+ other can. IOW, TASK X cannot release AX if it fails to acquire B.
+
+Let's take a look at more complicated example:
+
+ TASK X TASK Y
+ ------ ------
+ acquire B
+ release B
+ fork Y
+ acquire AX
+ acquire C /* A dependency 'AX -> C' exists */
+ release C
+ release AX held by Y
+
+ where AX, B and C are different lock classes, and a suffix 'X' is
+ added on crosslocks.
+
+Does a dependency 'AX -> B' exist? Nope.
+
+Two waiters are essential to create a dependency. However, waiters for
+AX and B to create 'AX -> B' cannot exist at the same time in this
+example. Thus the dependency 'AX -> B' cannot be created.
+
+It would be ideal if the full set of true ones can be considered. But
+we can ensure nothing but what actually happened. Relying on what
+actually happens at runtime, we can anyway add only true ones, though
+they might be a subset of true ones. It's similar to how lockdep works
+for typical locks. There might be more true dependencies than what
+lockdep has detected in runtime. Lockdep has no choice but to rely on
+what actually happens. Crossrelease also relies on it.
+
+CONCLUSION
+
+Relying on what actually happens, lockdep can avoid adding false
+dependencies.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c4ddfcd5ee32..d1d1716f904b 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -498,11 +498,11 @@ And a couple of implicit varieties:
This means that ACQUIRE acts as a minimal "acquire" operation and
RELEASE acts as a minimal "release" operation.
-A subset of the atomic operations described in core-api/atomic_ops.rst have
-ACQUIRE and RELEASE variants in addition to fully-ordered and relaxed (no
-barrier semantics) definitions. For compound atomics performing both a load
-and a store, ACQUIRE semantics apply only to the load and RELEASE semantics
-apply only to the store portion of the operation.
+A subset of the atomic operations described in atomic_t.txt have ACQUIRE and
+RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions. For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
Memory barriers are only required where there's a possibility of interaction
between two CPUs or between a CPU and a device. If it can be guaranteed that
@@ -1876,8 +1876,7 @@ There are some more advanced barrier functions:
This makes sure that the death mark on the object is perceived to be set
*before* the reference counter is decremented.
- See Documentation/core-api/atomic_ops.rst for more information. See the
- "Atomic operations" subsection for information on where to use these.
+ See Documentation/atomic_{t,bitops}.txt for more information.
(*) lockless_dereference();
@@ -1982,10 +1981,7 @@ for each construct. These operations all imply certain barriers:
ACQUIRE operation has completed.
Memory operations issued before the ACQUIRE may be completed after
- the ACQUIRE operation has completed. An smp_mb__before_spinlock(),
- combined with a following ACQUIRE, orders prior stores against
- subsequent loads and stores. Note that this is weaker than smp_mb()!
- The smp_mb__before_spinlock() primitive is free on many architectures.
+ the ACQUIRE operation has completed.
(2) RELEASE operation implication:
@@ -2503,88 +2499,7 @@ operations are noted specially as some of them imply full memory barriers and
some don't, but they're very heavily relied on as a group throughout the
kernel.
-Any atomic operation that modifies some state in memory and returns information
-about the state (old or new) implies an SMP-conditional general memory barrier
-(smp_mb()) on each side of the actual operation (with the exception of
-explicit lock operations, described later). These include:
-
- xchg();
- atomic_xchg(); atomic_long_xchg();
- atomic_inc_return(); atomic_long_inc_return();
- atomic_dec_return(); atomic_long_dec_return();
- atomic_add_return(); atomic_long_add_return();
- atomic_sub_return(); atomic_long_sub_return();
- atomic_inc_and_test(); atomic_long_inc_and_test();
- atomic_dec_and_test(); atomic_long_dec_and_test();
- atomic_sub_and_test(); atomic_long_sub_and_test();
- atomic_add_negative(); atomic_long_add_negative();
- test_and_set_bit();
- test_and_clear_bit();
- test_and_change_bit();
-
- /* when succeeds */
- cmpxchg();
- atomic_cmpxchg(); atomic_long_cmpxchg();
- atomic_add_unless(); atomic_long_add_unless();
-
-These are used for such things as implementing ACQUIRE-class and RELEASE-class
-operations and adjusting reference counters towards object destruction, and as
-such the implicit memory barrier effects are necessary.
-
-
-The following operations are potential problems as they do _not_ imply memory
-barriers, but might be used for implementing such things as RELEASE-class
-operations:
-
- atomic_set();
- set_bit();
- clear_bit();
- change_bit();
-
-With these the appropriate explicit memory barrier should be used if necessary
-(smp_mb__before_atomic() for instance).
-
-
-The following also do _not_ imply memory barriers, and so may require explicit
-memory barriers under some circumstances (smp_mb__before_atomic() for
-instance):
-
- atomic_add();
- atomic_sub();
- atomic_inc();
- atomic_dec();
-
-If they're used for statistics generation, then they probably don't need memory
-barriers, unless there's a coupling between statistical data.
-
-If they're used for reference counting on an object to control its lifetime,
-they probably don't need memory barriers because either the reference count
-will be adjusted inside a locked section, or the caller will already hold
-sufficient references to make the lock, and thus a memory barrier unnecessary.
-
-If they're used for constructing a lock of some description, then they probably
-do need memory barriers as a lock primitive generally has to do things in a
-specific order.
-
-Basically, each usage case has to be carefully considered as to whether memory
-barriers are needed or not.
-
-The following operations are special locking primitives:
-
- test_and_set_bit_lock();
- clear_bit_unlock();
- __clear_bit_unlock();
-
-These implement ACQUIRE-class and RELEASE-class operations. These should be
-used in preference to other operations when implementing locking primitives,
-because their implementations can be optimised on many architectures.
-
-[!] Note that special memory barrier primitives are available for these
-situations because on some CPUs the atomic instructions used imply full memory
-barriers, and so barrier instructions are superfluous in conjunction with them,
-and in such cases the special barrier primitives will be no-ops.
-
-See Documentation/core-api/atomic_ops.rst for more information.
+See Documentation/atomic_t.txt for more information.
ACCESSING DEVICES
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index b83dfa1c0602..ab16efe0c79d 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -149,6 +149,26 @@ static_branch_inc(), will change the branch back to true. Likewise, if the
key is initialized false, a 'static_branch_inc()', will change the branch to
true. And then a 'static_branch_dec()', will again make the branch false.
+The state and the reference count can be retrieved with 'static_key_enabled()'
+and 'static_key_count()'. In general, if you use these functions, they
+should be protected with the same mutex used around the enable/disable
+or increment/decrement function.
+
+Note that switching branches results in some locks being taken,
+particularly the CPU hotplug lock (in order to avoid races against
+CPUs being brought in the kernel whilst the kernel is getting
+patched). Calling the static key API from within a hotplug notifier is
+thus a sure deadlock recipe. In order to still allow use of the
+functionnality, the following functions are provided:
+
+ static_key_enable_cpuslocked()
+ static_key_disable_cpuslocked()
+ static_branch_enable_cpuslocked()
+ static_branch_disable_cpuslocked()
+
+These functions are *not* general purpose, and must only be used when
+you really know that you're in the above context, and no other.
+
Where an array of keys is required, it can be defined as::
DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 38310dcd6620..bc80fc0e210f 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -1956,10 +1956,7 @@ MMIO 쓰기 배리어
뒤에 완료됩니다.
ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에
- 완료될 수 있습니다. smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는
- 코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서
- 맞춥니다. 이건 smp_mb() 보다 완화된 것임을 기억하세요! 많은 아키텍쳐에서
- smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다.
+ 완료될 수 있습니다.
(2) RELEASE 오퍼레이션의 영향:
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 54b54da6384c..11859287c52a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -123,6 +123,8 @@ static inline void atomic_set(atomic_t *v, int i)
atomic_ops_unlock(flags);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#endif
/*
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..ae4241ab19a8 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -358,14 +358,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
-/*
- * Accesses appearing in program order before a spin_lock() operation
- * can be reordered with accesses inside the critical section, by virtue
- * of arch_spin_lock being constructed using acquire semantics.
- *
- * In cases where this is problematic (e.g. try_to_wake_up), an
- * smp_mb__before_spinlock() can restore the required ordering.
- */
-#define smp_mb__before_spinlock() smp_mb()
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index a62ba368b27d..fb3dfb2a667e 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -42,6 +42,8 @@ static inline void atomic_set(atomic_t *v, int new)
);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
/**
* atomic_read - reads a word, atomically
* @v: pointer to atomic value
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index 6c1380a8a0d4..eee779f26cc4 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -37,6 +37,8 @@ static inline int atomic_set(atomic_t *v, int i)
return i;
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#define ATOMIC_OP(op, c_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 5394b9c5f914..17b98a87e5e2 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -65,6 +65,8 @@ static __inline__ void atomic_set(atomic_t *v, int i)
_atomic_spin_unlock_irqrestore(v, flags);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
static __inline__ int atomic_read(const atomic_t *v)
{
return READ_ONCE((v)->counter);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 25d42bd3f114..9c601adfc500 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,13 +74,6 @@ do { \
___p1; \
})
-/*
- * This must resolve to hwsync on SMP for the context switch path.
- * See _switch, and core scheduler context switch memory ordering
- * comments.
- */
-#define smp_mb__before_spinlock() smp_mb()
-
#include <asm-generic/barrier.h>
#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 8c1b913de6d7..c1b1ec94b06c 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -342,5 +342,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
#define arch_read_relax(lock) __rw_yield(lock)
#define arch_write_relax(lock) __rw_yield(lock)
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
+
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index ee3f11c43cda..7643e979e333 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -29,6 +29,8 @@ int atomic_xchg(atomic_t *, int);
int __atomic_add_unless(atomic_t *, int, int);
void atomic_set(atomic_t *, int);
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
#define atomic_read(v) ACCESS_ONCE((v)->counter)
#define atomic_add(i, v) ((void)atomic_add_return( (int)(i), (v)))
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index a93774255136..53a423e7cb92 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -101,6 +101,8 @@ static inline void atomic_set(atomic_t *v, int n)
_atomic_xchg(&v->counter, n);
}
+#define atomic_set_release(v, i) atomic_set((v), (i))
+
/* A 64bit atomic type */
typedef struct {
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 33380b871463..0874ebda3069 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -197,35 +197,56 @@ static inline int atomic_xchg(atomic_t *v, int new)
return xchg(&v->counter, new);
}
-#define ATOMIC_OP(op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- asm volatile(LOCK_PREFIX #op"l %1,%0" \
- : "+m" (v->counter) \
- : "ir" (i) \
- : "memory"); \
+static inline void atomic_and(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "andl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
+}
+
+static inline int atomic_fetch_and(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
+
+ do { } while (!atomic_try_cmpxchg(v, &val, val & i));
+
+ return val;
}
-#define ATOMIC_FETCH_OP(op, c_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- int val = atomic_read(v); \
- do { \
- } while (!atomic_try_cmpxchg(v, &val, val c_op i)); \
- return val; \
+static inline void atomic_or(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "orl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
}
-#define ATOMIC_OPS(op, c_op) \
- ATOMIC_OP(op) \
- ATOMIC_FETCH_OP(op, c_op)
+static inline int atomic_fetch_or(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
-ATOMIC_OPS(and, &)
-ATOMIC_OPS(or , |)
-ATOMIC_OPS(xor, ^)
+ do { } while (!atomic_try_cmpxchg(v, &val, val | i));
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP
+ return val;
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "xorl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i)
+ : "memory");
+}
+
+static inline int atomic_fetch_xor(int i, atomic_t *v)
+{
+ int val = atomic_read(v);
+
+ do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
+
+ return val;
+}
/**
* __atomic_add_unless - add unless the number is already a given value
@@ -239,10 +260,12 @@ ATOMIC_OPS(xor, ^)
static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c = atomic_read(v);
+
do {
if (unlikely(c == u))
break;
} while (!atomic_try_cmpxchg(v, &c, c + a));
+
return c;
}
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 71d7705fb303..9e206f31ce2a 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -312,37 +312,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-#define ATOMIC64_OP(op, c_op) \
-static inline void atomic64_##op(long long i, atomic64_t *v) \
-{ \
- long long old, c = 0; \
- while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
- c = old; \
+static inline void atomic64_and(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ c = old;
}
-#define ATOMIC64_FETCH_OP(op, c_op) \
-static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \
-{ \
- long long old, c = 0; \
- while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c) \
- c = old; \
- return old; \
+static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ c = old;
+
+ return old;
}
-ATOMIC64_FETCH_OP(add, +)
+static inline void atomic64_or(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
-#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
+ while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ c = old;
+}
+
+static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ c = old;
+
+ return old;
+}
-#define ATOMIC64_OPS(op, c_op) \
- ATOMIC64_OP(op, c_op) \
- ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_xor(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ c = old;
+}
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ c = old;
+
+ return old;
+}
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+{
+ long long old, c = 0;
+
+ while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
+ c = old;
+
+ return old;
+}
+
+#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6189a433c9a9..5d9de36a2f04 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -177,7 +177,7 @@ static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
}
#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, long *old, long new)
+static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
{
return try_cmpxchg(&v->counter, old, new);
}
@@ -198,7 +198,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
*/
static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
{
- long c = atomic64_read(v);
+ s64 c = atomic64_read(v);
do {
if (unlikely(c == u))
return false;
@@ -217,7 +217,7 @@ static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
*/
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
- long dec, c = atomic64_read(v);
+ s64 dec, c = atomic64_read(v);
do {
dec = c - 1;
if (unlikely(dec < 0))
@@ -226,34 +226,55 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
return dec;
}
-#define ATOMIC64_OP(op) \
-static inline void atomic64_##op(long i, atomic64_t *v) \
-{ \
- asm volatile(LOCK_PREFIX #op"q %1,%0" \
- : "+m" (v->counter) \
- : "er" (i) \
- : "memory"); \
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "andq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
}
-#define ATOMIC64_FETCH_OP(op, c_op) \
-static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
-{ \
- long val = atomic64_read(v); \
- do { \
- } while (!atomic64_try_cmpxchg(v, &val, val c_op i)); \
- return val; \
+static inline long atomic64_fetch_and(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
+
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val & i));
+ return val;
}
-#define ATOMIC64_OPS(op, c_op) \
- ATOMIC64_OP(op) \
- ATOMIC64_FETCH_OP(op, c_op)
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "orq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
+}
-ATOMIC64_OPS(and, &)
-ATOMIC64_OPS(or, |)
-ATOMIC64_OPS(xor, ^)
+static inline long atomic64_fetch_or(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val | i));
+ return val;
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "xorq %1,%0"
+ : "+m" (v->counter)
+ : "er" (i)
+ : "memory");
+}
+
+static inline long atomic64_fetch_xor(long i, atomic64_t *v)
+{
+ s64 val = atomic64_read(v);
+
+ do {
+ } while (!atomic64_try_cmpxchg(v, &val, val ^ i));
+ return val;
+}
#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index d90296d061e8..b5069e802d5c 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -157,7 +157,7 @@ extern void __add_wrong_size(void)
#define __raw_try_cmpxchg(_ptr, _pold, _new, size, lock) \
({ \
bool success; \
- __typeof__(_ptr) _old = (_pold); \
+ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
switch (size) { \
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index aae87c4c546e..c62e71614c75 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -455,7 +455,11 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa
per_cpu(timer_unstable_counter_workaround, i) = wa;
}
- static_branch_enable(&arch_timer_read_ool_enabled);
+ /*
+ * Use the locked version, as we're called from the CPU
+ * hotplug framework. Otherwise, we end-up in deadlock-land.
+ */
+ static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled);
/*
* Don't use the vdso fastpath if errata require using the
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 00d8967c8512..0511fce5c947 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -28,6 +28,7 @@
#include <linux/debugfs.h>
#include <linux/sort.h>
+#include <linux/sched/mm.h>
#include "intel_drv.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@@ -4331,7 +4332,7 @@ i915_drop_caches_set(void *data, u64 val)
mutex_unlock(&dev->struct_mutex);
}
- lockdep_set_current_reclaim_state(GFP_KERNEL);
+ fs_reclaim_acquire(GFP_KERNEL);
if (val & DROP_BOUND)
i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);
@@ -4340,7 +4341,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(dev_priv);
- lockdep_clear_current_reclaim_state();
+ fs_reclaim_release(GFP_KERNEL);
if (val & DROP_FREED) {
synchronize_rcu();
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 3d424a51cabb..f0fd3adb1693 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -446,14 +446,14 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
ovl_path_upper(dentry, &upperpath);
realfile = ovl_path_open(&upperpath, O_RDONLY);
- smp_mb__before_spinlock();
+
inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
inode_unlock(inode);
return PTR_ERR(realfile);
}
- od->upperfile = realfile;
+ smp_store_release(&od->upperfile, realfile);
} else {
/* somebody has beaten us to it */
if (!IS_ERR(realfile))
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b0d5897bc4e6..886085b47c75 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -109,27 +109,24 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
goto out;
WRITE_ONCE(uwq->waken, true);
/*
- * The implicit smp_mb__before_spinlock in try_to_wake_up()
- * renders uwq->waken visible to other CPUs before the task is
- * waken.
+ * The Program-Order guarantees provided by the scheduler
+ * ensure uwq->waken is visible before the task is woken.
*/
ret = wake_up_state(wq->private, mode);
- if (ret)
+ if (ret) {
/*
* Wake only once, autoremove behavior.
*
- * After the effect of list_del_init is visible to the
- * other CPUs, the waitqueue may disappear from under
- * us, see the !list_empty_careful() in
- * handle_userfault(). try_to_wake_up() has an
- * implicit smp_mb__before_spinlock, and the
- * wq->private is read before calling the extern
- * function "wake_up_state" (which in turns calls
- * try_to_wake_up). While the spin_lock;spin_unlock;
- * wouldn't be enough, the smp_mb__before_spinlock is
- * enough to avoid an explicit smp_mb() here.
+ * After the effect of list_del_init is visible to the other
+ * CPUs, the waitqueue may disappear from under us, see the
+ * !list_empty_careful() in handle_userfault().
+ *
+ * try_to_wake_up() has an implicit smp_mb(), and the
+ * wq->private is read before calling the extern function
+ * "wake_up_state" (which in turns calls try_to_wake_up).
*/
list_del_init(&wq->entry);
+ }
out:
return ret;
}
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index dad68bf46c77..8d28eb010d0d 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -21,6 +21,8 @@ typedef struct {
extern long long atomic64_read(const atomic64_t *v);
extern void atomic64_set(atomic64_t *v, long long i);
+#define atomic64_set_release(v, i) atomic64_set((v), (i))
+
#define ATOMIC64_OP(op) \
extern void atomic64_##op(long long a, atomic64_t *v);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index c56be7410130..40d6bfec0e0d 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -38,6 +38,9 @@
* Besides, if an arch has a special barrier for acquire/release, it could
* implement its own __atomic_op_* and use the same framework for building
* variants
+ *
+ * If an architecture overrides __atomic_op_acquire() it will probably want
+ * to define smp_mb__after_spinlock().
*/
#ifndef __atomic_op_acquire
#define __atomic_op_acquire(op, args...) \
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 5d5aaae3af43..9bcebf509b4d 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -9,6 +9,9 @@
*/
#include <linux/wait.h>
+#ifdef CONFIG_LOCKDEP_COMPLETE
+#include <linux/lockdep.h>
+#endif
/*
* struct completion - structure used to maintain state for a "completion"
@@ -25,10 +28,50 @@
struct completion {
unsigned int done;
wait_queue_head_t wait;
+#ifdef CONFIG_LOCKDEP_COMPLETE
+ struct lockdep_map_cross map;
+#endif
};
+#ifdef CONFIG_LOCKDEP_COMPLETE
+static inline void complete_acquire(struct completion *x)
+{
+ lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_);
+}
+
+static inline void complete_release(struct completion *x)
+{
+ lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_);
+}
+
+static inline void complete_release_commit(struct completion *x)
+{
+ lock_commit_crosslock((struct lockdep_map *)&x->map);
+}
+
+#define init_completion(x) \
+do { \
+ static struct lock_class_key __key; \
+ lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \
+ "(complete)" #x, \
+ &__key, 0); \
+ __init_completion(x); \
+} while (0)
+#else
+#define init_completion(x) __init_completion(x)
+static inline void complete_acquire(struct completion *x) {}
+static inline void complete_release(struct completion *x) {}
+static inline void complete_release_commit(struct completion *x) {}
+#endif
+
+#ifdef CONFIG_LOCKDEP_COMPLETE
+#define COMPLETION_INITIALIZER(work) \
+ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
+ STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) }
+#else
#define COMPLETION_INITIALIZER(work) \
{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+#endif
#define COMPLETION_INITIALIZER_ONSTACK(work) \
({ init_completion(&work); work; })
@@ -70,7 +113,7 @@ struct completion {
* This inline function will initialize a dynamically created completion
* structure.
*/
-static inline void init_completion(struct completion *x)
+static inline void __init_completion(struct completion *x)
{
x->done = 0;
init_waitqueue_head(&x->wait);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 898cfe2eeb42..e74655d941b7 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -37,12 +37,6 @@ static inline bool cpusets_enabled(void)
return static_branch_unlikely(&cpusets_enabled_key);
}
-static inline int nr_cpusets(void)
-{
- /* jump label reference count + the top-level cpuset */
- return static_key_count(&cpusets_enabled_key.key) + 1;
-}
-
static inline void cpuset_inc(void)
{
static_branch_inc(&cpusets_pre_enable_key);
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 7c5b694864cd..f36bfd26f998 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -54,7 +54,6 @@ union futex_key {
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
-extern void exit_pi_state_list(struct task_struct *curr);
#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
#define futex_cmpxchg_enabled 1
#else
@@ -64,8 +63,14 @@ extern int futex_cmpxchg_enabled;
static inline void exit_robust_list(struct task_struct *curr)
{
}
+#endif
+
+#ifdef CONFIG_FUTEX_PI
+extern void exit_pi_state_list(struct task_struct *curr);
+#else
static inline void exit_pi_state_list(struct task_struct *curr)
{
}
#endif
+
#endif
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 5dd1272d1ab2..5fdd93bb9300 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -23,10 +23,26 @@
# define trace_softirq_context(p) ((p)->softirq_context)
# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled)
# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
-# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
-# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
-# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
-# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
+# define trace_hardirq_enter() \
+do { \
+ current->hardirq_context++; \
+ crossrelease_hist_start(XHLOCK_HARD); \
+} while (0)
+# define trace_hardirq_exit() \
+do { \
+ current->hardirq_context--; \
+ crossrelease_hist_end(XHLOCK_HARD); \
+} while (0)
+# define lockdep_softirq_enter() \
+do { \
+ current->softirq_context++; \
+ crossrelease_hist_start(XHLOCK_SOFT); \
+} while (0)
+# define lockdep_softirq_exit() \
+do { \
+ current->softirq_context--; \
+ crossrelease_hist_end(XHLOCK_SOFT); \
+} while (0)
# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
#else
# define trace_hardirqs_on() do { } while (0)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2afd74b9d844..cd5861651b17 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -163,6 +163,8 @@ extern void jump_label_apply_nops(struct module *mod);
extern int static_key_count(struct static_key *key);
extern void static_key_enable(struct static_key *key);
extern void static_key_disable(struct static_key *key);
+extern void static_key_enable_cpuslocked(struct static_key *key);
+extern void static_key_disable_cpuslocked(struct static_key *key);
/*
* We should be using ATOMIC_INIT() for initializing .enabled, but
@@ -234,24 +236,29 @@ static inline int jump_label_apply_nops(struct module *mod)
static inline void static_key_enable(struct static_key *key)
{
- int count = static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
+ STATIC_KEY_CHECK_USE();
- if (!count)
- static_key_slow_inc(key);
+ if (atomic_read(&key->enabled) != 0) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+ return;
+ }
+ atomic_set(&key->enabled, 1);
}
static inline void static_key_disable(struct static_key *key)
{
- int count = static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
+ STATIC_KEY_CHECK_USE();
- if (count)
- static_key_slow_dec(key);
+ if (atomic_read(&key->enabled) != 1) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+ return;
+ }
+ atomic_set(&key->enabled, 0);
}
+#define static_key_enable_cpuslocked(k) static_key_enable((k))
+#define static_key_disable_cpuslocked(k) static_key_disable((k))
+
#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) }
#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) }
@@ -413,8 +420,10 @@ extern bool ____wrong_branch_error(void);
* Normal usage; boolean enable/disable.
*/
-#define static_branch_enable(x) static_key_enable(&(x)->key)
-#define static_branch_disable(x) static_key_disable(&(x)->key)
+#define static_branch_enable(x) static_key_enable(&(x)->key)
+#define static_branch_disable(x) static_key_disable(&(x)->key)
+#define static_branch_enable_cpuslocked(x) static_key_enable_cpuslocked(&(x)->key)
+#define static_branch_disable_cpuslocked(x) static_key_disable_cpuslocked(&(x)->key)
#endif /* __ASSEMBLY__ */
diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h
index b7f8aced7870..41960fecf783 100644
--- a/include/linux/kasan-checks.h
+++ b/include/linux/kasan-checks.h
@@ -2,11 +2,13 @@
#define _LINUX_KASAN_CHECKS_H
#ifdef CONFIG_KASAN
-void kasan_check_read(const void *p, unsigned int size);
-void kasan_check_write(const void *p, unsigned int size);
+void kasan_check_read(const volatile void *p, unsigned int size);
+void kasan_check_write(const volatile void *p, unsigned int size);
#else
-static inline void kasan_check_read(const void *p, unsigned int size) { }
-static inline void kasan_check_write(const void *p, unsigned int size) { }
+static inline void kasan_check_read(const volatile void *p, unsigned int size)
+{ }
+static inline void kasan_check_write(const volatile void *p, unsigned int size)
+{ }
#endif
#endif
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index fffe49f188e6..651cc61af041 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -29,7 +29,7 @@ extern int lock_stat;
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-(
*/
-#define XXX_LOCK_USAGE_STATES (1+3*4)
+#define XXX_LOCK_USAGE_STATES (1+2*4)
/*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes
@@ -155,6 +155,12 @@ struct lockdep_map {
int cpu;
unsigned long ip;
#endif
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+ /*
+ * Whether it's a crosslock.
+ */
+ int cross;
+#endif
};
static inline void lockdep_copy_map(struct lockdep_map *to,
@@ -258,8 +264,95 @@ struct held_lock {
unsigned int hardirqs_off:1;
unsigned int references:12; /* 32 bits */
unsigned int pin_count;
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+ /*
+ * Generation id.
+ *
+ * A value of cross_gen_id will be stored when holding this,
+ * which is globally increased whenever each crosslock is held.
+ */
+ unsigned int gen_id;
+#endif
+};
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#define MAX_XHLOCK_TRACE_ENTRIES 5
+
+/*
+ * This is for keeping locks waiting for commit so that true dependencies
+ * can be added at commit step.
+ */
+struct hist_lock {
+ /*
+ * Id for each entry in the ring buffer. This is used to
+ * decide whether the ring buffer was overwritten or not.
+ *
+ * For example,
+ *
+ * |<----------- hist_lock ring buffer size ------->|
+ * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+ * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii.......................
+ *
+ * where 'p' represents an acquisition in process
+ * context, 'i' represents an acquisition in irq
+ * context.
+ *
+ * In this example, the ring buffer was overwritten by
+ * acquisitions in irq context, that should be detected on
+ * rollback or commit.
+ */
+ unsigned int hist_id;
+
+ /*
+ * Seperate stack_trace data. This will be used at commit step.
+ */
+ struct stack_trace trace;
+ unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES];
+
+ /*
+ * Seperate hlock instance. This will be used at commit step.
+ *
+ * TODO: Use a smaller data structure containing only necessary
+ * data. However, we should make lockdep code able to handle the
+ * smaller one first.
+ */
+ struct held_lock hlock;
+};
+
+/*
+ * To initialize a lock as crosslock, lockdep_init_map_crosslock() should
+ * be called instead of lockdep_init_map().
+ */
+struct cross_lock {
+ /*
+ * When more than one acquisition of crosslocks are overlapped,
+ * we have to perform commit for them based on cross_gen_id of
+ * the first acquisition, which allows us to add more true
+ * dependencies.
+ *
+ * Moreover, when no acquisition of a crosslock is in progress,
+ * we should not perform commit because the lock might not exist
+ * any more, which might cause incorrect memory access. So we
+ * have to track the number of acquisitions of a crosslock.
+ */
+ int nr_acquire;
+
+ /*
+ * Seperate hlock instance. This will be used at commit step.
+ *
+ * TODO: Use a smaller data structure containing only necessary
+ * data. However, we should make lockdep code able to handle the
+ * smaller one first.
+ */
+ struct held_lock hlock;
};
+struct lockdep_map_cross {
+ struct lockdep_map map;
+ struct cross_lock xlock;
+};
+#endif
+
/*
* Initialization, self-test and debugging-output methods:
*/
@@ -282,13 +375,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass);
/*
- * To initialize a lockdep_map statically use this macro.
- * Note that _name must not be NULL.
- */
-#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
- { .name = (_name), .key = (void *)(_key), }
-
-/*
* Reinitialize a lock key - for cases where there is special locking or
* special initialization of locks so that the validator gets the scope
* of dependencies wrong: they are either too broad (they need a class-split)
@@ -363,10 +449,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
-extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
-extern void lockdep_clear_current_reclaim_state(void);
-extern void lockdep_trace_alloc(gfp_t mask);
-
struct pin_cookie { unsigned int val; };
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
@@ -375,7 +457,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
-# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
+# define INIT_LOCKDEP .lockdep_recursion = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
@@ -416,9 +498,6 @@ static inline void lockdep_on(void)
# define lock_downgrade(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
-# define lockdep_set_current_reclaim_state(g) do { } while (0)
-# define lockdep_clear_current_reclaim_state() do { } while (0)
-# define lockdep_trace_alloc(g) do { } while (0)
# define lockdep_info() do { } while (0)
# define lockdep_init_map(lock, name, key, sub) \
do { (void)(name); (void)(key); } while (0)
@@ -467,6 +546,56 @@ struct pin_cookie { };
#endif /* !LOCKDEP */
+enum xhlock_context_t {
+ XHLOCK_HARD,
+ XHLOCK_SOFT,
+ XHLOCK_PROC,
+ XHLOCK_CTX_NR,
+};
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+extern void lockdep_init_map_crosslock(struct lockdep_map *lock,
+ const char *name,
+ struct lock_class_key *key,
+ int subclass);
+extern void lock_commit_crosslock(struct lockdep_map *lock);
+
+/*
+ * What we essencially have to initialize is 'nr_acquire'. Other members
+ * will be initialized in add_xlock().
+ */
+#define STATIC_CROSS_LOCK_INIT() \
+ { .nr_acquire = 0,}
+
+#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \
+ { .map.name = (_name), .map.key = (void *)(_key), \
+ .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), }
+
+/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+ { .name = (_name), .key = (void *)(_key), .cross = 0, }
+
+extern void crossrelease_hist_start(enum xhlock_context_t c);
+extern void crossrelease_hist_end(enum xhlock_context_t c);
+extern void lockdep_init_task(struct task_struct *task);
+extern void lockdep_free_task(struct task_struct *task);
+#else
+/*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+ { .name = (_name), .key = (void *)(_key), }
+
+static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
+static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
+static inline void lockdep_init_task(struct task_struct *task) {}
+static inline void lockdep_free_task(struct task_struct *task) {}
+#endif
+
#ifdef CONFIG_LOCK_STAT
extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cadee0a3508..dc1edec05a3f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -535,6 +535,10 @@ extern void tlb_finish_mmu(struct mmu_gather *tlb,
*/
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
+ /*
+ * Must be called with PTL held; such that our PTL acquire will have
+ * observed the store from set_tlb_flush_pending().
+ */
return atomic_read(&mm->tlb_flush_pending) > 0;
}
@@ -556,10 +560,29 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
atomic_inc(&mm->tlb_flush_pending);
/*
- * Guarantee that the tlb_flush_pending increase does not leak into the
- * critical section updating the page tables
+ * The only time this value is relevant is when there are indeed pages
+ * to flush. And we'll only flush pages after changing them, which
+ * requires the PTL.
+ *
+ * So the ordering here is:
+ *
+ * atomic_inc(&mm->tlb_flush_pending);
+ * spin_lock(&ptl);
+ * ...
+ * set_pte_at();
+ * spin_unlock(&ptl);
+ *
+ * spin_lock(&ptl)
+ * mm_tlb_flush_pending();
+ * ....
+ * spin_unlock(&ptl);
+ *
+ * flush_tlb_range();
+ * atomic_dec(&mm->tlb_flush_pending);
+ *
+ * So the =true store is constrained by the PTL unlock, and the =false
+ * store is constrained by the TLB invalidate.
*/
- smp_mb__before_spinlock();
}
/* Clearing is done after a TLB flush, which also provides a barrier. */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index ae0528b834cd..e784761a4443 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,6 +32,7 @@ struct rw_semaphore {
#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
+extern int __must_check __down_read_killable(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
extern void __down_write(struct rw_semaphore *sem);
extern int __must_check __down_write_killable(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index dd1d14250340..0ad7318ff299 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -44,6 +44,7 @@ struct rw_semaphore {
};
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8337e2db0bb2..772c5f643764 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -846,7 +846,17 @@ struct task_struct {
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
- gfp_t lockdep_reclaim_gfp;
+#endif
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#define MAX_XHLOCKS_NR 64UL
+ struct hist_lock *xhlocks; /* Crossrelease history locks */
+ unsigned int xhlock_idx;
+ /* For restoring at history boundaries */
+ unsigned int xhlock_idx_hist[XHLOCK_CTX_NR];
+ unsigned int hist_id;
+ /* For overwrite check at each context exit */
+ unsigned int hist_id_save[XHLOCK_CTX_NR];
#endif
#ifdef CONFIG_UBSAN
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2b24a6974847..2b0a281f9d26 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags)
return flags;
}
+#ifdef CONFIG_LOCKDEP
+extern void fs_reclaim_acquire(gfp_t gfp_mask);
+extern void fs_reclaim_release(gfp_t gfp_mask);
+#else
+static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
+static inline void fs_reclaim_release(gfp_t gfp_mask) { }
+#endif
+
static inline unsigned int memalloc_noio_save(void)
{
unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index d9510e8522d4..4e8cce19b507 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -118,16 +118,39 @@ do { \
#endif
/*
- * Despite its name it doesn't necessarily has to be a full barrier.
- * It should only guarantee that a STORE before the critical section
- * can not be reordered with LOADs and STOREs inside this section.
- * spin_lock() is the one-way barrier, this LOAD can not escape out
- * of the region. So the default implementation simply ensures that
- * a STORE can not move into the critical section, smp_wmb() should
- * serialize it with another STORE done by spin_lock().
+ * This barrier must provide two things:
+ *
+ * - it must guarantee a STORE before the spin_lock() is ordered against a
+ * LOAD after it, see the comments at its two usage sites.
+ *
+ * - it must ensure the critical section is RCsc.
+ *
+ * The latter is important for cases where we observe values written by other
+ * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *
+ * CPU0 CPU1 CPU2
+ *
+ * for (;;) {
+ * if (READ_ONCE(X))
+ * break;
+ * }
+ * X=1
+ * <sched-out>
+ * <sched-in>
+ * r = X;
+ *
+ * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
+ * we get migrated and CPU2 sees X==0.
+ *
+ * Since most load-store architectures implement ACQUIRE with an smp_mb() after
+ * the LL/SC loop, they need no further barriers. Similarly all our TSO
+ * architectures imply an smp_mb() for each atomic instruction and equally don't
+ * need more.
+ *
+ * Architectures that can implement ACQUIRE better need to take care.
*/
-#ifndef smp_mb__before_spinlock
-#define smp_mb__before_spinlock() smp_wmb()
+#ifndef smp_mb__after_spinlock
+#define smp_mb__after_spinlock() do { } while (0)
#endif
/**
diff --git a/init/Kconfig b/init/Kconfig
index 8514b25db21c..5f0ef850e808 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1275,12 +1275,17 @@ config BASE_FULL
config FUTEX
bool "Enable futex support" if EXPERT
default y
- select RT_MUTEXES
+ imply RT_MUTEXES
help
Disabling this option will cause the kernel to be built without
support for "fast userspace mutexes". The resulting kernel may not
run glibc-based applications correctly.
+config FUTEX_PI
+ bool
+ depends on FUTEX && RT_MUTEXES
+ default y
+
config HAVE_FUTEX_CMPXCHG
bool
depends on FUTEX
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d5151688504..9ed6a051a1b9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -577,6 +577,13 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_unlock();
}
+/* Must be called with cpuset_mutex held. */
+static inline int nr_cpusets(void)
+{
+ /* jump label reference count + the top-level cpuset */
+ return static_key_count(&cpusets_enabled_key.key) + 1;
+}
+
/*
* generate_sched_domains()
*
diff --git a/kernel/exit.c b/kernel/exit.c
index c5548faa9f37..fa72d57db747 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -920,6 +920,7 @@ void __noreturn do_exit(long code)
exit_rcu();
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
+ lockdep_free_task(tsk);
do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
diff --git a/kernel/fork.c b/kernel/fork.c
index e075b7780421..5fc09911fbb9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -484,6 +484,8 @@ void __init fork_init(void)
cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
NULL, free_vm_stack_cache);
#endif
+
+ lockdep_init_task(&init_task);
}
int __weak arch_dup_task_struct(struct task_struct *dst,
@@ -1691,6 +1693,7 @@ static __latent_entropy struct task_struct *copy_process(
p->lockdep_depth = 0; /* no locks held yet */
p->curr_chain_key = 0;
p->lockdep_recursion = 0;
+ lockdep_init_task(p);
#endif
#ifdef CONFIG_DEBUG_MUTEXES
@@ -1949,6 +1952,7 @@ bad_fork_cleanup_audit:
bad_fork_cleanup_perf:
perf_event_free_task(p);
bad_fork_cleanup_policy:
+ lockdep_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
diff --git a/kernel/futex.c b/kernel/futex.c
index f50b434756c1..0939255fc750 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -876,6 +876,8 @@ static struct task_struct *futex_find_get_task(pid_t pid)
return p;
}
+#ifdef CONFIG_FUTEX_PI
+
/*
* This task is holding PI mutexes at exit time => bad.
* Kernel cleans up PI-state, but userspace is likely hosed.
@@ -933,6 +935,8 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock_irq(&curr->pi_lock);
}
+#endif
+
/*
* We need to check the following states:
*
@@ -1800,6 +1804,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
+ /*
+ * When PI not supported: return -ENOSYS if requeue_pi is true,
+ * consequently the compiler knows requeue_pi is always false past
+ * this point which will optimize away all the conditional code
+ * further down.
+ */
+ if (!IS_ENABLED(CONFIG_FUTEX_PI) && requeue_pi)
+ return -ENOSYS;
+
if (requeue_pi) {
/*
* Requeue PI only works on two distinct uaddrs. This
@@ -2595,6 +2608,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
struct futex_q q = futex_q_init;
int res, ret;
+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
+ return -ENOSYS;
+
if (refill_pi_state_cache())
return -ENOMEM;
@@ -2774,6 +2790,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
struct futex_q *top_waiter;
int ret;
+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
+ return -ENOSYS;
+
retry:
if (get_user(uval, uaddr))
return -EFAULT;
@@ -2984,6 +3003,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
struct futex_q q = futex_q_init;
int res, ret;
+ if (!IS_ENABLED(CONFIG_FUTEX_PI))
+ return -ENOSYS;
+
if (uaddr == uaddr2)
return -EINVAL;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index d11c506a6ac3..0bf2e8f5244a 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -79,29 +79,7 @@ int static_key_count(struct static_key *key)
}
EXPORT_SYMBOL_GPL(static_key_count);
-void static_key_enable(struct static_key *key)
-{
- int count = static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
-
- if (!count)
- static_key_slow_inc(key);
-}
-EXPORT_SYMBOL_GPL(static_key_enable);
-
-void static_key_disable(struct static_key *key)
-{
- int count = static_key_count(key);
-
- WARN_ON_ONCE(count < 0 || count > 1);
-
- if (count)
- static_key_slow_dec(key);
-}
-EXPORT_SYMBOL_GPL(static_key_disable);
-
-void static_key_slow_inc(struct static_key *key)
+static void static_key_slow_inc_cpuslocked(struct static_key *key)
{
int v, v1;
@@ -125,24 +103,87 @@ void static_key_slow_inc(struct static_key *key)
return;
}
- cpus_read_lock();
jump_label_lock();
if (atomic_read(&key->enabled) == 0) {
atomic_set(&key->enabled, -1);
jump_label_update(key);
- atomic_set(&key->enabled, 1);
+ /*
+ * Ensure that if the above cmpxchg loop observes our positive
+ * value, it must also observe all the text changes.
+ */
+ atomic_set_release(&key->enabled, 1);
} else {
atomic_inc(&key->enabled);
}
jump_label_unlock();
+}
+
+void static_key_slow_inc(struct static_key *key)
+{
+ cpus_read_lock();
+ static_key_slow_inc_cpuslocked(key);
cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(static_key_slow_inc);
-static void __static_key_slow_dec(struct static_key *key,
- unsigned long rate_limit, struct delayed_work *work)
+void static_key_enable_cpuslocked(struct static_key *key)
+{
+ STATIC_KEY_CHECK_USE();
+
+ if (atomic_read(&key->enabled) > 0) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 1);
+ return;
+ }
+
+ jump_label_lock();
+ if (atomic_read(&key->enabled) == 0) {
+ atomic_set(&key->enabled, -1);
+ jump_label_update(key);
+ /*
+ * See static_key_slow_inc().
+ */
+ atomic_set_release(&key->enabled, 1);
+ }
+ jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable_cpuslocked);
+
+void static_key_enable(struct static_key *key)
+{
+ cpus_read_lock();
+ static_key_enable_cpuslocked(key);
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_enable);
+
+void static_key_disable_cpuslocked(struct static_key *key)
+{
+ STATIC_KEY_CHECK_USE();
+
+ if (atomic_read(&key->enabled) != 1) {
+ WARN_ON_ONCE(atomic_read(&key->enabled) != 0);
+ return;
+ }
+
+ jump_label_lock();
+ if (atomic_cmpxchg(&key->enabled, 1, 0))
+ jump_label_update(key);
+ jump_label_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable_cpuslocked);
+
+void static_key_disable(struct static_key *key)
{
cpus_read_lock();
+ static_key_disable_cpuslocked(key);
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(static_key_disable);
+
+static void static_key_slow_dec_cpuslocked(struct static_key *key,
+ unsigned long rate_limit,
+ struct delayed_work *work)
+{
/*
* The negative count check is valid even when a negative
* key->enabled is in use by static_key_slow_inc(); a
@@ -153,7 +194,6 @@ static void __static_key_slow_dec(struct static_key *key,
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
WARN(atomic_read(&key->enabled) < 0,
"jump label: negative count!\n");
- cpus_read_unlock();
return;
}
@@ -164,6 +204,14 @@ static void __static_key_slow_dec(struct static_key *key,
jump_label_update(key);
}
jump_label_unlock();
+}
+
+static void __static_key_slow_dec(struct static_key *key,
+ unsigned long rate_limit,
+ struct delayed_work *work)
+{
+ cpus_read_lock();
+ static_key_slow_dec_cpuslocked(key, rate_limit, work);
cpus_read_unlock();
}
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7d2499bec5fe..1114dc42c27f 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -58,6 +58,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+#include <linux/slab.h>
+#endif
+
#ifdef CONFIG_PROVE_LOCKING
int prove_locking = 1;
module_param(prove_locking, int, 0644);
@@ -344,14 +348,12 @@ EXPORT_SYMBOL(lockdep_on);
#if VERBOSE
# define HARDIRQ_VERBOSE 1
# define SOFTIRQ_VERBOSE 1
-# define RECLAIM_VERBOSE 1
#else
# define HARDIRQ_VERBOSE 0
# define SOFTIRQ_VERBOSE 0
-# define RECLAIM_VERBOSE 0
#endif
-#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
+#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
/*
* Quick filtering for interesting events:
*/
@@ -726,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
}
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+static void cross_init(struct lockdep_map *lock, int cross);
+static int cross_lock(struct lockdep_map *lock);
+static int lock_acquire_crosslock(struct held_lock *hlock);
+static int lock_release_crosslock(struct lockdep_map *lock);
+#else
+static inline void cross_init(struct lockdep_map *lock, int cross) {}
+static inline int cross_lock(struct lockdep_map *lock) { return 0; }
+static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
+static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
+#endif
+
/*
* Register a lock's class in the hash-table, if the class is not present
* yet. Otherwise we look it up. We cache the result in the lock object
@@ -1125,22 +1139,41 @@ print_circular_lock_scenario(struct held_lock *src,
printk(KERN_CONT "\n\n");
}
- printk(" Possible unsafe locking scenario:\n\n");
- printk(" CPU0 CPU1\n");
- printk(" ---- ----\n");
- printk(" lock(");
- __print_lock_name(target);
- printk(KERN_CONT ");\n");
- printk(" lock(");
- __print_lock_name(parent);
- printk(KERN_CONT ");\n");
- printk(" lock(");
- __print_lock_name(target);
- printk(KERN_CONT ");\n");
- printk(" lock(");
- __print_lock_name(source);
- printk(KERN_CONT ");\n");
- printk("\n *** DEADLOCK ***\n\n");
+ if (cross_lock(tgt->instance)) {
+ printk(" Possible unsafe locking scenario by crosslock:\n\n");
+ printk(" CPU0 CPU1\n");
+ printk(" ---- ----\n");
+ printk(" lock(");
+ __print_lock_name(parent);
+ printk(KERN_CONT ");\n");
+ printk(" lock(");
+ __print_lock_name(target);
+ printk(KERN_CONT ");\n");
+ printk(" lock(");
+ __print_lock_name(source);
+ printk(KERN_CONT ");\n");
+ printk(" unlock(");
+ __print_lock_name(target);
+ printk(KERN_CONT ");\n");
+ printk("\n *** DEADLOCK ***\n\n");
+ } else {
+ printk(" Possible unsafe locking scenario:\n\n");
+ printk(" CPU0 CPU1\n");
+ printk(" ---- ----\n");
+ printk(" lock(");
+ __print_lock_name(target);
+ printk(KERN_CONT ");\n");
+ printk(" lock(");
+ __print_lock_name(parent);
+ printk(KERN_CONT ");\n");
+ printk(" lock(");
+ __print_lock_name(target);
+ printk(KERN_CONT ");\n");
+ printk(" lock(");
+ __print_lock_name(source);
+ printk(KERN_CONT ");\n");
+ printk("\n *** DEADLOCK ***\n\n");
+ }
}
/*
@@ -1165,7 +1198,12 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
pr_warn("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(check_src);
- pr_warn("\nbut task is already holding lock:\n");
+
+ if (cross_lock(check_tgt->instance))
+ pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
+ else
+ pr_warn("\nbut task is already holding lock:\n");
+
print_lock(check_tgt);
pr_warn("\nwhich lock already depends on the new lock.\n\n");
pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
@@ -1183,7 +1221,8 @@ static inline int class_equal(struct lock_list *entry, void *data)
static noinline int print_circular_bug(struct lock_list *this,
struct lock_list *target,
struct held_lock *check_src,
- struct held_lock *check_tgt)
+ struct held_lock *check_tgt,
+ struct stack_trace *trace)
{
struct task_struct *curr = current;
struct lock_list *parent;
@@ -1193,7 +1232,9 @@ static noinline int print_circular_bug(struct lock_list *this,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- if (!save_trace(&this->trace))
+ if (cross_lock(check_tgt->instance))
+ this->trace = *trace;
+ else if (!save_trace(&this->trace))
return 0;
depth = get_lock_depth(target);
@@ -1309,6 +1350,19 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
return result;
}
+static noinline int
+check_redundant(struct lock_list *root, struct lock_class *target,
+ struct lock_list **target_entry)
+{
+ int result;
+
+ debug_atomic_inc(nr_redundant_checks);
+
+ result = __bfs_forwards(root, target, class_equal, target_entry);
+
+ return result;
+}
+
#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
/*
* Forwards and backwards subgraph searching, for the purposes of
@@ -1784,6 +1838,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
if (nest)
return 2;
+ if (cross_lock(prev->instance))
+ continue;
+
return print_deadlock_bug(curr, prev, next);
}
return 1;
@@ -1813,20 +1870,13 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
*/
static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, int distance, int *stack_saved)
+ struct held_lock *next, int distance, struct stack_trace *trace,
+ int (*save)(struct stack_trace *trace))
{
struct lock_list *entry;
int ret;
struct lock_list this;
struct lock_list *uninitialized_var(target_entry);
- /*
- * Static variable, serialized by the graph_lock().
- *
- * We use this static variable to save the stack trace in case
- * we call into this function multiple times due to encountering
- * trylocks in the held lock stack.
- */
- static struct stack_trace trace;
/*
* Prove that the new <prev> -> <next> dependency would not
@@ -1841,7 +1891,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
this.parent = NULL;
ret = check_noncircular(&this, hlock_class(prev), &target_entry);
if (unlikely(!ret))
- return print_circular_bug(&this, target_entry, next, prev);
+ return print_circular_bug(&this, target_entry, next, prev, trace);
else if (unlikely(ret < 0))
return print_bfs_bug(ret);
@@ -1870,15 +1920,26 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
if (entry->class == hlock_class(next)) {
if (distance == 1)
entry->distance = 1;
- return 2;
+ return 1;
}
}
- if (!*stack_saved) {
- if (!save_trace(&trace))
- return 0;
- *stack_saved = 1;
+ /*
+ * Is the <prev> -> <next> link redundant?
+ */
+ this.class = hlock_class(prev);
+ this.parent = NULL;
+ ret = check_redundant(&this, hlock_class(next), &target_entry);
+ if (!ret) {
+ debug_atomic_inc(nr_redundant);
+ return 2;
}
+ if (ret < 0)
+ return print_bfs_bug(ret);
+
+
+ if (save && !save(trace))
+ return 0;
/*
* Ok, all validations passed, add the new lock
@@ -1886,14 +1947,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
*/
ret = add_lock_to_list(hlock_class(next),
&hlock_class(prev)->locks_after,
- next->acquire_ip, distance, &trace);
+ next->acquire_ip, distance, trace);
if (!ret)
return 0;
ret = add_lock_to_list(hlock_class(prev),
&hlock_class(next)->locks_before,
- next->acquire_ip, distance, &trace);
+ next->acquire_ip, distance, trace);
if (!ret)
return 0;
@@ -1901,8 +1962,6 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* Debugging printouts:
*/
if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
- /* We drop graph lock, so another thread can overwrite trace. */
- *stack_saved = 0;
graph_unlock();
printk("\n new dependency: ");
print_lock_name(hlock_class(prev));
@@ -1910,9 +1969,10 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
print_lock_name(hlock_class(next));
printk(KERN_CONT "\n");
dump_stack();
- return graph_lock();
+ if (!graph_lock())
+ return 0;
}
- return 1;
+ return 2;
}
/*
@@ -1925,8 +1985,9 @@ static int
check_prevs_add(struct task_struct *curr, struct held_lock *next)
{
int depth = curr->lockdep_depth;
- int stack_saved = 0;
struct held_lock *hlock;
+ struct stack_trace trace;
+ int (*save)(struct stack_trace *trace) = save_trace;
/*
* Debugging checks.
@@ -1947,21 +2008,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
int distance = curr->lockdep_depth - depth + 1;
hlock = curr->held_locks + depth - 1;
/*
- * Only non-recursive-read entries get new dependencies
- * added:
+ * Only non-crosslock entries get new dependencies added.
+ * Crosslock entries will be added by commit later:
*/
- if (hlock->read != 2 && hlock->check) {
- if (!check_prev_add(curr, hlock, next,
- distance, &stack_saved))
- return 0;
+ if (!cross_lock(hlock->instance)) {
/*
- * Stop after the first non-trylock entry,
- * as non-trylock entries have added their
- * own direct dependencies already, so this
- * lock is connected to them indirectly:
+ * Only non-recursive-read entries get new dependencies
+ * added:
*/
- if (!hlock->trylock)
- break;
+ if (hlock->read != 2 && hlock->check) {
+ int ret = check_prev_add(curr, hlock, next,
+ distance, &trace, save);
+ if (!ret)
+ return 0;
+
+ /*
+ * Stop saving stack_trace if save_trace() was
+ * called at least once:
+ */
+ if (save && ret == 2)
+ save = NULL;
+
+ /*
+ * Stop after the first non-trylock entry,
+ * as non-trylock entries have added their
+ * own direct dependencies already, so this
+ * lock is connected to them indirectly:
+ */
+ if (!hlock->trylock)
+ break;
+ }
}
depth--;
/*
@@ -2126,19 +2202,26 @@ static int check_no_collision(struct task_struct *curr,
}
/*
- * Look up a dependency chain. If the key is not present yet then
- * add it and return 1 - in this case the new dependency chain is
- * validated. If the key is already hashed, return 0.
- * (On return with 1 graph_lock is held.)
+ * This is for building a chain between just two different classes,
+ * instead of adding a new hlock upon current, which is done by
+ * add_chain_cache().
+ *
+ * This can be called in any context with two classes, while
+ * add_chain_cache() must be done within the lock owener's context
+ * since it uses hlock which might be racy in another context.
*/
-static inline int lookup_chain_cache(struct task_struct *curr,
- struct held_lock *hlock,
- u64 chain_key)
+static inline int add_chain_cache_classes(unsigned int prev,
+ unsigned int next,
+ unsigned int irq_context,
+ u64 chain_key)
{
- struct lock_class *class = hlock_class(hlock);
struct hlist_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
- int i, j;
+
+ /*
+ * Allocate a new chain entry from the static array, and add
+ * it to the hash:
+ */
/*
* We might need to take the graph lock, ensure we've got IRQs
@@ -2147,43 +2230,76 @@ static inline int lookup_chain_cache(struct task_struct *curr,
*/
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return 0;
+
+ if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
+ if (!debug_locks_off_graph_unlock())
+ return 0;
+
+ print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
+ dump_stack();
+ return 0;
+ }
+
+ chain = lock_chains + nr_lock_chains++;
+ chain->chain_key = chain_key;
+ chain->irq_context = irq_context;
+ chain->depth = 2;
+ if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
+ chain->base = nr_chain_hlocks;
+ nr_chain_hlocks += chain->depth;
+ chain_hlocks[chain->base] = prev - 1;
+ chain_hlocks[chain->base + 1] = next -1;
+ }
+#ifdef CONFIG_DEBUG_LOCKDEP
/*
- * We can walk it lock-free, because entries only get added
- * to the hash:
+ * Important for check_no_collision().
*/
- hlist_for_each_entry_rcu(chain, hash_head, entry) {
- if (chain->chain_key == chain_key) {
-cache_hit:
- debug_atomic_inc(chain_lookup_hits);
- if (!check_no_collision(curr, hlock, chain))
- return 0;
-
- if (very_verbose(class))
- printk("\nhash chain already cached, key: "
- "%016Lx tail class: [%p] %s\n",
- (unsigned long long)chain_key,
- class->key, class->name);
+ else {
+ if (!debug_locks_off_graph_unlock())
return 0;
- }
+
+ print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
+ dump_stack();
+ return 0;
}
- if (very_verbose(class))
- printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
- (unsigned long long)chain_key, class->key, class->name);
+#endif
+
+ hlist_add_head_rcu(&chain->entry, hash_head);
+ debug_atomic_inc(chain_lookup_misses);
+ inc_chains();
+
+ return 1;
+}
+
+/*
+ * Adds a dependency chain into chain hashtable. And must be called with
+ * graph_lock held.
+ *
+ * Return 0 if fail, and graph_lock is released.
+ * Return 1 if succeed, with graph_lock held.
+ */
+static inline int add_chain_cache(struct task_struct *curr,
+ struct held_lock *hlock,
+ u64 chain_key)
+{
+ struct lock_class *class = hlock_class(hlock);
+ struct hlist_head *hash_head = chainhashentry(chain_key);
+ struct lock_chain *chain;
+ int i, j;
+
/*
* Allocate a new chain entry from the static array, and add
* it to the hash:
*/
- if (!graph_lock())
- return 0;
+
/*
- * We have to walk the chain again locked - to avoid duplicates:
+ * We might need to take the graph lock, ensure we've got IRQs
+ * disabled to make this an IRQ-safe lock.. for recursion reasons
+ * lockdep won't complain about its own locking errors.
*/
- hlist_for_each_entry(chain, hash_head, entry) {
- if (chain->chain_key == chain_key) {
- graph_unlock();
- goto cache_hit;
- }
- }
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return 0;
+
if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
if (!debug_locks_off_graph_unlock())
return 0;
@@ -2235,6 +2351,78 @@ cache_hit:
return 1;
}
+/*
+ * Look up a dependency chain.
+ */
+static inline struct lock_chain *lookup_chain_cache(u64 chain_key)
+{
+ struct hlist_head *hash_head = chainhashentry(chain_key);
+ struct lock_chain *chain;
+
+ /*
+ * We can walk it lock-free, because entries only get added
+ * to the hash:
+ */
+ hlist_for_each_entry_rcu(chain, hash_head, entry) {
+ if (chain->chain_key == chain_key) {
+ debug_atomic_inc(chain_lookup_hits);
+ return chain;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * If the key is not present yet in dependency chain cache then
+ * add it and return 1 - in this case the new dependency chain is
+ * validated. If the key is already hashed, return 0.
+ * (On return with 1 graph_lock is held.)
+ */
+static inline int lookup_chain_cache_add(struct task_struct *curr,
+ struct held_lock *hlock,
+ u64 chain_key)
+{
+ struct lock_class *class = hlock_class(hlock);
+ struct lock_chain *chain = lookup_chain_cache(chain_key);
+
+ if (chain) {
+cache_hit:
+ if (!check_no_collision(curr, hlock, chain))
+ return 0;
+
+ if (very_verbose(class)) {
+ printk("\nhash chain already cached, key: "
+ "%016Lx tail class: [%p] %s\n",
+ (unsigned long long)chain_key,
+ class->key, class->name);
+ }
+
+ return 0;
+ }
+
+ if (very_verbose(class)) {
+ printk("\nnew hash chain, key: %016Lx tail class: [%p] %s\n",
+ (unsigned long long)chain_key, class->key, class->name);
+ }
+
+ if (!graph_lock())
+ return 0;
+
+ /*
+ * We have to walk the chain again locked - to avoid duplicates:
+ */
+ chain = lookup_chain_cache(chain_key);
+ if (chain) {
+ graph_unlock();
+ goto cache_hit;
+ }
+
+ if (!add_chain_cache(curr, hlock, chain_key))
+ return 0;
+
+ return 1;
+}
+
static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
struct held_lock *hlock, int chain_head, u64 chain_key)
{
@@ -2245,11 +2433,11 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
*
* We look up the chain_key and do the O(N^2) check and update of
* the dependencies only if this is a new dependency chain.
- * (If lookup_chain_cache() returns with 1 it acquires
+ * (If lookup_chain_cache_add() return with 1 it acquires
* graph_lock for us)
*/
if (!hlock->trylock && hlock->check &&
- lookup_chain_cache(curr, hlock, chain_key)) {
+ lookup_chain_cache_add(curr, hlock, chain_key)) {
/*
* Check whether last held lock:
*
@@ -2277,14 +2465,17 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
* Add dependency only if this lock is not the head
* of the chain, and if it's not a secondary read-lock:
*/
- if (!chain_head && ret != 2)
+ if (!chain_head && ret != 2) {
if (!check_prevs_add(curr, hlock))
return 0;
+ }
+
graph_unlock();
- } else
- /* after lookup_chain_cache(): */
+ } else {
+ /* after lookup_chain_cache_add(): */
if (unlikely(!debug_locks))
return 0;
+ }
return 1;
}
@@ -2567,14 +2758,6 @@ static int SOFTIRQ_verbose(struct lock_class *class)
return 0;
}
-static int RECLAIM_FS_verbose(struct lock_class *class)
-{
-#if RECLAIM_VERBOSE
- return class_filter(class);
-#endif
- return 0;
-}
-
#define STRICT_READ_CHECKS 1
static int (*state_verbose_f[])(struct lock_class *class) = {
@@ -2870,57 +3053,6 @@ void trace_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off);
}
-static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
-{
- struct task_struct *curr = current;
-
- if (unlikely(!debug_locks))
- return;
-
- gfp_mask = current_gfp_context(gfp_mask);
-
- /* no reclaim without waiting on it */
- if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
- return;
-
- /* this guy won't enter reclaim */
- if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
- return;
-
- /* We're only interested __GFP_FS allocations for now */
- if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
- return;
-
- /*
- * Oi! Can't be having __GFP_FS allocations with IRQs disabled.
- */
- if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
- return;
-
- /* Disable lockdep if explicitly requested */
- if (gfp_mask & __GFP_NOLOCKDEP)
- return;
-
- mark_held_locks(curr, RECLAIM_FS);
-}
-
-static void check_flags(unsigned long flags);
-
-void lockdep_trace_alloc(gfp_t gfp_mask)
-{
- unsigned long flags;
-
- if (unlikely(current->lockdep_recursion))
- return;
-
- raw_local_irq_save(flags);
- check_flags(flags);
- current->lockdep_recursion = 1;
- __lockdep_trace_alloc(gfp_mask, flags);
- current->lockdep_recursion = 0;
- raw_local_irq_restore(flags);
-}
-
static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
{
/*
@@ -2966,22 +3098,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
}
}
- /*
- * We reuse the irq context infrastructure more broadly as a general
- * context checking code. This tests GFP_FS recursion (a lock taken
- * during reclaim for a GFP_FS allocation is held over a GFP_FS
- * allocation).
- */
- if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
- if (hlock->read) {
- if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
- return 0;
- } else {
- if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
- return 0;
- }
- }
-
return 1;
}
@@ -3040,10 +3156,6 @@ static inline int separate_irq_context(struct task_struct *curr,
return 0;
}
-void lockdep_trace_alloc(gfp_t gfp_mask)
-{
-}
-
#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
/*
@@ -3116,7 +3228,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
/*
* Initialize a lock instance's lock-class mapping info:
*/
-void lockdep_init_map(struct lockdep_map *lock, const char *name,
+static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, int subclass)
{
int i;
@@ -3174,8 +3286,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
raw_local_irq_restore(flags);
}
}
+
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass)
+{
+ cross_init(lock, 0);
+ __lockdep_init_map(lock, name, key, subclass);
+}
EXPORT_SYMBOL_GPL(lockdep_init_map);
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass)
+{
+ cross_init(lock, 1);
+ __lockdep_init_map(lock, name, key, subclass);
+}
+EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
+#endif
+
struct lock_class_key __lockdep_no_validate__;
EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
@@ -3231,6 +3360,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int chain_head = 0;
int class_idx;
u64 chain_key;
+ int ret;
if (unlikely(!debug_locks))
return 0;
@@ -3279,7 +3409,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
class_idx = class - lock_classes + 1;
- if (depth) {
+ /* TODO: nest_lock is not implemented for crosslock yet. */
+ if (depth && !cross_lock(lock)) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
if (hlock->references) {
@@ -3367,6 +3498,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
return 0;
+ ret = lock_acquire_crosslock(hlock);
+ /*
+ * 2 means normal acquire operations are needed. Otherwise, it's
+ * ok just to return with '0:fail, 1:success'.
+ */
+ if (ret != 2)
+ return ret;
+
curr->curr_chain_key = chain_key;
curr->lockdep_depth++;
check_chain_key(curr);
@@ -3604,11 +3743,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
struct task_struct *curr = current;
struct held_lock *hlock;
unsigned int depth;
- int i;
+ int ret, i;
if (unlikely(!debug_locks))
return 0;
+ ret = lock_release_crosslock(lock);
+ /*
+ * 2 means normal release operations are needed. Otherwise, it's
+ * ok just to return with '0:fail, 1:success'.
+ */
+ if (ret != 2)
+ return ret;
+
depth = curr->lockdep_depth;
/*
* So we're all set to release this lock.. wait what lock? We don't
@@ -3952,18 +4099,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
}
EXPORT_SYMBOL_GPL(lock_unpin_lock);
-void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
-{
- current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
-}
-EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
-
-void lockdep_clear_current_reclaim_state(void)
-{
- current->lockdep_reclaim_gfp = 0;
-}
-EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
-
#ifdef CONFIG_LOCK_STAT
static int
print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
@@ -4484,6 +4619,13 @@ asmlinkage __visible void lockdep_sys_exit(void)
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
}
+
+ /*
+ * The lock history for each syscall should be independent. So wipe the
+ * slate clean on return to userspace.
+ */
+ crossrelease_hist_end(XHLOCK_PROC);
+ crossrelease_hist_start(XHLOCK_PROC);
}
void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4532,3 +4674,470 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
dump_stack();
}
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
+
+#ifdef CONFIG_LOCKDEP_CROSSRELEASE
+
+/*
+ * Crossrelease works by recording a lock history for each thread and
+ * connecting those historic locks that were taken after the
+ * wait_for_completion() in the complete() context.
+ *
+ * Task-A Task-B
+ *
+ * mutex_lock(&A);
+ * mutex_unlock(&A);
+ *
+ * wait_for_completion(&C);
+ * lock_acquire_crosslock();
+ * atomic_inc_return(&cross_gen_id);
+ * |
+ * | mutex_lock(&B);
+ * | mutex_unlock(&B);
+ * |
+ * | complete(&C);
+ * `-- lock_commit_crosslock();
+ *
+ * Which will then add a dependency between B and C.
+ */
+
+#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR])
+
+/*
+ * Whenever a crosslock is held, cross_gen_id will be increased.
+ */
+static atomic_t cross_gen_id; /* Can be wrapped */
+
+/*
+ * Make an entry of the ring buffer invalid.
+ */
+static inline void invalidate_xhlock(struct hist_lock *xhlock)
+{
+ /*
+ * Normally, xhlock->hlock.instance must be !NULL.
+ */
+ xhlock->hlock.instance = NULL;
+}
+
+/*
+ * Lock history stacks; we have 3 nested lock history stacks:
+ *
+ * Hard IRQ
+ * Soft IRQ
+ * History / Task
+ *
+ * The thing is that once we complete a (Hard/Soft) IRQ the future task locks
+ * should not depend on any of the locks observed while running the IRQ.
+ *
+ * So what we do is rewind the history buffer and erase all our knowledge of
+ * that temporal event.
+ */
+
+/*
+ * We need this to annotate lock history boundaries. Take for instance
+ * workqueues; each work is independent of the last. The completion of a future
+ * work does not depend on the completion of a past work (in general).
+ * Therefore we must not carry that (lock) dependency across works.
+ *
+ * This is true for many things; pretty much all kthreads fall into this
+ * pattern, where they have an 'idle' state and future completions do not
+ * depend on past completions. Its just that since they all have the 'same'
+ * form -- the kthread does the same over and over -- it doesn't typically
+ * matter.
+ *
+ * The same is true for system-calls, once a system call is completed (we've
+ * returned to userspace) the next system call does not depend on the lock
+ * history of the previous system call.
+ */
+void crossrelease_hist_start(enum xhlock_context_t c)
+{
+ struct task_struct *cur = current;
+
+ if (cur->xhlocks) {
+ cur->xhlock_idx_hist[c] = cur->xhlock_idx;
+ cur->hist_id_save[c] = cur->hist_id;
+ }
+}
+
+void crossrelease_hist_end(enum xhlock_context_t c)
+{
+ struct task_struct *cur = current;
+
+ if (cur->xhlocks) {
+ unsigned int idx = cur->xhlock_idx_hist[c];
+ struct hist_lock *h = &xhlock(idx);
+
+ cur->xhlock_idx = idx;
+
+ /* Check if the ring was overwritten. */
+ if (h->hist_id != cur->hist_id_save[c])
+ invalidate_xhlock(h);
+ }
+}
+
+static int cross_lock(struct lockdep_map *lock)
+{
+ return lock ? lock->cross : 0;
+}
+
+/*
+ * This is needed to decide the relationship between wrapable variables.
+ */
+static inline int before(unsigned int a, unsigned int b)
+{
+ return (int)(a - b) < 0;
+}
+
+static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
+{
+ return hlock_class(&xhlock->hlock);
+}
+
+static inline struct lock_class *xlock_class(struct cross_lock *xlock)
+{
+ return hlock_class(&xlock->hlock);
+}
+
+/*
+ * Should we check a dependency with previous one?
+ */
+static inline int depend_before(struct held_lock *hlock)
+{
+ return hlock->read != 2 && hlock->check && !hlock->trylock;
+}
+
+/*
+ * Should we check a dependency with next one?
+ */
+static inline int depend_after(struct held_lock *hlock)
+{
+ return hlock->read != 2 && hlock->check;
+}
+
+/*
+ * Check if the xhlock is valid, which would be false if,
+ *
+ * 1. Has not used after initializaion yet.
+ * 2. Got invalidated.
+ *
+ * Remind hist_lock is implemented as a ring buffer.
+ */
+static inline int xhlock_valid(struct hist_lock *xhlock)
+{
+ /*
+ * xhlock->hlock.instance must be !NULL.
+ */
+ return !!xhlock->hlock.instance;
+}
+
+/*
+ * Record a hist_lock entry.
+ *
+ * Irq disable is only required.
+ */
+static void add_xhlock(struct held_lock *hlock)
+{
+ unsigned int idx = ++current->xhlock_idx;
+ struct hist_lock *xhlock = &xhlock(idx);
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+ /*
+ * This can be done locklessly because they are all task-local
+ * state, we must however ensure IRQs are disabled.
+ */
+ WARN_ON_ONCE(!irqs_disabled());
+#endif
+
+ /* Initialize hist_lock's members */
+ xhlock->hlock = *hlock;
+ xhlock->hist_id = current->hist_id++;
+
+ xhlock->trace.nr_entries = 0;
+ xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
+ xhlock->trace.entries = xhlock->trace_entries;
+ xhlock->trace.skip = 3;
+ save_stack_trace(&xhlock->trace);
+}
+
+static inline int same_context_xhlock(struct hist_lock *xhlock)
+{
+ return xhlock->hlock.irq_context == task_irq_context(current);
+}
+
+/*
+ * This should be lockless as far as possible because this would be
+ * called very frequently.
+ */
+static void check_add_xhlock(struct held_lock *hlock)
+{
+ /*
+ * Record a hist_lock, only in case that acquisitions ahead
+ * could depend on the held_lock. For example, if the held_lock
+ * is trylock then acquisitions ahead never depends on that.
+ * In that case, we don't need to record it. Just return.
+ */
+ if (!current->xhlocks || !depend_before(hlock))
+ return;
+
+ add_xhlock(hlock);
+}
+
+/*
+ * For crosslock.
+ */
+static int add_xlock(struct held_lock *hlock)
+{
+ struct cross_lock *xlock;
+ unsigned int gen_id;
+
+ if (!graph_lock())
+ return 0;
+
+ xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
+
+ /*
+ * When acquisitions for a crosslock are overlapped, we use
+ * nr_acquire to perform commit for them, based on cross_gen_id
+ * of the first acquisition, which allows to add additional
+ * dependencies.
+ *
+ * Moreover, when no acquisition of a crosslock is in progress,
+ * we should not perform commit because the lock might not exist
+ * any more, which might cause incorrect memory access. So we
+ * have to track the number of acquisitions of a crosslock.
+ *
+ * depend_after() is necessary to initialize only the first
+ * valid xlock so that the xlock can be used on its commit.
+ */
+ if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
+ goto unlock;
+
+ gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
+ xlock->hlock = *hlock;
+ xlock->hlock.gen_id = gen_id;
+unlock:
+ graph_unlock();
+ return 1;
+}
+
+/*
+ * Called for both normal and crosslock acquires. Normal locks will be
+ * pushed on the hist_lock queue. Cross locks will record state and
+ * stop regular lock_acquire() to avoid being placed on the held_lock
+ * stack.
+ *
+ * Return: 0 - failure;
+ * 1 - crosslock, done;
+ * 2 - normal lock, continue to held_lock[] ops.
+ */
+static int lock_acquire_crosslock(struct held_lock *hlock)
+{
+ /*
+ * CONTEXT 1 CONTEXT 2
+ * --------- ---------
+ * lock A (cross)
+ * X = atomic_inc_return(&cross_gen_id)
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Y = atomic_read_acquire(&cross_gen_id)
+ * lock B
+ *
+ * atomic_read_acquire() is for ordering between A and B,
+ * IOW, A happens before B, when CONTEXT 2 see Y >= X.
+ *
+ * Pairs with atomic_inc_return() in add_xlock().
+ */
+ hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
+
+ if (cross_lock(hlock->instance))
+ return add_xlock(hlock);
+
+ check_add_xhlock(hlock);
+ return 2;
+}
+
+static int copy_trace(struct stack_trace *trace)
+{
+ unsigned long *buf = stack_trace + nr_stack_trace_entries;
+ unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
+ unsigned int nr = min(max_nr, trace->nr_entries);
+
+ trace->nr_entries = nr;
+ memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
+ trace->entries = buf;
+ nr_stack_trace_entries += nr;
+
+ if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
+ if (!debug_locks_off_graph_unlock())
+ return 0;
+
+ print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
+ dump_stack();
+
+ return 0;
+ }
+
+ return 1;
+}
+
+static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
+{
+ unsigned int xid, pid;
+ u64 chain_key;
+
+ xid = xlock_class(xlock) - lock_classes;
+ chain_key = iterate_chain_key((u64)0, xid);
+ pid = xhlock_class(xhlock) - lock_classes;
+ chain_key = iterate_chain_key(chain_key, pid);
+
+ if (lookup_chain_cache(chain_key))
+ return 1;
+
+ if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
+ chain_key))
+ return 0;
+
+ if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
+ &xhlock->trace, copy_trace))
+ return 0;
+
+ return 1;
+}
+
+static void commit_xhlocks(struct cross_lock *xlock)
+{
+ unsigned int cur = current->xhlock_idx;
+ unsigned int prev_hist_id = xhlock(cur).hist_id;
+ unsigned int i;
+
+ if (!graph_lock())
+ return;
+
+ if (xlock->nr_acquire) {
+ for (i = 0; i < MAX_XHLOCKS_NR; i++) {
+ struct hist_lock *xhlock = &xhlock(cur - i);
+
+ if (!xhlock_valid(xhlock))
+ break;
+
+ if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
+ break;
+
+ if (!same_context_xhlock(xhlock))
+ break;
+
+ /*
+ * Filter out the cases that the ring buffer was
+ * overwritten and the previous entry has a bigger
+ * hist_id than the following one, which is impossible
+ * otherwise.
+ */
+ if (unlikely(before(xhlock->hist_id, prev_hist_id)))
+ break;
+
+ prev_hist_id = xhlock->hist_id;
+
+ /*
+ * commit_xhlock() returns 0 with graph_lock already
+ * released if fail.
+ */
+ if (!commit_xhlock(xlock, xhlock))
+ return;
+ }
+ }
+
+ graph_unlock();
+}
+
+void lock_commit_crosslock(struct lockdep_map *lock)
+{
+ struct cross_lock *xlock;
+ unsigned long flags;
+
+ if (unlikely(!debug_locks || current->lockdep_recursion))
+ return;
+
+ if (!current->xhlocks)
+ return;
+
+ /*
+ * Do commit hist_locks with the cross_lock, only in case that
+ * the cross_lock could depend on acquisitions after that.
+ *
+ * For example, if the cross_lock does not have the 'check' flag
+ * then we don't need to check dependencies and commit for that.
+ * Just skip it. In that case, of course, the cross_lock does
+ * not depend on acquisitions ahead, either.
+ *
+ * WARNING: Don't do that in add_xlock() in advance. When an
+ * acquisition context is different from the commit context,
+ * invalid(skipped) cross_lock might be accessed.
+ */
+ if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+ current->lockdep_recursion = 1;
+ xlock = &((struct lockdep_map_cross *)lock)->xlock;
+ commit_xhlocks(xlock);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_commit_crosslock);
+
+/*
+ * Return: 0 - failure;
+ * 1 - crosslock, done;
+ * 2 - normal lock, continue to held_lock[] ops.
+ */
+static int lock_release_crosslock(struct lockdep_map *lock)
+{
+ if (cross_lock(lock)) {
+ if (!graph_lock())
+ return 0;
+ ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
+ graph_unlock();
+ return 1;
+ }
+ return 2;
+}
+
+static void cross_init(struct lockdep_map *lock, int cross)
+{
+ if (cross)
+ ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
+
+ lock->cross = cross;
+
+ /*
+ * Crossrelease assumes that the ring buffer size of xhlocks
+ * is aligned with power of 2. So force it on build.
+ */
+ BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
+}
+
+void lockdep_init_task(struct task_struct *task)
+{
+ int i;
+
+ task->xhlock_idx = UINT_MAX;
+ task->hist_id = 0;
+
+ for (i = 0; i < XHLOCK_CTX_NR; i++) {
+ task->xhlock_idx_hist[i] = UINT_MAX;
+ task->hist_id_save[i] = 0;
+ }
+
+ task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
+ GFP_KERNEL);
+}
+
+void lockdep_free_task(struct task_struct *task)
+{
+ if (task->xhlocks) {
+ void *tmp = task->xhlocks;
+ /* Diable crossrelease for current */
+ task->xhlocks = NULL;
+ kfree(tmp);
+ }
+}
+#endif
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c08fbd2f5ba9..1da4669d57a7 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -143,6 +143,8 @@ struct lockdep_stats {
int redundant_softirqs_on;
int redundant_softirqs_off;
int nr_unused_locks;
+ int nr_redundant_checks;
+ int nr_redundant;
int nr_cyclic_checks;
int nr_cyclic_check_recursions;
int nr_find_usage_forwards_checks;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 6d1fcc786081..68d9e267ccd4 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -201,6 +201,10 @@ static void lockdep_stats_debug_show(struct seq_file *m)
debug_atomic_read(chain_lookup_hits));
seq_printf(m, " cyclic checks: %11llu\n",
debug_atomic_read(nr_cyclic_checks));
+ seq_printf(m, " redundant checks: %11llu\n",
+ debug_atomic_read(nr_redundant_checks));
+ seq_printf(m, " redundant links: %11llu\n",
+ debug_atomic_read(nr_redundant));
seq_printf(m, " find-mask forwards checks: %11llu\n",
debug_atomic_read(nr_find_usage_forwards_checks));
seq_printf(m, " find-mask backwards checks: %11llu\n",
diff --git a/kernel/locking/lockdep_states.h b/kernel/locking/lockdep_states.h
index 995b0cc2b84c..35ca09f2ed0b 100644
--- a/kernel/locking/lockdep_states.h
+++ b/kernel/locking/lockdep_states.h
@@ -6,4 +6,3 @@
*/
LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ)
-LOCKDEP_STATE(RECLAIM_FS)
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index a3167941093b..a74ee6abd039 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -109,6 +109,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+ /*
+ * osq_lock() unqueue
+ *
+ * node->prev = prev osq_wait_next()
+ * WMB MB
+ * prev->next = node next->prev = prev // unqueue-C
+ *
+ * Here 'node->prev' and 'next->prev' are the same variable and we need
+ * to ensure these stores happen in-order to avoid corrupting the list.
+ */
+ smp_wmb();
+
WRITE_ONCE(prev->next, node);
/*
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 72ad45a9a794..8d039b928d61 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -40,6 +40,9 @@ struct rt_mutex_waiter {
/*
* Various helpers to access the waiters-tree:
*/
+
+#ifdef CONFIG_RT_MUTEXES
+
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
{
return !RB_EMPTY_ROOT(&lock->waiters);
@@ -69,6 +72,32 @@ task_top_pi_waiter(struct task_struct *p)
pi_tree_entry);
}
+#else
+
+static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
+{
+ return false;
+}
+
+static inline struct rt_mutex_waiter *
+rt_mutex_top_waiter(struct rt_mutex *lock)
+{
+ return NULL;
+}
+
+static inline int task_has_pi_waiters(struct task_struct *p)
+{
+ return false;
+}
+
+static inline struct rt_mutex_waiter *
+task_top_pi_waiter(struct task_struct *p)
+{
+ return NULL;
+}
+
+#endif
+
/*
* lock->owner state tracking:
*/
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 20819df98125..0848634c5512 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -126,7 +126,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
/*
* get a read lock on the semaphore
*/
-void __sched __down_read(struct rw_semaphore *sem)
+int __sched __down_read_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
unsigned long flags;
@@ -140,8 +140,6 @@ void __sched __down_read(struct rw_semaphore *sem)
goto out;
}
- set_current_state(TASK_UNINTERRUPTIBLE);
-
/* set up my own style of waitqueue */
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
@@ -149,20 +147,41 @@ void __sched __down_read(struct rw_semaphore *sem)
list_add_tail(&waiter.list, &sem->wait_list);
- /* we don't need to touch the semaphore struct anymore */
- raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
-
/* wait to be given the lock */
for (;;) {
if (!waiter.task)
break;
+ if (signal_pending_state(state, current))
+ goto out_nolock;
+ set_current_state(state);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
- __set_current_state(TASK_RUNNING);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
out:
- ;
+ return 0;
+
+out_nolock:
+ /*
+ * We didn't take the lock, so that there is a writer, which
+ * is owner or the first waiter of the sem. If it's a waiter,
+ * it will be woken by current owner. Not need to wake anybody.
+ */
+ list_del(&waiter.list);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ return -EINTR;
+}
+
+void __sched __down_read(struct rw_semaphore *sem)
+{
+ __down_read_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_read_killable(struct rw_semaphore *sem)
+{
+ return __down_read_common(sem, TASK_KILLABLE);
}
/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 34e727f18e49..02f660666ab8 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -221,8 +221,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
/*
* Wait for the read lock to be granted
*/
-__visible
-struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore __sched *
+__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
@@ -255,17 +255,44 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
/* wait to be given the lock */
while (true) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(state);
if (!waiter.task)
break;
+ if (signal_pending_state(state, current)) {
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter.task)
+ goto out_nolock;
+ raw_spin_unlock_irq(&sem->wait_lock);
+ break;
+ }
schedule();
}
__set_current_state(TASK_RUNNING);
return sem;
+out_nolock:
+ list_del(&waiter.list);
+ if (list_empty(&sem->wait_list))
+ atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
+ raw_spin_unlock_irq(&sem->wait_lock);
+ __set_current_state(TASK_RUNNING);
+ return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed(struct rw_semaphore *sem)
+{
+ return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed_killable(struct rw_semaphore *sem)
+{
+ return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_read_failed_killable);
+
/*
* This function must be called with the sem->wait_lock held to prevent
* race conditions between checking the rwsem wait list and setting the
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 13fc5ae9bf2f..566b6ec7b6fe 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -32,6 +32,12 @@ void complete(struct completion *x)
unsigned long flags;
spin_lock_irqsave(&x->wait.lock, flags);
+
+ /*
+ * Perform commit of crossrelease here.
+ */
+ complete_release_commit(x);
+
if (x->done != UINT_MAX)
x->done++;
__wake_up_locked(&x->wait, TASK_NORMAL, 1);
@@ -92,9 +98,14 @@ __wait_for_common(struct completion *x,
{
might_sleep();
+ complete_acquire(x);
+
spin_lock_irq(&x->wait.lock);
timeout = do_wait_for_common(x, action, timeout, state);
spin_unlock_irq(&x->wait.lock);
+
+ complete_release(x);
+
return timeout;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0869b20fba81..9fece583a1f0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1967,8 +1967,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* reordered with p->state check below. This pairs with mb() in
* set_current_state() the waiting thread does.
*/
- smp_mb__before_spinlock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
+ smp_mb__after_spinlock();
if (!(p->state & state))
goto out;
@@ -3281,8 +3281,8 @@ static void __sched notrace __schedule(bool preempt)
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
*/
- smp_mb__before_spinlock();
rq_lock(rq, &rf);
+ smp_mb__after_spinlock();
/* Promote REQ to ACT */
rq->clock_update_flags <<= 1;
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 3d5610dcce11..2227e183e202 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -33,9 +33,6 @@ void swake_up(struct swait_queue_head *q)
{
unsigned long flags;
- if (!swait_active(q))
- return;
-
raw_spin_lock_irqsave(&q->lock, flags);
swake_up_locked(q);
raw_spin_unlock_irqrestore(&q->lock, flags);
@@ -51,9 +48,6 @@ void swake_up_all(struct swait_queue_head *q)
struct swait_queue *curr;
LIST_HEAD(tmp);
- if (!swait_active(q))
- return;
-
raw_spin_lock_irq(&q->lock);
list_splice_init(&q->task_list, &tmp);
while (!list_empty(&tmp)) {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca937b0c3a96..e86733a8b344 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2093,6 +2093,7 @@ __acquires(&pool->lock)
lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
+ crossrelease_hist_start(XHLOCK_PROC);
trace_workqueue_execute_start(work);
worker->current_func(work);
/*
@@ -2100,6 +2101,7 @@ __acquires(&pool->lock)
* point will only record its address.
*/
trace_workqueue_execute_end(work);
+ crossrelease_hist_end(XHLOCK_PROC);
lock_map_release(&lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 98fe715522e8..ebd40d3b0b28 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1150,6 +1150,27 @@ config LOCK_STAT
CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
(CONFIG_LOCKDEP defines "acquire" and "release" events.)
+config LOCKDEP_CROSSRELEASE
+ bool "Lock debugging: make lockdep work for crosslocks"
+ depends on PROVE_LOCKING
+ default n
+ help
+ This makes lockdep work for crosslock which is a lock allowed to
+ be released in a different context from the acquisition context.
+ Normally a lock must be released in the context acquiring the lock.
+ However, relexing this constraint helps synchronization primitives
+ such as page locks or completions can use the lock correctness
+ detector, lockdep.
+
+config LOCKDEP_COMPLETE
+ bool "Lock debugging: allow completions to use deadlock detector"
+ depends on PROVE_LOCKING
+ select LOCKDEP_CROSSRELEASE
+ default n
+ help
+ A deadlock caused by wait_for_completion() and complete() can be
+ detected by lockdep using crossrelease feature.
+
config DEBUG_LOCKDEP
bool "Lock dependency engine debugging"
depends on DEBUG_KERNEL && LOCKDEP
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 216114f6ef0b..ce883459e246 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1410,6 +1410,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id();
int target_nid, last_cpupid = -1;
+ bool need_flush = false;
bool page_locked;
bool migrated = false;
bool was_writable;
@@ -1503,10 +1504,29 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
/*
+ * Since we took the NUMA fault, we must have observed the !accessible
+ * bit. Make sure all other CPUs agree with that, to avoid them
+ * modifying the page we're about to migrate.
+ *
+ * Must be done under PTL such that we'll observe the relevant
+ * set_tlb_flush_pending().
+ */
+ if (mm_tlb_flush_pending(vma->vm_mm))
+ need_flush = true;
+
+ /*
* Migrate the THP to the requested node, returns with page unlocked
* and access rights restored.
*/
spin_unlock(vmf->ptl);
+
+ /*
+ * We are not sure a pending tlb flush here is for a huge page
+ * mapping or not. Hence use the tlb range variant
+ */
+ if (need_flush)
+ flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
vmf->pmd, pmd, vmf->address, page, target_nid);
if (migrated) {
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index ca11bc4ce205..6f319fb81718 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -267,13 +267,13 @@ static void check_memory_region(unsigned long addr,
check_memory_region_inline(addr, size, write, ret_ip);
}
-void kasan_check_read(const void *p, unsigned int size)
+void kasan_check_read(const volatile void *p, unsigned int size)
{
check_memory_region((unsigned long)p, size, false, _RET_IP_);
}
EXPORT_SYMBOL(kasan_check_read);
-void kasan_check_write(const void *p, unsigned int size)
+void kasan_check_write(const volatile void *p, unsigned int size)
{
check_memory_region((unsigned long)p, size, true, _RET_IP_);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d00f746c2fd..6acf612fdd99 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
+#include <linux/lockdep.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -3490,6 +3491,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
}
#endif /* CONFIG_COMPACTION */
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __fs_reclaim_map =
+ STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
+
+static bool __need_fs_reclaim(gfp_t gfp_mask)
+{
+ gfp_mask = current_gfp_context(gfp_mask);
+
+ /* no reclaim without waiting on it */
+ if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
+ return false;
+
+ /* this guy won't enter reclaim */
+ if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+ return false;
+
+ /* We're only interested __GFP_FS allocations for now */
+ if (!(gfp_mask & __GFP_FS))
+ return false;
+
+ if (gfp_mask & __GFP_NOLOCKDEP)
+ return false;
+
+ return true;
+}
+
+void fs_reclaim_acquire(gfp_t gfp_mask)
+{
+ if (__need_fs_reclaim(gfp_mask))
+ lock_map_acquire(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
+
+void fs_reclaim_release(gfp_t gfp_mask)
+{
+ if (__need_fs_reclaim(gfp_mask))
+ lock_map_release(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_release);
+#endif
+
/* Perform direct synchronous page reclaim */
static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -3504,7 +3546,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
noreclaim_flag = memalloc_noreclaim_save();
- lockdep_set_current_reclaim_state(gfp_mask);
+ fs_reclaim_acquire(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@@ -3512,7 +3554,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
ac->nodemask);
current->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
+ fs_reclaim_release(gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
@@ -4041,7 +4083,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
*alloc_flags |= ALLOC_CPUSET;
}
- lockdep_trace_alloc(gfp_mask);
+ fs_reclaim_acquire(gfp_mask);
+ fs_reclaim_release(gfp_mask);
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
diff --git a/mm/slab.h b/mm/slab.h
index 6885e1192ec5..073362816acc 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,6 +43,7 @@ struct kmem_cache {
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
+#include <linux/sched/mm.h>
/*
* State of the slab allocator.
@@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
gfp_t flags)
{
flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
+
+ fs_reclaim_acquire(flags);
+ fs_reclaim_release(flags);
+
might_sleep_if(gfpflags_allow_blocking(flags));
if (should_failslab(s, flags))
diff --git a/mm/slob.c b/mm/slob.c
index 1bae78d71096..a8bd6fa11a66 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp &= gfp_allowed_mask;
- lockdep_trace_alloc(gfp);
+ fs_reclaim_acquire(gfp);
+ fs_reclaim_release(gfp);
if (size < PAGE_SIZE - align) {
if (!size)
@@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
+ fs_reclaim_acquire(flags);
+ fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1af041930a6..f957afe900ec 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3525,8 +3525,6 @@ static int kswapd(void *p)
};
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- lockdep_set_current_reclaim_state(GFP_KERNEL);
-
if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state;
@@ -3585,14 +3583,15 @@ kswapd_try_sleep:
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order);
+ fs_reclaim_acquire(GFP_KERNEL);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
+ fs_reclaim_release(GFP_KERNEL);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
}
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
current->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
return 0;
}
@@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
unsigned int noreclaim_flag;
noreclaim_flag = memalloc_noreclaim_save();
- lockdep_set_current_reclaim_state(sc.gfp_mask);
+ fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
p->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
+ fs_reclaim_release(sc.gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
@@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
*/
noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE;
- lockdep_set_current_reclaim_state(sc.gfp_mask);
+ fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
p->reclaim_state = NULL;
+ fs_reclaim_release(gfp_mask);
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
- lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a7c804f73990..b34f09b20fef 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1809,8 +1809,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
static struct static_key udp_encap_needed __read_mostly;
void udp_encap_enable(void)
{
- if (!static_key_enabled(&udp_encap_needed))
- static_key_slow_inc(&udp_encap_needed);
+ static_key_enable(&udp_encap_needed);
}
EXPORT_SYMBOL(udp_encap_enable);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 578142b7ca3e..96d240798c38 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -574,8 +574,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
static struct static_key udpv6_encap_needed __read_mostly;
void udpv6_encap_enable(void)
{
- if (!static_key_enabled(&udpv6_encap_needed))
- static_key_slow_inc(&udpv6_encap_needed);
+ static_key_enable(&udpv6_encap_needed);
}
EXPORT_SYMBOL(udpv6_encap_enable);