From 92b4e9f11a636d1723cc0866bf8b9111b1e24339 Mon Sep 17 00:00:00 2001 From: Marta Rybczynska Date: Sun, 26 Jul 2020 20:54:40 +0200 Subject: Documentation/locking/locktypes: Fix local_locks documentation Fix issues with local_locks documentation: - fix function names, local_lock.h has local_unlock_irqrestore(), not local_lock_irqrestore() - fix mapping table, local_unlock_irqrestore() maps to local_irq_restore(), not _save() Signed-off-by: Marta Rybczynska Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Link: https://lkml.kernel.org/r/CAApg2=SKxQ3Sqwj6TZnV-0x0cKLXFKDaPvXT4N15MPDMKq724g@mail.gmail.com --- Documentation/locking/locktypes.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index 4cefed8048ca..ddada4a53749 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -164,14 +164,14 @@ by disabling preemption or interrupts. On non-PREEMPT_RT kernels local_lock operations map to the preemption and interrupt disabling and enabling primitives: - =========================== ====================== - local_lock(&llock) preempt_disable() - local_unlock(&llock) preempt_enable() - local_lock_irq(&llock) local_irq_disable() - local_unlock_irq(&llock) local_irq_enable() - local_lock_save(&llock) local_irq_save() - local_lock_restore(&llock) local_irq_save() - =========================== ====================== + =============================== ====================== + local_lock(&llock) preempt_disable() + local_unlock(&llock) preempt_enable() + local_lock_irq(&llock) local_irq_disable() + local_unlock_irq(&llock) local_irq_enable() + local_lock_irqsave(&llock) local_irq_save() + local_unlock_irqrestore(&llock) local_irq_restore() + =============================== ====================== The named scope of local_lock has two advantages over the regular primitives: @@ -353,14 +353,14 @@ protection scope. So the following substitution is wrong:: { local_irq_save(flags); -> local_lock_irqsave(&local_lock_1, flags); func3(); - local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_1, flags); + local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock_1, flags); } func2() { local_irq_save(flags); -> local_lock_irqsave(&local_lock_2, flags); func3(); - local_irq_restore(flags); -> local_lock_irqrestore(&local_lock_2, flags); + local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock_2, flags); } func3() @@ -379,14 +379,14 @@ PREEMPT_RT-specific semantics of spinlock_t. The correct substitution is:: { local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags); func3(); - local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags); + local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock, flags); } func2() { local_irq_save(flags); -> local_lock_irqsave(&local_lock, flags); func3(); - local_irq_restore(flags); -> local_lock_irqrestore(&local_lock, flags); + local_irq_restore(flags); -> local_unlock_irqrestore(&local_lock, flags); } func3() -- cgit v1.2.3 From 224ec489d3cdb0af6794e257eeee39d98dc9c5b2 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 7 Aug 2020 15:42:21 +0800 Subject: lockdep/Documention: Recursive read lock detection reasoning This patch add the documentation piece for the reasoning of deadlock detection related to recursive read lock. The following sections are added: * Explain what is a recursive read lock, and what deadlock cases they could introduce. * Introduce the notations for different types of dependencies, and the definition of strong paths. * Proof for a closed strong path is both sufficient and necessary for deadlock detections with recursive read locks involved. The proof could also explain why we call the path "strong" Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200807074238.1632519-3-boqun.feng@gmail.com --- Documentation/locking/lockdep-design.rst | 258 +++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) (limited to 'Documentation') diff --git a/Documentation/locking/lockdep-design.rst b/Documentation/locking/lockdep-design.rst index 23fcbc4d3fc0..cec03bd1294a 100644 --- a/Documentation/locking/lockdep-design.rst +++ b/Documentation/locking/lockdep-design.rst @@ -392,3 +392,261 @@ Run the command and save the output, then compare against the output from a later run of this command to identify the leakers. This same output can also help you find situations where runtime lock initialization has been omitted. + +Recursive read locks: +--------------------- +The whole of the rest document tries to prove a certain type of cycle is equivalent +to deadlock possibility. + +There are three types of lockers: writers (i.e. exclusive lockers, like +spin_lock() or write_lock()), non-recursive readers (i.e. shared lockers, like +down_read()) and recursive readers (recursive shared lockers, like rcu_read_lock()). +And we use the following notations of those lockers in the rest of the document: + + W or E: stands for writers (exclusive lockers). + r: stands for non-recursive readers. + R: stands for recursive readers. + S: stands for all readers (non-recursive + recursive), as both are shared lockers. + N: stands for writers and non-recursive readers, as both are not recursive. + +Obviously, N is "r or W" and S is "r or R". + +Recursive readers, as their name indicates, are the lockers allowed to acquire +even inside the critical section of another reader of the same lock instance, +in other words, allowing nested read-side critical sections of one lock instance. + +While non-recursive readers will cause a self deadlock if trying to acquire inside +the critical section of another reader of the same lock instance. + +The difference between recursive readers and non-recursive readers is because: +recursive readers get blocked only by a write lock *holder*, while non-recursive +readers could get blocked by a write lock *waiter*. Considering the follow example: + + TASK A: TASK B: + + read_lock(X); + write_lock(X); + read_lock_2(X); + +Task A gets the reader (no matter whether recursive or non-recursive) on X via +read_lock() first. And when task B tries to acquire writer on X, it will block +and become a waiter for writer on X. Now if read_lock_2() is recursive readers, +task A will make progress, because writer waiters don't block recursive readers, +and there is no deadlock. However, if read_lock_2() is non-recursive readers, +it will get blocked by writer waiter B, and cause a self deadlock. + +Block conditions on readers/writers of the same lock instance: +-------------------------------------------------------------- +There are simply four block conditions: + +1. Writers block other writers. +2. Readers block writers. +3. Writers block both recursive readers and non-recursive readers. +4. And readers (recursive or not) don't block other recursive readers but + may block non-recursive readers (because of the potential co-existing + writer waiters) + +Block condition matrix, Y means the row blocks the column, and N means otherwise. + + | E | r | R | + +---+---+---+---+ + E | Y | Y | Y | + +---+---+---+---+ + r | Y | Y | N | + +---+---+---+---+ + R | Y | Y | N | + + (W: writers, r: non-recursive readers, R: recursive readers) + + +acquired recursively. Unlike non-recursive read locks, recursive read locks +only get blocked by current write lock *holders* other than write lock +*waiters*, for example: + + TASK A: TASK B: + + read_lock(X); + + write_lock(X); + + read_lock(X); + +is not a deadlock for recursive read locks, as while the task B is waiting for +the lock X, the second read_lock() doesn't need to wait because it's a recursive +read lock. However if the read_lock() is non-recursive read lock, then the above +case is a deadlock, because even if the write_lock() in TASK B cannot get the +lock, but it can block the second read_lock() in TASK A. + +Note that a lock can be a write lock (exclusive lock), a non-recursive read +lock (non-recursive shared lock) or a recursive read lock (recursive shared +lock), depending on the lock operations used to acquire it (more specifically, +the value of the 'read' parameter for lock_acquire()). In other words, a single +lock instance has three types of acquisition depending on the acquisition +functions: exclusive, non-recursive read, and recursive read. + +To be concise, we call that write locks and non-recursive read locks as +"non-recursive" locks and recursive read locks as "recursive" locks. + +Recursive locks don't block each other, while non-recursive locks do (this is +even true for two non-recursive read locks). A non-recursive lock can block the +corresponding recursive lock, and vice versa. + +A deadlock case with recursive locks involved is as follow: + + TASK A: TASK B: + + read_lock(X); + read_lock(Y); + write_lock(Y); + write_lock(X); + +Task A is waiting for task B to read_unlock() Y and task B is waiting for task +A to read_unlock() X. + +Dependency types and strong dependency paths: +--------------------------------------------- +Lock dependencies record the orders of the acquisitions of a pair of locks, and +because there are 3 types for lockers, there are, in theory, 9 types of lock +dependencies, but we can show that 4 types of lock dependencies are enough for +deadlock detection. + +For each lock dependency: + + L1 -> L2 + +, which means lockdep has seen L1 held before L2 held in the same context at runtime. +And in deadlock detection, we care whether we could get blocked on L2 with L1 held, +IOW, whether there is a locker L3 that L1 blocks L3 and L2 gets blocked by L3. So +we only care about 1) what L1 blocks and 2) what blocks L2. As a result, we can combine +recursive readers and non-recursive readers for L1 (as they block the same types) and +we can combine writers and non-recursive readers for L2 (as they get blocked by the +same types). + +With the above combination for simplification, there are 4 types of dependency edges +in the lockdep graph: + +1) -(ER)->: exclusive writer to recursive reader dependency, "X -(ER)-> Y" means + X -> Y and X is a writer and Y is a recursive reader. + +2) -(EN)->: exclusive writer to non-recursive locker dependency, "X -(EN)-> Y" means + X -> Y and X is a writer and Y is either a writer or non-recursive reader. + +3) -(SR)->: shared reader to recursive reader dependency, "X -(SR)-> Y" means + X -> Y and X is a reader (recursive or not) and Y is a recursive reader. + +4) -(SN)->: shared reader to non-recursive locker dependency, "X -(SN)-> Y" means + X -> Y and X is a reader (recursive or not) and Y is either a writer or + non-recursive reader. + +Note that given two locks, they may have multiple dependencies between them, for example: + + TASK A: + + read_lock(X); + write_lock(Y); + ... + + TASK B: + + write_lock(X); + write_lock(Y); + +, we have both X -(SN)-> Y and X -(EN)-> Y in the dependency graph. + +We use -(xN)-> to represent edges that are either -(EN)-> or -(SN)->, the +similar for -(Ex)->, -(xR)-> and -(Sx)-> + +A "path" is a series of conjunct dependency edges in the graph. And we define a +"strong" path, which indicates the strong dependency throughout each dependency +in the path, as the path that doesn't have two conjunct edges (dependencies) as +-(xR)-> and -(Sx)->. In other words, a "strong" path is a path from a lock +walking to another through the lock dependencies, and if X -> Y -> Z is in the +path (where X, Y, Z are locks), and the walk from X to Y is through a -(SR)-> or +-(ER)-> dependency, the walk from Y to Z must not be through a -(SN)-> or +-(SR)-> dependency. + +We will see why the path is called "strong" in next section. + +Recursive Read Deadlock Detection: +---------------------------------- + +We now prove two things: + +Lemma 1: + +If there is a closed strong path (i.e. a strong circle), then there is a +combination of locking sequences that causes deadlock. I.e. a strong circle is +sufficient for deadlock detection. + +Lemma 2: + +If there is no closed strong path (i.e. strong circle), then there is no +combination of locking sequences that could cause deadlock. I.e. strong +circles are necessary for deadlock detection. + +With these two Lemmas, we can easily say a closed strong path is both sufficient +and necessary for deadlocks, therefore a closed strong path is equivalent to +deadlock possibility. As a closed strong path stands for a dependency chain that +could cause deadlocks, so we call it "strong", considering there are dependency +circles that won't cause deadlocks. + +Proof for sufficiency (Lemma 1): + +Let's say we have a strong circle: + + L1 -> L2 ... -> Ln -> L1 + +, which means we have dependencies: + + L1 -> L2 + L2 -> L3 + ... + Ln-1 -> Ln + Ln -> L1 + +We now can construct a combination of locking sequences that cause deadlock: + +Firstly let's make one CPU/task get the L1 in L1 -> L2, and then another get +the L2 in L2 -> L3, and so on. After this, all of the Lx in Lx -> Lx+1 are +held by different CPU/tasks. + +And then because we have L1 -> L2, so the holder of L1 is going to acquire L2 +in L1 -> L2, however since L2 is already held by another CPU/task, plus L1 -> +L2 and L2 -> L3 are not -(xR)-> and -(Sx)-> (the definition of strong), which +means either L2 in L1 -> L2 is a non-recursive locker (blocked by anyone) or +the L2 in L2 -> L3, is writer (blocking anyone), therefore the holder of L1 +cannot get L2, it has to wait L2's holder to release. + +Moreover, we can have a similar conclusion for L2's holder: it has to wait L3's +holder to release, and so on. We now can prove that Lx's holder has to wait for +Lx+1's holder to release, and note that Ln+1 is L1, so we have a circular +waiting scenario and nobody can get progress, therefore a deadlock. + +Proof for necessary (Lemma 2): + +Lemma 2 is equivalent to: If there is a deadlock scenario, then there must be a +strong circle in the dependency graph. + +According to Wikipedia[1], if there is a deadlock, then there must be a circular +waiting scenario, means there are N CPU/tasks, where CPU/task P1 is waiting for +a lock held by P2, and P2 is waiting for a lock held by P3, ... and Pn is waiting +for a lock held by P1. Let's name the lock Px is waiting as Lx, so since P1 is waiting +for L1 and holding Ln, so we will have Ln -> L1 in the dependency graph. Similarly, +we have L1 -> L2, L2 -> L3, ..., Ln-1 -> Ln in the dependency graph, which means we +have a circle: + + Ln -> L1 -> L2 -> ... -> Ln + +, and now let's prove the circle is strong: + +For a lock Lx, Px contributes the dependency Lx-1 -> Lx and Px+1 contributes +the dependency Lx -> Lx+1, and since Px is waiting for Px+1 to release Lx, +so it's impossible that Lx on Px+1 is a reader and Lx on Px is a recursive +reader, because readers (no matter recursive or not) don't block recursive +readers, therefore Lx-1 -> Lx and Lx -> Lx+1 cannot be a -(xR)-> -(Sx)-> pair, +and this is true for any lock in the circle, therefore, the circle is strong. + +References: +----------- +[1]: https://en.wikipedia.org/wiki/Deadlock +[2]: Shibu, K. (2009). Intro To Embedded Systems (1st ed.). Tata McGraw-Hill -- cgit v1.2.3 From 80793c3471d90d4dc2b48deadb6413bdfe39500f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 27 Aug 2020 13:40:39 +0200 Subject: seqlock: Introduce seqcount_latch_t Latch sequence counters are a multiversion concurrency control mechanism where the seqcount_t counter even/odd value is used to switch between two copies of protected data. This allows the seqcount_t read path to safely interrupt its write side critical section (e.g. from NMIs). Initially, latch sequence counters were implemented as a single write function above plain seqcount_t: raw_write_seqcount_latch(). The read side was expected to use plain seqcount_t raw_read_seqcount(). A specialized latch read function, raw_read_seqcount_latch(), was later added. It became the standardized way for latch read paths. Due to the dependent load, it has one read memory barrier less than the plain seqcount_t raw_read_seqcount() API. Only raw_write_seqcount_latch() and raw_read_seqcount_latch() should be used with latch sequence counters. Having *unique* read and write path APIs means that latch sequence counters are actually a data type of their own -- just inappropriately overloading plain seqcount_t. Introduce seqcount_latch_t. This adds type-safety and ensures that only the correct latch-safe APIs are to be used. Not to break bisection, let the latch APIs also accept plain seqcount_t or seqcount_raw_spinlock_t. After converting all call sites to seqcount_latch_t, only that new data type will be allowed. References: 9b0fd802e8c0 ("seqcount: Add raw_write_seqcount_latch()") References: 7fc26327b756 ("seqlock: Introduce raw_read_seqcount_latch()") References: aadd6e5caaac ("time/sched_clock: Use raw_read_seqcount_latch()") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200827114044.11173-4-a.darwish@linutronix.de --- Documentation/locking/seqlock.rst | 18 +++++++ include/linux/seqlock.h | 104 ++++++++++++++++++++++++++------------ 2 files changed, 91 insertions(+), 31 deletions(-) (limited to 'Documentation') diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index 62c5ad98c11c..a334b584f2b3 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -139,6 +139,24 @@ with the associated LOCKTYPE lock acquired. Read path: same as in :ref:`seqcount_t`. + +.. _seqcount_latch_t: + +Latch sequence counters (``seqcount_latch_t``) +---------------------------------------------- + +Latch sequence counters are a multiversion concurrency control mechanism +where the embedded seqcount_t counter even/odd value is used to switch +between two copies of protected data. This allows the sequence counter +read path to safely interrupt its own write side critical section. + +Use seqcount_latch_t when the write side sections cannot be protected +from interruption by readers. This is typically the case when the read +side can be invoked from NMI handlers. + +Check `raw_write_seqcount_latch()` for more information. + + .. _seqlock_t: Sequential locks (``seqlock_t``) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 300cbf312546..88b917d4ebde 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -587,34 +587,76 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } -/** - * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants +/* + * Latch sequence counters (seqcount_latch_t) * - * Use seqcount_t latching to switch between two storage places protected - * by a sequence counter. Doing so allows having interruptible, preemptible, - * seqcount_t write side critical sections. + * A sequence counter variant where the counter even/odd value is used to + * switch between two copies of protected data. This allows the read path, + * typically NMIs, to safely interrupt the write side critical section. * - * Check raw_write_seqcount_latch() for more details and a full reader and - * writer usage example. + * As the write sections are fully preemptible, no special handling for + * PREEMPT_RT is needed. + */ +typedef struct { + seqcount_t seqcount; +} seqcount_latch_t; + +/** + * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t + * @seq_name: Name of the seqcount_latch_t instance + */ +#define SEQCNT_LATCH_ZERO(seq_name) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ +} + +/** + * seqcount_latch_init() - runtime initializer for seqcount_latch_t + * @s: Pointer to the seqcount_latch_t instance + */ +static inline void seqcount_latch_init(seqcount_latch_t *s) +{ + seqcount_init(&s->seqcount); +} + +/** + * raw_read_seqcount_latch() - pick even/odd latch data copy + * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t + * + * See raw_write_seqcount_latch() for details and a full reader/writer + * usage example. * * Return: sequence counter raw value. Use the lowest bit as an index for - * picking which data copy to read. The full counter value must then be - * checked with read_seqcount_retry(). + * picking which data copy to read. The full counter must then be checked + * with read_seqcount_latch_retry(). */ -#define raw_read_seqcount_latch(s) \ - raw_read_seqcount_t_latch(__seqcount_ptr(s)) +#define raw_read_seqcount_latch(s) \ +({ \ + /* \ + * Pairs with the first smp_wmb() in raw_write_seqcount_latch(). \ + * Due to the dependent load, a full smp_rmb() is not needed. \ + */ \ + _Generic(*(s), \ + seqcount_t: READ_ONCE(((seqcount_t *)s)->sequence), \ + seqcount_raw_spinlock_t: READ_ONCE(((seqcount_raw_spinlock_t *)s)->seqcount.sequence), \ + seqcount_latch_t: READ_ONCE(((seqcount_latch_t *)s)->seqcount.sequence)); \ +}) -static inline int raw_read_seqcount_t_latch(seqcount_t *s) +/** + * read_seqcount_latch_retry() - end a seqcount_latch_t read section + * @s: Pointer to seqcount_latch_t + * @start: count, from raw_read_seqcount_latch() + * + * Return: true if a read section retry is required, else false + */ +static inline int +read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) { - /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ - int seq = READ_ONCE(s->sequence); /* ^^^ */ - return seq; + return read_seqcount_retry(&s->seqcount, start); } /** - * raw_write_seqcount_latch() - redirect readers to even/odd copy - * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants + * raw_write_seqcount_latch() - redirect latch readers to even/odd copy + * @s: Pointer to seqcount_t, seqcount_raw_spinlock_t, or seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -633,7 +675,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * The basic form is a data structure like:: * * struct latch_struct { - * seqcount_t seq; + * seqcount_latch_t seq; * struct data_struct data[2]; * }; * @@ -643,13 +685,13 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); // Ensure that the last data[1] update is visible - * latch->seq++; + * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); // Ensure that the data[0] update is visible - * latch->seq++; + * latch->seq.sequence++; * smp_wmb(); // Ensure that the seqcount update is visible * * modify(latch->data[1], ...); @@ -668,8 +710,8 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * - * // read_seqcount_retry() includes needed smp_rmb() - * } while (read_seqcount_retry(&latch->seq, seq)); + * // This includes needed smp_rmb() + * } while (read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } @@ -693,14 +735,14 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ -#define raw_write_seqcount_latch(s) \ - raw_write_seqcount_t_latch(__seqcount_ptr(s)) - -static inline void raw_write_seqcount_t_latch(seqcount_t *s) -{ - smp_wmb(); /* prior stores before incrementing "sequence" */ - s->sequence++; - smp_wmb(); /* increment "sequence" before following stores */ +#define raw_write_seqcount_latch(s) \ +{ \ + smp_wmb(); /* prior stores before incrementing "sequence" */ \ + _Generic(*(s), \ + seqcount_t: ((seqcount_t *)s)->sequence++, \ + seqcount_raw_spinlock_t:((seqcount_raw_spinlock_t *)s)->seqcount.sequence++, \ + seqcount_latch_t: ((seqcount_latch_t *)s)->seqcount.sequence++); \ + smp_wmb(); /* increment "sequence" before following stores */ \ } /* -- cgit v1.2.3